From a550c873e085dfdec3312b65500b36083ff1fc9f Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:15 -0700 Subject: [PATCH 001/188] uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ mainline inclusion from mainline-v5.14-rc1 commit 7cd60e43a6def40ecb75deb8decc677995970d0b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 7cd60e43a6de uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ. -------------------------------- Define AT_MINSIGSTKSZ in the generic uapi header. It is already used as generic ABI in glibc's generic elf.h, and this define will prevent future namespace conflicts. In particular, x86 is also using this generic definition. Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-2-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- include/uapi/linux/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index abe5f2b6581b..c7e502bf5a6f 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -33,5 +33,8 @@ #define AT_EXECFN 31 /* filename of program */ +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */ +#endif #endif /* _UAPI_LINUX_AUXVEC_H */ -- Gitee From d265ef4aa02fee14f5fd82a1128106000239eaef Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:16 -0700 Subject: [PATCH 002/188] x86/signal: Introduce helpers to get the maximum signal frame size mainline inclusion from mainline-v5.14-rc1 commit 939ef713297df2cc910592305aa26af0e87f28ac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 939ef713297d x86/signal: Introduce helpers to get the maximum signal frame size. 
-------------------------------- Signal frames do not have a fixed format and can vary in size when a number of things change: supported XSAVE features, 32 vs. 64-bit apps, etc. Add support for a runtime method for userspace to dynamically discover how large a signal stack needs to be. Introduce a new variable, max_frame_size, and helper functions for the calculation to be used in a new user interface. Set max_frame_size to a system-wide worst-case value, instead of storing multiple app-specific values. Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Acked-by: H.J. Lu Link: https://lkml.kernel.org/r/20210518200320.17239-3-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/signal.h | 2 ++ arch/x86/include/asm/sigframe.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 ++ arch/x86/kernel/fpu/signal.c | 19 ++++++++++ arch/x86/kernel/signal.c | 59 +++++++++++++++++++++++++++++-- 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 7fb516b6893a..8b6631dffefd 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -29,6 +29,8 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); +unsigned long fpu__get_fpstate_size(void); + extern void fpu__init_prepare_fx_sw_frame(void); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 84eab2724875..5b1ed650b124 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,4 +85,6 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ +void __init init_sigframe_size(void); + #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4917c2698ac1..f18bb226546a 100644 --- a/arch/x86/kernel/cpu/common.c +++ 
b/arch/x86/kernel/cpu/common.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "cpu.h" @@ -1378,6 +1379,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) fpu__init_system(c); + init_sigframe_size(); + #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index b7b92cdf3add..888c8e0d2ff1 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -513,6 +513,25 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, return sp; } + +unsigned long fpu__get_fpstate_size(void) +{ + unsigned long ret = xstate_sigframe_size(); + + /* + * This space is needed on (most) 32-bit kernels, or when a 32-bit + * app is running on a 64-bit kernel. To keep things simple, just + * assume the worst case and always include space for 'freg_state', + * even for 64-bit apps on 64-bit kernels. This wastes a bit of + * space, but keeps the code simple. + */ + if ((IS_ENABLED(CONFIG_IA32_EMULATION) || + IS_ENABLED(CONFIG_X86_32)) && use_fxsr()) + ret += sizeof(struct fregs_state); + + return ret; +} + /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b001ba811cab..04a76b28ee24 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -212,6 +212,11 @@ do { \ * Set up a signal frame. */ +/* x86 ABI requires 16-byte alignment */ +#define FRAME_ALIGNMENT 16UL + +#define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1) + /* * Determine which stack to use.. */ @@ -222,9 +227,9 @@ static unsigned long align_sigframe(unsigned long sp) * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. 
*/ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4; #else /* !CONFIG_X86_32 */ - sp = round_down(sp, 16) - 8; + sp = round_down(sp, FRAME_ALIGNMENT) - 8; #endif return sp; } @@ -679,6 +684,56 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; } +/* + * There are four different struct types for signal frame: sigframe_ia32, + * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case + * -- the largest size. It means the size for 64-bit apps is a bit more + * than needed, but this keeps the code simple. + */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct sigframe_ia32) +#else +# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct rt_sigframe) +#endif + +/* + * The FP state frame contains an XSAVE buffer which must be 64-byte aligned. + * If a signal frame starts at an unaligned address, extra space is required. + * This is the max alignment padding, conservatively. + */ +#define MAX_XSAVE_PADDING 63UL + +/* + * The frame data is composed of the following areas and laid out as: + * + * ------------------------- + * | alignment padding | + * ------------------------- + * | (f)xsave frame | + * ------------------------- + * | fsave header | + * ------------------------- + * | alignment padding | + * ------------------------- + * | siginfo + ucontext | + * ------------------------- + */ + +/* max_frame_size tells userspace the worst case signal stack size. */ +static unsigned long __ro_after_init max_frame_size; + +void __init init_sigframe_size(void) +{ + max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING; + + max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING; + + /* Userspace expects an aligned size. 
*/ + max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); + + pr_info("max sigframe size: %lu\n", max_frame_size); +} + static inline int is_ia32_compat_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_IA32_EMULATION) && -- Gitee From ea41b029584d522ba0739511d1dd6ff070c90e1d Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:17 -0700 Subject: [PATCH 003/188] x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ mainline inclusion from mainline-v5.14-rc1 commit 1c33bb0507508af24fd754dd7123bd8e997fab2f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1c33bb050750 x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ. -------------------------------- Historically, signal.h defines MINSIGSTKSZ (2KB) and SIGSTKSZ (8KB), for use by all architectures with sigaltstack(2). Over time, the hardware state size grew, but these constants did not evolve. Today, literal use of these constants on several architectures may result in signal stack overflow, and thus user data corruption. A few years ago, the ARM team addressed this issue by establishing getauxval(AT_MINSIGSTKSZ). This enables the kernel to supply a value at runtime that is an appropriate replacement on current and future hardware. Add getauxval(AT_MINSIGSTKSZ) support to x86, analogous to the support added for ARM in 94b07c1f8c39 ("arm64: signal: Report signal frame size to userspace via auxv"). Also, include a documentation to describe x86-specific auxiliary vectors. Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-4-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- Documentation/x86/elf_auxvec.rst | 53 ++++++++++++++++++++++++++++++ Documentation/x86/index.rst | 1 + arch/x86/include/asm/elf.h | 4 +++ arch/x86/include/uapi/asm/auxvec.h | 4 +-- arch/x86/kernel/signal.c | 5 +++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 Documentation/x86/elf_auxvec.rst diff --git a/Documentation/x86/elf_auxvec.rst b/Documentation/x86/elf_auxvec.rst new file mode 100644 index 000000000000..18e4744717f9 --- /dev/null +++ b/Documentation/x86/elf_auxvec.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +x86-specific ELF Auxiliary Vectors +================================== + +This document describes the semantics of the x86 auxiliary vectors. + +Introduction +============ + +ELF Auxiliary vectors enable the kernel to efficiently provide +configuration-specific parameters to userspace. In this example, a program +allocates an alternate stack based on the kernel-provided size:: + + #include + #include + #include + #include + #include + #include + + #ifndef AT_MINSIGSTKSZ + #define AT_MINSIGSTKSZ 51 + #endif + + .... + stack_t ss; + + ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ; + ss.ss_sp = malloc(ss.ss_size); + assert(ss.ss_sp); + + ss.ss_flags = 0; + + if (sigaltstack(&ss, NULL)) + err(1, "sigaltstack"); + + +The exposed auxiliary vectors +============================= + +AT_SYSINFO is used for locating the vsyscall entry point. It is not +exported on 64-bit mode. + +AT_SYSINFO_EHDR is the start address of the page containing the vDSO. + +AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to +deliver a signal to user-space. AT_MINSIGSTKSZ comprehends the space +consumed by the kernel to accommodate the user context for the current +hardware configuration.
It does not comprehend subsequent user-space stack +consumption, which must be added by the user. (e.g. Above, user-space adds +SIGSTKSZ to AT_MINSIGSTKSZ.) diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index 6832df92f084..6b1384c13852 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -35,3 +35,4 @@ x86-specific Documentation x86_64/index sva sgx + elf_auxvec diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index d445bf8f5214..0876ba991601 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -314,6 +314,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -330,6 +331,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -351,6 +353,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. 
*/ @@ -359,6 +362,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) #define AT_SYSINFO 32 diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h index 580e3c567046..6beb55bbefa4 100644 --- a/arch/x86/include/uapi/asm/auxvec.h +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -12,9 +12,9 @@ /* entries in ARCH_DLINFO: */ #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 +# define AT_VECTOR_SIZE_ARCH 3 #else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 +# define AT_VECTOR_SIZE_ARCH 2 #endif #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 04a76b28ee24..e941dc4c46bd 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -734,6 +734,11 @@ void __init init_sigframe_size(void) pr_info("max sigframe size: %lu\n", max_frame_size); } +unsigned long get_sigframe_size(void) +{ + return max_frame_size; +} + static inline int is_ia32_compat_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_IA32_EMULATION) && -- Gitee From 155a148f99570ecbc33772922416a2fd25e6b33b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:18 -0700 Subject: [PATCH 004/188] selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available mainline inclusion from mainline-v5.14-rc1 commit bdf6c8b84a4fa726c382ef6d3518f3ae123a7ebd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit bdf6c8b84a4f selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available. -------------------------------- The SIGSTKSZ constant may not represent enough stack size in some architectures as the hardware state size grows. Use getauxval(AT_MINSIGSTKSZ) to increase the stack size. Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-5-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- tools/testing/selftests/sigaltstack/sas.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index 8934a3766d20..c53b070755b6 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "../kselftest.h" @@ -24,6 +25,11 @@ #define SS_AUTODISARM (1U << 31) #endif +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 +#endif + +static unsigned long stack_size; static void *sstack, *ustack; static ucontext_t uc, sc; static const char *msg = "[OK]\tStack preserved"; @@ -47,7 +53,7 @@ void my_usr1(int sig, siginfo_t *si, void *u) #endif if (sp < (unsigned long)sstack || - sp >= (unsigned long)sstack + SIGSTKSZ) { + sp >= (unsigned long)sstack + stack_size) { ksft_exit_fail_msg("SP is not on sigaltstack\n"); } /* put some data on stack. other sighandler will try to overwrite it */ @@ -108,6 +114,10 @@ int main(void) stack_t stk; int err; + /* Make sure more than the required minimum.
*/ + stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ; + ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size); + ksft_print_header(); ksft_set_plan(3); @@ -117,7 +127,7 @@ int main(void) sigaction(SIGUSR1, &act, NULL); act.sa_sigaction = my_usr2; sigaction(SIGUSR2, &act, NULL); - sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (sstack == MAP_FAILED) { ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); @@ -139,7 +149,7 @@ int main(void) } stk.ss_sp = sstack; - stk.ss_size = SIGSTKSZ; + stk.ss_size = stack_size; stk.ss_flags = SS_ONSTACK | SS_AUTODISARM; err = sigaltstack(&stk, NULL); if (err) { @@ -161,7 +171,7 @@ int main(void) } } - ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (ustack == MAP_FAILED) { ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); @@ -170,7 +180,7 @@ int main(void) getcontext(&uc); uc.uc_link = NULL; uc.uc_stack.ss_sp = ustack; - uc.uc_stack.ss_size = SIGSTKSZ; + uc.uc_stack.ss_size = stack_size; makecontext(&uc, switch_fn, 0); raise(SIGUSR1); -- Gitee From 8ae3b61189ae235dba1ba6c395771d47abcd81f9 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:20 -0700 Subject: [PATCH 005/188] selftest/x86/signal: Include test cases for validating sigaltstack mainline inclusion from mainline-v5.14-rc1 commit 8919f07276991c7bf0d0802f0356331c5c62f7a2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 8919f0727699 selftest/x86/signal: Include test cases for validating sigaltstack. -------------------------------- The test measures the kernel's signal delivery with different (enough vs. insufficient) stack sizes. Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-7-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sigaltstack.c | 128 ++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/sigaltstack.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 9c4c75f06396..15496633f7e4 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vdso test_vsyscall mov_ss_trap \ - syscall_arg_fault fsgsbase_restore + syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c new file mode 100644 index 000000000000..f689af75e979 --- /dev/null +++ b/tools/testing/selftests/x86/sigaltstack.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* sigaltstack()-enforced minimum stack */ +#define ENFORCED_MINSIGSTKSZ 2048 + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static int nerrs; + +static bool sigalrm_expected; + +static unsigned long at_minstack_size; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + 
sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static int setup_altstack(void *start, unsigned long size) +{ + stack_t ss; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = start; + + return sigaltstack(&ss, NULL); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + if (sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).\n"); + nerrs++; + } else { + printf("[OK]\tSIGSEGV signal delivered.\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigalrm(int sig, siginfo_t *info, void *ctx_void) +{ + if (!sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).\n"); + nerrs++; + } else { + printf("[OK]\tSIGALRM signal delivered.\n"); + } +} + +static void test_sigaltstack(void *altstack, unsigned long size) +{ + if (setup_altstack(altstack, size)) + err(1, "sigaltstack()"); + + sigalrm_expected = (size > at_minstack_size) ? true : false; + + sethandler(SIGSEGV, sigsegv, 0); + sethandler(SIGALRM, sigalrm, SA_ONSTACK); + + if (!sigsetjmp(jmpbuf, 1)) { + printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n", + sigalrm_expected ? "" : "in"); + printf("\tRaise SIGALRM. %s is expected to be delivered.\n", + sigalrm_expected ?
"It" : "SIGSEGV"); + raise(SIGALRM); + } + + clearhandler(SIGALRM); + clearhandler(SIGSEGV); +} + +int main(void) +{ + void *altstack; + + at_minstack_size = getauxval(AT_MINSIGSTKSZ); + + altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (altstack == MAP_FAILED) + err(1, "mmap()"); + + if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size) + test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1); + + test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ); + + return nerrs == 0 ? 0 : 1; +} -- Gitee From 5d55ecf90da11a1d8a721ecc65c66d12e1a8c685 Mon Sep 17 00:00:00 2001 From: Lin Wang Date: Thu, 21 Jul 2022 11:01:45 +0800 Subject: [PATCH 006/188] Revert "x86/fpu: Correct pkru/xstate inconsistency" hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA -------------------------------- This reverts commit 039ea4960ea38882bd1dbcbb594935d75303b530. Since the enabling of Intel AMX involves the complete rewriting of the PKRU handling, then this workaround is no longer needed. Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 13 +++++-------- arch/x86/kernel/process_32.c | 6 ++++-- arch/x86/kernel/process_64.c | 6 ++++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 70b9bc5403c5..4e5af2b00d89 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -531,11 +531,9 @@ static inline void __fpregs_load_activate(void) * The FPU context is only stored/restored for a user task and * PF_KTHREAD is used to distinguish between kernel and user threads. 
*/ -static inline void switch_fpu_prepare(struct task_struct *prev, int cpu) +static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - struct fpu *old_fpu = &prev->thread.fpu; - - if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) { + if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -554,11 +552,10 @@ static inline void switch_fpu_prepare(struct task_struct *prev, int cpu) * Load PKRU from the FPU context if available. Delay loading of the * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct task_struct *next) +static inline void switch_fpu_finish(struct fpu *new_fpu) { u32 pkru_val = init_pkru_value; struct pkru_state *pk; - struct fpu *next_fpu = &next->thread.fpu; if (!static_cpu_has(X86_FEATURE_FPU)) return; @@ -572,7 +569,7 @@ static inline void switch_fpu_finish(struct task_struct *next) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (!(next->flags & PF_KTHREAD)) { + if (!(current->flags & PF_KTHREAD)) { /* * If the PKRU bit in xsave.header.xfeatures is not set, * then the PKRU component was in init state, which means @@ -581,7 +578,7 @@ static inline void switch_fpu_finish(struct task_struct *next) * in memory is not valid. This means pkru_val has to be * set to 0 and not to init_pkru_value. */ - pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU); + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); pkru_val = pk ? 
pk->pkru : 0; } __write_pkru(pkru_val); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 98bf8fd18902..4f2f54e1281c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -159,12 +159,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_p, cpu); + switch_fpu_prepare(prev_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -211,7 +213,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_p); + switch_fpu_finish(next_fpu); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad3f82a18de9..df342bedea88 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -535,13 +535,15 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_p, cpu); + switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). 
@@ -593,7 +595,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_p); + switch_fpu_finish(next_fpu); /* Reload sp0. */ update_task_stack(next_p); -- Gitee From 81e681021d18818ecb9701cfd2d72e4bb07d09aa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:29 +0200 Subject: [PATCH 007/188] x86/pkeys: Revert a5eff7259790 ("x86/pkeys: Add PKRU value to init_fpstate") mainline inclusion from mainline-v5.14-rc1 commit b3607269ff57fd3c9690cb25962c5e4b91a0fd3b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b3607269ff57 x86/pkeys: Revert a5eff7259790 ("x86/pkeys: Add PKRU value to init_fpstate"). -------------------------------- This cannot work and it's unclear how that ever made a difference. init_fpstate.xsave.header.xfeatures is always 0 so get_xsave_addr() will always return a NULL pointer, which will prevent storing the default PKRU value in init_fpstate. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121451.451391598@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/cpu/common.c | 5 ----- arch/x86/mm/pkeys.c | 6 ------ 2 files changed, 11 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f18bb226546a..06aa0227b11c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -468,8 +468,6 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { - struct pkru_state *pk; - /* check the boot processor, plus compile options for PKU: */ if (!cpu_feature_enabled(X86_FEATURE_PKU)) return; @@ -480,9 +478,6 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) return; cr4_set_bits(X86_CR4_PKE); - pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); - if (pk) - pk->pkru = init_pkru_value; /* * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE * cpuid bit to be set. We need to ensure that we diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 8873ed1438a9..379c39612793 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -10,7 +10,6 @@ #include /* boot_cpu_has, ... 
*/ #include /* vma_pkey() */ -#include /* init_fpstate */ int __execute_only_pkey(struct mm_struct *mm) { @@ -154,7 +153,6 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf, static ssize_t init_pkru_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct pkru_state *pk; char buf[32]; ssize_t len; u32 new_init_pkru; @@ -177,10 +175,6 @@ static ssize_t init_pkru_write_file(struct file *file, return -EINVAL; WRITE_ONCE(init_pkru_value, new_init_pkru); - pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); - if (!pk) - return -EINVAL; - pk->pkru = new_init_pkru; return count; } -- Gitee From 66a782e5581cd0550d3b26cc2657996126f6a3d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:30 +0200 Subject: [PATCH 008/188] x86/fpu: Mark various FPU state variables __ro_after_init mainline inclusion from mainline-v5.14-rc1 commit ce578f16348b003675c928a1992498b33b515f18 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ce578f16348b x86/fpu: Mark various FPU state variables __ro_after_init. -------------------------------- Nothing modifies these after booting. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20210623121451.611751529@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/init.c | 4 ++-- arch/x86/kernel/fpu/xstate.c | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 701f196d7c68..95aa1090ea72 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) /* * Boot time FPU feature detection code: */ -unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu; EXPORT_SYMBOL_GPL(mxcsr_feature_mask); static void __init fpu__init_system_mxcsr(void) @@ -135,7 +135,7 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int fpu_kernel_xstate_size; +unsigned int fpu_kernel_xstate_size __ro_after_init; EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6655e0df2fb7..f66be7a6f5dd 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -61,17 +61,21 @@ static short xsave_cpuid_features[] __initdata = { */ u64 xfeatures_mask_all __read_mostly; -static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; -static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; -static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; -static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = + { [ 0 ... 
XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = + { [ 0 ... XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init = + { [ 0 ... XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init = + { [ 0 ... XFEATURE_MAX - 1] = -1}; /* * The XSAVE area of kernel can be in standard or compacted format; * it is always in standard format for user mode. This is the user * mode standard format size used for signal and ptrace frames. */ -unsigned int fpu_user_xstate_size; +unsigned int fpu_user_xstate_size __ro_after_init; /* * Return whether the system supports a given xfeature. -- Gitee From 807abad35464cad9c42209a521254490949d7f59 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:31 +0200 Subject: [PATCH 009/188] x86/fpu: Make xfeatures_mask_all __ro_after_init mainline inclusion from mainline-v5.14-rc1 commit 4e8e4313cf81add679e1c57677d689c02e382a67 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 4e8e4313cf81 x86/fpu: Make xfeatures_mask_all __ro_after_init. -------------------------------- Nothing has to modify this after init. But of course there is code which unconditionally masks xfeatures_mask_all on CPU hotplug. This goes unnoticed during boot hotplug because at that point the variable is still RW mapped. This is broken in several ways: 1) Masking this in post init CPU hotplug means that any modification of this state goes unnoticed until actual hotplug happens. 2) If that ever happens then these bogus feature bits are already populated all over the place and the system is in inconsistent state vs. the compacted XSTATE offsets. If at all then this has to panic the machine because the inconsistency cannot be undone anymore. Make this a one-time paranoia check in xstate init code and disable xsave when this happens. 
Reported-by: Kan Liang Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121451.712803952@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f66be7a6f5dd..ce2c6f8bd2d1 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -59,7 +59,7 @@ static short xsave_cpuid_features[] __initdata = { * This represents the full set of bits that should ever be set in a kernel * XSAVE buffer, both supervisor and user xstates. */ -u64 xfeatures_mask_all __read_mostly; +u64 xfeatures_mask_all __ro_after_init; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; @@ -213,19 +213,8 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ void fpu__init_cpu_xstate(void) { - u64 unsup_bits; - if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all) return; - /* - * Unsupported supervisor xstates should not be found in - * the xfeatures mask. 
- */ - unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED; - WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n", - unsup_bits); - - xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED; cr4_set_bits(X86_CR4_OSXSAVE); @@ -825,6 +814,7 @@ void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; static int on_boot_cpu __initdata = 1; + u64 xfeatures; int err; int i; @@ -879,6 +869,8 @@ void __init fpu__init_system_xstate(void) } xfeatures_mask_all &= fpu__get_supported_xfeatures_mask(); + /* Store it for paranoia check at the end */ + xfeatures = xfeatures_mask_all; /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -896,8 +888,18 @@ void __init fpu__init_system_xstate(void) setup_init_fpu_buf(); setup_xstate_comp_offsets(); setup_supervisor_only_offsets(); - print_xstate_offset_size(); + /* + * Paranoia check whether something in the setup modified the + * xfeatures mask. + */ + if (xfeatures != xfeatures_mask_all) { + pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", + xfeatures, xfeatures_mask_all); + goto out_disable; + } + + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask_all, fpu_kernel_xstate_size, -- Gitee From 56fb98a9b0c29dfdc56391bb3f772933e237c33b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:32 +0200 Subject: [PATCH 010/188] x86/fpu: Get rid of fpu__get_supported_xfeatures_mask() mainline inclusion from mainline-v5.14-rc1 commit ce38f038ede735fd425ebda10d1758420a669a87 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ce38f038ede7 x86/fpu: Get rid of fpu__get_supported_xfeatures_mask(). 
-------------------------------- This function is really not doing what the comment advertises: "Find supported xfeatures based on cpu features and command-line input. This must be called after fpu__init_parse_early_param() is called and xfeatures_mask is enumerated." fpu__init_parse_early_param() does not exist anymore and the function just returns a constant. Remove it and fix the caller and get rid of further references to fpu__init_parse_early_param(). Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121451.816404717@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 1 - arch/x86/kernel/cpu/common.c | 5 ++--- arch/x86/kernel/fpu/init.c | 11 ----------- arch/x86/kernel/fpu/xstate.c | 4 +++- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4e5af2b00d89..0c15fa492d59 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -45,7 +45,6 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06aa0227b11c..a5780924492e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1791,9 +1791,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) } /* - * clearcpuid= was already parsed in fpu__init_parse_early_param. - * But we need to keep a dummy __setup around otherwise it would - * show up as an environment variable for init. + * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy + * function prevents it from becoming an environment variable for init. 
*/ static __init int setup_clearcpuid(char *arg) { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 95aa1090ea72..64e29927cc32 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -216,17 +216,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpu_user_xstate_size = fpu_kernel_xstate_size; } -/* - * Find supported xfeatures based on cpu features and command-line input. - * This must be called after fpu__init_parse_early_param() is called and - * xfeatures_mask is enumerated. - */ -u64 __init fpu__get_supported_xfeatures_mask(void) -{ - return XFEATURE_MASK_USER_SUPPORTED | - XFEATURE_MASK_SUPERVISOR_SUPPORTED; -} - /* Legacy code to initialize eager fpu mode. */ static void __init fpu__init_system_ctx_switch(void) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ce2c6f8bd2d1..648ee67c0778 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -868,7 +868,9 @@ void __init fpu__init_system_xstate(void) xfeatures_mask_all &= ~BIT_ULL(i); } - xfeatures_mask_all &= fpu__get_supported_xfeatures_mask(); + xfeatures_mask_all &= XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED; + /* Store it for paranoia check at the end */ xfeatures = xfeatures_mask_all; -- Gitee From 42b921a4711684a66edb7cc6f462fba73bf4d089 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:33 +0200 Subject: [PATCH 011/188] x86/fpu: Remove unused get_xsave_field_ptr() mainline inclusion from mainline-v5.14-rc1 commit 4098b3eef37be19572d270f9b761c3e8ffcf37ac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 4098b3eef37b x86/fpu: Remove unused get_xsave_field_ptr(). 
-------------------------------- Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121451.915614415@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/kernel/fpu/xstate.c | 30 ------------------------------ 2 files changed, 31 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 47a92232d595..d22e973845c6 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -101,7 +101,6 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); -const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int xfeature_size(int xfeature_nr); struct membuf; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 648ee67c0778..f2889ac2943e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -998,36 +998,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) } EXPORT_SYMBOL_GPL(get_xsave_addr); -/* - * This wraps up the common operations that need to occur when retrieving - * data from xsave state. It first ensures that the current task was - * using the FPU and retrieves the data in to a buffer. It then calculates - * the offset of the requested field in the buffer. - * - * This function is safe to call whether the FPU is in use or not. - * - * Note that this only works on the current task. - * - * Inputs: - * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, - * XFEATURE_SSE, etc...) - * Output: - * address of the state in the xsave area or NULL if the state - * is not present or is in its 'init state'. 
- */ -const void *get_xsave_field_ptr(int xfeature_nr) -{ - struct fpu *fpu = &current->thread.fpu; - - /* - * fpu__save() takes the CPU's xstate registers - * and saves them off to the 'fpu memory buffer. - */ - fpu__save(fpu); - - return get_xsave_addr(&fpu->state.xsave, xfeature_nr); -} - #ifdef CONFIG_ARCH_HAS_PKEYS /* -- Gitee From bb15b128694c319eef4680df94999014eeec9240 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:34 +0200 Subject: [PATCH 012/188] x86/fpu: Move inlines where they belong mainline inclusion from mainline-v5.14-rc1 commit e68524456c855e500f0a636adb1aa977e1e0b4d8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit e68524456c85 x86/fpu: Move inlines where they belong. -------------------------------- They are only used in fpstate_init() and there is no point to have them in a header just to make reading the code harder. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.023118522@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 14 -------------- arch/x86/kernel/fpu/core.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0c15fa492d59..ffa48b852300 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -86,20 +86,6 @@ extern void fpstate_init_soft(struct swregs_state *soft); static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; -} - -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -}
extern void fpstate_sanitize_xstate(struct fpu *fpu); /* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 571220ac8bea..9ff467bb4894 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -181,6 +181,21 @@ void fpu__save(struct fpu *fpu) fpregs_unlock(); } +static inline void fpstate_init_xstate(struct xregs_state *xsave) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; +} + +static inline void fpstate_init_fxstate(struct fxregs_state *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + /* * Legacy x87 fpstate state init: */ -- Gitee From 7f26af0f9fed731f83d9979c36bc142bbbfcd47b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:36 +0200 Subject: [PATCH 013/188] x86/fpu: Sanitize xstateregs_set() mainline inclusion from mainline-v5.14-rc1 commit 43be46e89698a41dbf4fff81a322f4c2ae21b5e2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 43be46e89698 x86/fpu: Sanitize xstateregs_set(). -------------------------------- xstateregs_set() operates on a stopped task and tries to copy the provided buffer into the task's fpu.state.xsave buffer. Any error while copying or invalid state detected after copying results in wiping the target task's FPU state completely including supervisor states. That's just wrong. The caller supplied invalid data or has a problem with unmapped memory, so there is absolutely no justification to corrupt the target state. Fix this with the following modifications: 1) If data has to be copied from userspace, allocate a buffer and copy from user first. 2) Use copy_kernel_to_xstate() unconditionally so that header checking works correctly. 3) Return on error without corrupting the target state. 
This prevents corrupting states and lets the caller deal with the problem it caused in the first place. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.214903673@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 4 --- arch/x86/kernel/fpu/regset.c | 42 +++++++++++++------------------ arch/x86/kernel/fpu/xstate.c | 14 ++++++----- 3 files changed, 25 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index d22e973845c6..1bb2d16f485b 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -111,8 +111,4 @@ void copy_supervisor_to_kernel(struct xregs_state *xsave); void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); - -/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -int validate_user_xstate_header(const struct xstate_header *hdr); - #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 6bb874441de8..a50c0a935499 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -2,11 +2,13 @@ /* * FPU register's regset abstraction, for ptrace, core dumps, etc. 
*/ +#include <linux/sched/task_stack.h> +#include <linux/vmalloc.h> + #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> -#include <linux/sched/task_stack.h> /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -108,10 +110,10 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct xregs_state *xsave; + struct xregs_state *tmpbuf = NULL; int ret; - if (!boot_cpu_has(X86_FEATURE_XSAVE)) + if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; /* @@ -120,32 +122,22 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (pos != 0 || count != fpu_user_xstate_size) return -EFAULT; - xsave = &fpu->state.xsave; - - fpu__prepare_write(fpu); + if (!kbuf) { + tmpbuf = vmalloc(count); + if (!tmpbuf) + return -ENOMEM; - if (using_compacted_format()) { - if (kbuf) - ret = copy_kernel_to_xstate(xsave, kbuf); - else - ret = copy_user_to_xstate(xsave, ubuf); - } else { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - if (!ret) - ret = validate_user_xstate_header(&xsave->header); + if (copy_from_user(tmpbuf, ubuf, count)) { + ret = -EFAULT; + goto out; + } } - /* - * mxcsr reserved bits must be masked to zero for security reasons.
- */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - /* - * In case of failure, mark all states as init: - */ - if (ret) - fpstate_init(&fpu->state); + fpu__prepare_write(fpu); + ret = copy_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); +out: + vfree(tmpbuf); return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f2889ac2943e..2e4527e02f29 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -543,7 +543,7 @@ int using_compacted_format(void) } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -int validate_user_xstate_header(const struct xstate_header *hdr) +static int validate_user_xstate_header(const struct xstate_header *hdr) { /* No unknown or supervisor features may be set */ if (hdr->xfeatures & ~xfeatures_mask_user()) @@ -1155,7 +1155,7 @@ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) } /* - * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format + * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] format * and copy to the target thread. This is called from xstateregs_set(). */ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) @@ -1202,14 +1202,16 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) */ xsave->header.xfeatures |= hdr.xfeatures; + /* mxcsr reserved bits must be masked to zero for historical reasons. */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + return 0; } /* - * Convert from a ptrace or sigreturn standard-format user-space buffer to - * kernel XSAVES format and copy to the target thread. This is called from - * xstateregs_set(), as well as potentially from the sigreturn() and - * rt_sigreturn() system calls. + * Convert from a sigreturn standard-format user-space buffer to kernel + * XSAVE[S] format and copy to the target thread. This is called from the + * sigreturn() and rt_sigreturn() system calls. 
*/ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) { -- Gitee From 61096e8284300953c1865d0f09f487627bb67ad6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:37 +0200 Subject: [PATCH 014/188] x86/fpu: Reject invalid MXCSR values in copy_kernel_to_xstate() mainline inclusion from mainline-v5.14-rc1 commit 947f4947cf00ea1e6d319eb182c64ea51ba4de8d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 947f4947cf00 x86/fpu: Reject invalid MXCSR values in copy_kernel_to_xstate(). -------------------------------- Instead of masking out reserved bits, check them and reject the provided state as invalid if not zero. Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.308388343@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2e4527e02f29..f286c5d21561 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1154,6 +1154,19 @@ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) membuf_zero(&to, to.left); } +static inline bool mxcsr_valid(struct xstate_header *hdr, const u32 *mxcsr) +{ + u64 mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; + + /* Only check if it is in use */ + if (hdr->xfeatures & mask) { + /* Reserved bits in MXCSR must be zero. */ + if (*mxcsr & ~mxcsr_feature_mask) + return false; + } + return true; +} + /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] format * and copy to the target thread. This is called from xstateregs_set(). 
@@ -1172,6 +1185,9 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) if (validate_user_xstate_header(&hdr)) return -EINVAL; + if (!mxcsr_valid(&hdr, kbuf + offsetof(struct fxregs_state, mxcsr))) + return -EINVAL; + for (i = 0; i < XFEATURE_MAX; i++) { u64 mask = ((u64)1 << i); @@ -1202,9 +1218,6 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) */ xsave->header.xfeatures |= hdr.xfeatures; - /* mxcsr reserved bits must be masked to zero for historical reasons. */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - return 0; } -- Gitee From 440608cfb114338522f66d1aadcaf6d0822bb925 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 23 Jun 2021 14:01:38 +0200 Subject: [PATCH 015/188] x86/fpu: Simplify PTRACE_GETREGS code mainline inclusion from mainline-v5.14-rc1 commit 3a3351126ee8f1f1c86c4c79c60a650c1da89733 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 3a3351126ee8 x86/fpu: Simplify PTRACE_GETREGS code. -------------------------------- ptrace() has interfaces that let a ptracer inspect a ptracee's register state. This includes XSAVE state. The ptrace() ABI includes a hardware-format XSAVE buffer for both the SETREGS and GETREGS interfaces. In the old days, the kernel buffer and the ptrace() ABI buffer were the same boring non-compacted format. But, since the advent of supervisor states and the compacted format, the kernel buffer has diverged from the format presented in the ABI. This leads to two paths in the kernel: 1. Effectively a verbatim copy_to_user() which just copies the kernel buffer out to userspace. This is used when the kernel buffer is kept in the non-compacted form which means that it shares a format with the ptrace ABI. 2. A one-state-at-a-time path: copy_xstate_to_kernel(). This is theoretically slower since it does a bunch of piecemeal copies. Remove the verbatim copy case. 
Speed probably does not matter in this path, and the vast majority of new hardware will use the one-state-at-a-time path anyway. This ensures greater testing for the "slow" path. This also makes enabling PKRU in this interface easier since a single path can be patched instead of two. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.408457100@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 24 +++--------------------- arch/x86/kernel/fpu/xstate.c | 6 +++--- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index a50c0a935499..d60e77d39222 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -77,32 +77,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct fpu *fpu = &target->thread.fpu; - struct xregs_state *xsave; - if (!boot_cpu_has(X86_FEATURE_XSAVE)) + if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; - xsave = &fpu->state.xsave; - fpu__prepare_read(fpu); - if (using_compacted_format()) { - copy_xstate_to_kernel(to, xsave); - return 0; - } else { - fpstate_sanitize_xstate(fpu); - /* - * Copy the 48 bytes defined by the software into the xsave - * area in the thread struct, so that we can copy the whole - * area to user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - - /* - * Copy the xstate memory layout. 
- */ - return membuf_write(&to, xsave, fpu_user_xstate_size); - } + copy_xstate_to_kernel(to, &fpu->state.xsave); + return 0; } int xstateregs_set(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f286c5d21561..70fd9c62edf3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1069,11 +1069,11 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, } /* - * Convert from kernel XSAVES compacted format to standard format and copy - * to a kernel-space ptrace buffer. + * Convert from kernel XSAVE or XSAVES compacted format to UABI + * non-compacted format and copy to a kernel-space ptrace buffer. * * It supports partial copy but pos always starts from zero. This is called - * from xstateregs_get() and there we check the CPU has XSAVES. + * from xstateregs_get() and there we check the CPU has XSAVE. */ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) { -- Gitee From a364d062e34eb6436973b2c702a72b1bcea02c32 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 23 Jun 2021 14:01:39 +0200 Subject: [PATCH 016/188] x86/fpu: Rewrite xfpregs_set() mainline inclusion from mainline-v5.14-rc1 commit 6164331d15f7d912fb9369245368e9564ea49813 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6164331d15f7 x86/fpu: Rewrite xfpregs_set(). -------------------------------- xfpregs_set() was incomprehensible. Almost all of the complexity was due to trying to support nonsensically sized writes or -EFAULT errors that would have partially or completely overwritten the destination before failing. Nonsensically sized input would only have been possible using PTRACE_SETREGSET on REGSET_XFP. Fortunately, it appears (based on Debian code search results) that no one uses that API at all, let alone with the wrong sized buffer. 
Failed user access can be handled more cleanly by first copying to kernel memory. Just rewrite it to require sensible input. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.504234607@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 37 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index d60e77d39222..f24ce873bfc2 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -47,30 +47,39 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; + struct user32_fxsr_struct newstate; int ret; - if (!boot_cpu_has(X86_FEATURE_FXSR)) + BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); + + if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; + /* No funny business with partial or oversized writes is permitted. */ + if (pos != 0 || count != sizeof(newstate)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); + if (ret) + return ret; + + /* Mask invalid MXCSR bits (for historical reasons). */ + newstate.mxcsr &= mxcsr_feature_mask; + fpu__prepare_write(fpu); - fpstate_sanitize_xstate(fpu); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state.fxsave, 0, -1); + /* Copy the state */ + memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate)); - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + /* Clear xmm8..15 */ + BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16); + memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16); - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. 
- */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + /* Mark FP and SSE as in use when XSAVE is enabled */ + if (use_xsave()) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; - return ret; + return 0; } int xstateregs_get(struct task_struct *target, const struct user_regset *regset, -- Gitee From 41d4cce0fd9d52cf8f50db25f30b62bee7cdbe09 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 23 Jun 2021 14:01:40 +0200 Subject: [PATCH 017/188] x86/fpu: Fail ptrace() requests that try to set invalid MXCSR values mainline inclusion from mainline-v5.14-rc1 commit 145e9e0d8c6fada4a40f9fc65b34658077874d9c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 145e9e0d8c6f x86/fpu: Fail ptrace() requests that try to set invalid MXCSR values. -------------------------------- There is no benefit from accepting and silently changing an invalid MXCSR value supplied via ptrace(). Instead, return -EINVAL on invalid input. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.613614842@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index f24ce873bfc2..5610f77cacad 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -63,8 +63,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - /* Mask invalid MXCSR bits (for historical reasons). */ - newstate.mxcsr &= mxcsr_feature_mask; + /* Do not allow an invalid MXCSR value. 
*/ + if (newstate.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; fpu__prepare_write(fpu); -- Gitee From 156d480d76d227abd4affd2bb3668ec1aa596949 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 23 Jun 2021 14:01:41 +0200 Subject: [PATCH 018/188] x86/fpu: Clean up fpregs_set() mainline inclusion from mainline-v5.14-rc1 commit da53f60bb86e60830932926cf1093953a811912c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit da53f60bb86e x86/fpu: Clean up fpregs_set(). -------------------------------- fpregs_set() has unnecessary complexity to support short or nonzero-offset writes and to handle the case in which a copy from userspace overwrites some of the target buffer and then fails. Support for partial writes is useless -- just require that the write has offset 0 and the correct size, and copy into a temporary kernel buffer to avoid clobbering the state if the user access fails. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.710467587@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 5610f77cacad..7041b140de60 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -304,31 +304,32 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__prepare_write(fpu); - fpstate_sanitize_xstate(fpu); + /* No funny business with partial or oversized writes is permitted. 
*/ + if (pos != 0 || count != sizeof(struct user_i387_ia32_struct)) + return -EINVAL; - if (!boot_cpu_has(X86_FEATURE_FPU)) + if (!cpu_feature_enabled(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - if (!boot_cpu_has(X86_FEATURE_FXSR)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state.fsave, 0, - -1); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + if (ret) + return ret; - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); + fpu__prepare_write(fpu); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(&target->thread.fpu.state.fxsave, &env); + if (cpu_feature_enabled(X86_FEATURE_FXSR)) + convert_to_fxsr(&fpu->state.fxsave, &env); + else + memcpy(&fpu->state.fsave, &env, sizeof(env)); /* - * update the header bit in the xsave header, indicating the + * Update the header bit in the xsave header, indicating the * presence of FP. */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; - return ret; + + return 0; } #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ -- Gitee From b452b49511fd8c24d7e4face27f8d23b2c74d322 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:42 +0200 Subject: [PATCH 019/188] x86/fpu: Make copy_xstate_to_kernel() usable for [x]fpregs_get() mainline inclusion from mainline-v5.14-rc1 commit eb6f51723f03c9a1c098ed196a31a03e626b9fb6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit eb6f51723f03 x86/fpu: Make copy_xstate_to_kernel() usable for [x]fpregs_get(). -------------------------------- When xsave with init state optimization is used then a component's state in the task's xsave buffer can be stale when the corresponding feature bit is not set. 
fpregs_get() and xfpregs_get() invoke fpstate_sanitize_xstate() to update the task's xsave buffer before retrieving the FX or FP state. That's just duplicated code as copy_xstate_to_kernel() already handles this correctly. Add a copy mode argument to the function which allows to restrict the state copy to the FP and SSE features. Also rename the function to copy_xstate_to_uabi_buf() so the name reflects what it is doing. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.805327286@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 12 +++++++-- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 42 +++++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 1bb2d16f485b..732ae793c2ab 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -103,12 +103,20 @@ extern void __init update_regset_xstate_info(unsigned int size, void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int using_compacted_format(void); int xfeature_size(int xfeature_nr); -struct membuf; -void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void copy_supervisor_to_kernel(struct xregs_state *xsave); void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); +enum xstate_copy_mode { + XSTATE_COPY_FP, + XSTATE_COPY_FX, + XSTATE_COPY_XSAVE, +}; + +struct membuf; +void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, + enum xstate_copy_mode mode); + #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 
7041b140de60..783f84dfcd46 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -93,7 +93,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, fpu__prepare_read(fpu); - copy_xstate_to_kernel(to, &fpu->state.xsave); + copy_xstate_to_uabi_buf(to, &fpu->state.xsave, XSTATE_COPY_XSAVE); return 0; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 70fd9c62edf3..549500447a75 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1068,14 +1068,20 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, membuf_write(to, from_xstate ? xstate : init_xstate, size); } -/* - * Convert from kernel XSAVE or XSAVES compacted format to UABI - * non-compacted format and copy to a kernel-space ptrace buffer. +/** + * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * @to: membuf descriptor + * @xsave: The kernel xstate buffer to copy from + * @copy_mode: The requested copy mode + * + * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming + * format, i.e. from the kernel internal hardware dependent storage format + * to the requested @copy_mode. UABI XSTATE is always uncompacted! * - * It supports partial copy but pos always starts from zero. This is called - * from xstateregs_get() and there we check the CPU has XSAVE. + * It supports partial copy but @to.pos always starts from zero.
*/ -void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) +void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, + enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.xsave; @@ -1083,12 +1089,22 @@ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) unsigned int zerofrom; int i; - /* - * The destination is a ptrace buffer; we put in only user xstates: - */ - memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; - header.xfeatures &= xfeatures_mask_user(); + + /* Mask out the feature bits depending on copy mode */ + switch (copy_mode) { + case XSTATE_COPY_FP: + header.xfeatures &= XFEATURE_MASK_FP; + break; + + case XSTATE_COPY_FX: + header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE; + break; + + case XSTATE_COPY_XSAVE: + header.xfeatures &= xfeatures_mask_user(); + break; + } /* Copy FP state up to MXCSR */ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387, @@ -1109,6 +1125,9 @@ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space, sizeof(xsave->i387.xmm_space)); + if (copy_mode != XSTATE_COPY_XSAVE) + goto out; + /* Zero the padding area */ membuf_zero(&to, sizeof(xsave->i387.padding)); @@ -1150,6 +1169,7 @@ void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) zerofrom = xstate_offsets[i] + xstate_sizes[i]; } +out: if (to.left) membuf_zero(&to, to.left); } -- Gitee From dc8c35258244f320dd96dcf412207023054c8a1f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:43 +0200 Subject: [PATCH 020/188] x86/fpu: Use copy_xstate_to_uabi_buf() in xfpregs_get() mainline inclusion from mainline-v5.14-rc1 commit adc997b3d66d1cfa8c15a7dbafdaef239a51b5db category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 
adc997b3d66d x86/fpu: Use copy_xstate_to_uabi_buf() in xfpregs_get(). -------------------------------- Use the new functionality of copy_xstate_to_uabi_buf() to retrieve the FX state when XSAVE* is in use. This avoids overwriting the FPU state buffer with fpstate_sanitize_xstate() which is error prone and duplicated code. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121452.901736860@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 783f84dfcd46..ccbe25f6627d 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -33,13 +33,18 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, { struct fpu *fpu = &target->thread.fpu; - if (!boot_cpu_has(X86_FEATURE_FXSR)) + if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; fpu__prepare_read(fpu); - fpstate_sanitize_xstate(fpu); - return membuf_write(&to, &fpu->state.fxsave, sizeof(struct fxregs_state)); + if (!use_xsave()) { + return membuf_write(&to, &fpu->state.fxsave, + sizeof(fpu->state.fxsave)); + } + + copy_xstate_to_uabi_buf(to, &fpu->state.xsave, XSTATE_COPY_FX); + return 0; } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, -- Gitee From 773688d94c7689a4a68094fea95f03657f0ed00b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:44 +0200 Subject: [PATCH 021/188] x86/fpu: Use copy_xstate_to_uabi_buf() in fpregs_get() mainline inclusion from mainline-v5.14-rc1 commit 3f7f75634ccefefcc929696f346db7a748e78f79 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 3f7f75634cce x86/fpu: Use copy_xstate_to_uabi_buf() in fpregs_get(). 
-------------------------------- Use the new functionality of copy_xstate_to_uabi_buf() to retrieve the FX state when XSAVE* is in use. This avoids overwriting the FPU state buffer with fpstate_sanitize_xstate() which is error prone and duplicated code. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.014441775@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ccbe25f6627d..4b799e42ebc7 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -210,10 +210,10 @@ static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) * FXSR floating point environment conversions. */ -void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) +static void __convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk, + struct fxregs_state *fxsave) { - struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -247,6 +247,12 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } +void +convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) +{ + __convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave); +} + void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env) @@ -279,25 +285,29 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, { struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; + struct fxregs_state fxsave, *fx; fpu__prepare_read(fpu); - if (!boot_cpu_has(X86_FEATURE_FPU)) + if (!cpu_feature_enabled(X86_FEATURE_FPU)) return
fpregs_soft_get(target, regset, to); - if (!boot_cpu_has(X86_FEATURE_FXSR)) { + if (!cpu_feature_enabled(X86_FEATURE_FXSR)) { return membuf_write(&to, &fpu->state.fsave, sizeof(struct fregs_state)); } - fpstate_sanitize_xstate(fpu); + if (use_xsave()) { + struct membuf mb = { .p = &fxsave, .left = sizeof(fxsave) }; - if (to.left == sizeof(env)) { - convert_from_fxsr(to.p, target); - return 0; + /* Handle init state optimized xstate correctly */ + copy_xstate_to_uabi_buf(mb, &fpu->state.xsave, XSTATE_COPY_FP); + fx = &fxsave; + } else { + fx = &fpu->state.fxsave; } - convert_from_fxsr(&env, target); + __convert_from_fxsr(&env, target, fx); return membuf_write(&to, &env, sizeof(env)); } -- Gitee From 7af3d3136fe8496cc82e0c0502d5fdbd38a18285 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:45 +0200 Subject: [PATCH 022/188] x86/fpu: Remove fpstate_sanitize_xstate() mainline inclusion from mainline-v5.14-rc1 commit afac9e894364418731d1d7e66c1118b31fd130e8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit afac9e894364 x86/fpu: Remove fpstate_sanitize_xstate(). -------------------------------- No more users. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.124819167@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 2 - arch/x86/kernel/fpu/xstate.c | 79 ----------------------------- 2 files changed, 81 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index ffa48b852300..b3f5aad3e822 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -86,8 +86,6 @@ extern void fpstate_init_soft(struct swregs_state *soft); static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif -extern void fpstate_sanitize_xstate(struct fpu *fpu); - /* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ #define user_insn(insn, output, input...) \ ({ \ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 549500447a75..af88bb25cea2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -128,85 +128,6 @@ static bool xfeature_is_supervisor(int xfeature_nr) return ecx & 1; } -/* - * When executing XSAVEOPT (or other optimized XSAVE instructions), if - * a processor implementation detects that an FPU state component is still - * (or is again) in its initialized state, it may clear the corresponding - * bit in the header.xfeatures field, and can skip the writeout of registers - * to the corresponding memory layout. - * - * This means that when the bit is zero, the state component might still contain - * some previous - non-initialized register state. - * - * Before writing xstate information to user-space we sanitize those components, - * to always ensure that the memory layout of a feature will be in the init state - * if the corresponding header bit is zero. This is to ensure that user-space doesn't - * see some stale state in the memory layout during signal handling, debugging etc. 
- */ -void fpstate_sanitize_xstate(struct fpu *fpu) -{ - struct fxregs_state *fx = &fpu->state.fxsave; - int feature_bit; - u64 xfeatures; - - if (!use_xsaveopt()) - return; - - xfeatures = fpu->state.xsave.header.xfeatures; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. - */ - if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all) - return; - - /* - * FP is in init state - */ - if (!(xfeatures & XFEATURE_MASK_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xfeatures & XFEATURE_MASK_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - /* - * First two features are FPU and SSE, which above we handled - * in a special way already: - */ - feature_bit = 0x2; - xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2; - - /* - * Update all the remaining memory layouts according to their - * standard xstate layout, if their header bit is in the init - * state: - */ - while (xfeatures) { - if (xfeatures & 0x1) { - int offset = xstate_comp_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy((void *)fx + offset, - (void *)&init_fpstate.xsave + offset, - size); - } - - xfeatures >>= 1; - feature_bit++; - } -} - /* * Enable the extended processor state save/restore feature. * Called once per CPU onlining. -- Gitee From 5b1a24960589ae601eeca9d202f902f14bd8c1f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:46 +0200 Subject: [PATCH 023/188] x86/fpu/regset: Move fpu__read_begin() into regset mainline inclusion from mainline-v5.14-rc1 commit 5a32fac8dbe8adc08c10e2c8770c95aebfc627cd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 5a32fac8dbe8 x86/fpu/regset: Move fpu__read_begin() into regset. 
-------------------------------- The function can only be used from the regset get() callbacks safely. So there is no reason to have it globally exposed. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.234942936@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 1 - arch/x86/kernel/fpu/core.c | 20 -------------------- arch/x86/kernel/fpu/regset.c | 22 +++++++++++++++++++--- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b3f5aad3e822..74c4861fe866 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,7 +26,6 @@ /* * High level FPU state handling functions: */ -extern void fpu__prepare_read(struct fpu *fpu); extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9ff467bb4894..d390644e5fa1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -281,26 +281,6 @@ static void fpu__initialize(struct fpu *fpu) trace_x86_fpu_init_state(fpu); } -/* - * This function must be called before we read a task's fpstate. - * - * There's two cases where this gets called: - * - * - for the current task (when coredumping), in which case we have - * to save the latest FPU registers into the fpstate, - * - * - or it's called for stopped tasks (ptrace), in which case the - * registers were already saved by the context-switch code when - * the task scheduled out. - * - * If the task has used the FPU before then save it. - */ -void fpu__prepare_read(struct fpu *fpu) -{ - if (fpu == &current->thread.fpu) - fpu__save(fpu); -} - /* * This function must be called before we write a task's fpstate.
* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 4b799e42ebc7..937adf702b37 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -28,6 +28,22 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r return 0; } +/* + * The regset get() functions are invoked from: + * + * - coredump to dump the current task's fpstate. If the current task + * owns the FPU then the memory state has to be synchronized and the + * FPU register state preserved. Otherwise fpstate is already in sync. + * + * - ptrace to dump fpstate of a stopped task, in which case the registers + * have already been saved to fpstate on context switch. + */ +static void sync_fpstate(struct fpu *fpu) +{ + if (fpu == &current->thread.fpu) + fpu__save(fpu); +} + int xfpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { @@ -36,7 +52,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; - fpu__prepare_read(fpu); + sync_fpstate(fpu); if (!use_xsave()) { return membuf_write(&to, &fpu->state.fxsave, @@ -96,7 +112,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__prepare_read(fpu); + sync_fpstate(fpu); copy_xstate_to_uabi_buf(to, &fpu->state.xsave, XSTATE_COPY_XSAVE); return 0; @@ -287,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; struct fxregs_state fxsave, *fx; - fpu__prepare_read(fpu); + sync_fpstate(fpu); if (!cpu_feature_enabled(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, to); -- Gitee From f76ac48e93ef13e3c3dfcb23465b148813077026 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:47 +0200 Subject: [PATCH 024/188] x86/fpu: Move fpu__write_begin() to regset mainline inclusion from
mainline-v5.14-rc1 commit dbb60ac764581e62f2116c5a6b8926ba3a872dd4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit dbb60ac76458 x86/fpu: Move fpu__write_begin() to regset. -------------------------------- The only use case for fpu__write_begin is the set() callback of regset, so the function is pointlessly global. Move it to the regset code and rename it to fpu_force_restore() which describes exactly what the function does. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.328652975@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 1 - arch/x86/kernel/fpu/core.c | 24 ------------------------ arch/x86/kernel/fpu/regset.c | 25 ++++++++++++++++++++++--- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 74c4861fe866..63b7709df887 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,7 +26,6 @@ /* * High level FPU state handling functions: */ -extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d390644e5fa1..22437011f4f0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -281,30 +281,6 @@ static void fpu__initialize(struct fpu *fpu) trace_x86_fpu_init_state(fpu); } -/* - * This function must be called before we write a task's fpstate. - * - * Invalidate any cached FPU registers. - * - * After this function call, after registers in the fpstate are - * modified and the child task has woken up, the child task will - * restore the modified FPU state from the modified context.
If we - * didn't clear its cached status here then the cached in-registers - * state pending on its former CPU could be restored, corrupting - * the modifications. - */ -void fpu__prepare_write(struct fpu *fpu) -{ - /* - * Only stopped child tasks can be used to modify the FPU - * state in the fpstate buffer: - */ - WARN_ON_FPU(fpu == &current->thread.fpu); - - /* Invalidate any cached state: */ - __fpu_invalidate_fpregs_state(fpu); -} - /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 937adf702b37..ddc290d9bf8e 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -44,6 +44,25 @@ static void sync_fpstate(struct fpu *fpu) fpu__save(fpu); } +/* + * Invalidate cached FPU registers before modifying the stopped target + * task's fpstate. + * + * This forces the target task on resume to restore the FPU registers from + * modified fpstate. Otherwise the task might skip the restore and operate + * with the cached FPU registers which discards the modifications.
+ */ +static void fpu_force_restore(struct fpu *fpu) +{ + /* + * Only stopped child tasks can be used to modify the FPU + * state in the fpstate buffer: + */ + WARN_ON_FPU(fpu == &current->thread.fpu); + + __fpu_invalidate_fpregs_state(fpu); +} + int xfpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { @@ -88,7 +107,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (newstate.mxcsr & ~mxcsr_feature_mask) return -EINVAL; - fpu__prepare_write(fpu); + fpu_force_restore(fpu); /* Copy the state */ memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate)); @@ -146,7 +165,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } } - fpu__prepare_write(fpu); + fpu_force_restore(fpu); ret = copy_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); out: @@ -346,7 +365,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - fpu__prepare_write(fpu); + fpu_force_restore(fpu); if (cpu_feature_enabled(X86_FEATURE_FXSR)) convert_to_fxsr(&fpu->state.fxsave, &env); -- Gitee From c4a524e9adf74adde3d195aa5b1797f07f001278 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:48 +0200 Subject: [PATCH 025/188] x86/fpu: Get rid of using_compacted_format() mainline inclusion from mainline-v5.14-rc1 commit 02b93c0b00df222b9ccf7a1fbd0eb59353d0a58c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 02b93c0b00df x86/fpu: Get rid of using_compacted_format(). -------------------------------- This function is pointlessly global and a complete misnomer because its usage is related to both supervisor state checks and compacted format checks. Remove it and just make the conditions check the XSAVES feature.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.425493349@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/kernel/fpu/xstate.c | 22 ++++------------------ 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 732ae793c2ab..6e5ba42ffd42 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -101,7 +101,6 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); -int using_compacted_format(void); int xfeature_size(int xfeature_nr); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index af88bb25cea2..3513d664c95c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -449,20 +449,6 @@ int xfeature_size(int xfeature_nr) return eax; } -/* - * 'XSAVES' implies two different things: - * 1. saving of supervisor/system state - * 2. using the compacted format - * - * Use this function when dealing with the compacted format so - * that it is obvious which aspect of 'XSAVES' is being handled - * by the calling code. - */ -int using_compacted_format(void) -{ - return boot_cpu_has(X86_FEATURE_XSAVES); -} - /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ static int validate_user_xstate_header(const struct xstate_header *hdr) { @@ -581,9 +567,9 @@ static void do_extra_xstate_size_checks(void) check_xstate_against_struct(i); /* * Supervisor state components can be managed only by - * XSAVES, which is compacted-format only. + * XSAVES. 
*/ - if (!using_compacted_format()) + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_WARN_ON(xfeature_is_supervisor(i)); /* Align from the end of the previous feature */ @@ -593,9 +579,9 @@ static void do_extra_xstate_size_checks(void) * The offset of a given state in the non-compacted * format is given to us in a CPUID leaf. We check * them for being ordered (increasing offsets) in - * setup_xstate_features(). + * setup_xstate_features(). XSAVES uses compacted format. */ - if (!using_compacted_format()) + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) paranoid_xstate_size = xfeature_uncompacted_offset(i); /* * The compacted-format offset always depends on where -- Gitee From b4de00625c17a8d9ffb720ceea84a66e2848cc22 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 23 Jun 2021 14:01:49 +0200 Subject: [PATCH 026/188] x86/kvm: Avoid looking up PKRU in XSAVE buffer mainline inclusion from mainline-v5.14-rc1 commit 71ef453355a9197fcfd8ff22391a4ad7861d79e6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 71ef453355a9 x86/kvm: Avoid looking up PKRU in XSAVE buffer. -------------------------------- PKRU is being removed from the kernel XSAVE/FPU buffers. This removal will probably include warnings for code that look up PKRU in those buffers. KVM currently looks up the location of PKRU but doesn't even use the pointer that it gets back. Rework the code to avoid calling get_xsave_addr() except in cases where its result is actually used. This makes the code more clear and also avoids the inevitable PKRU warnings. This is probably a good cleanup and could go upstream idependently of any PKRU rework. 
Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.541037562@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e43760895eec..dc1d4ea948fd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4542,20 +4542,21 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { + u32 size, offset, ecx, edx; u64 xfeature_mask = valid & -valid; int xfeature_nr = fls64(xfeature_mask) - 1; - void *src = get_xsave_addr(xsave, xfeature_nr); - - if (src) { - u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, - &size, &offset, &ecx, &edx); - if (xfeature_nr == XFEATURE_PKRU) - memcpy(dest + offset, &vcpu->arch.pkru, - sizeof(vcpu->arch.pkru)); - else - memcpy(dest + offset, src, size); + void *src; + + cpuid_count(XSTATE_CPUID, xfeature_nr, + &size, &offset, &ecx, &edx); + if (xfeature_nr == XFEATURE_PKRU) { + memcpy(dest + offset, &vcpu->arch.pkru, + sizeof(vcpu->arch.pkru)); + } else { + src = get_xsave_addr(xsave, xfeature_nr); + if (src) + memcpy(dest + offset, src, size); } valid -= xfeature_mask; @@ -4585,18 +4586,20 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { + u32 size, offset, ecx, edx; u64 xfeature_mask = valid & -valid; int xfeature_nr = fls64(xfeature_mask) - 1; - void *dest = get_xsave_addr(xsave, xfeature_nr); - - if (dest) { - u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, - &size, &offset, &ecx, &edx); - if (xfeature_nr == XFEATURE_PKRU) - memcpy(&vcpu->arch.pkru, src + offset, - sizeof(vcpu->arch.pkru)); - else + + cpuid_count(XSTATE_CPUID, xfeature_nr, + &size, &offset, &ecx, &edx); + + if (xfeature_nr == 
XFEATURE_PKRU) { + memcpy(&vcpu->arch.pkru, src + offset, + sizeof(vcpu->arch.pkru)); + } else { + void *dest = get_xsave_addr(xsave, xfeature_nr); + + if (dest) memcpy(dest, src + offset, size); } -- Gitee From b8c79608298056729fe509b9f31b9f957f46f7bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:50 +0200 Subject: [PATCH 027/188] x86/fpu: Cleanup arch_set_user_pkey_access() mainline inclusion from mainline-v5.14-rc1 commit 9fe8a6f5eed8fff6b2d7dbc99b911334e311732d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9fe8a6f5eed8 x86/fpu: Cleanup arch_set_user_pkey_access(). -------------------------------- The function does a sanity check with a WARN_ON_ONCE() but happily proceeds when the pkey argument is out of range. Clean it up. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.635764326@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3513d664c95c..440b0d8c15e6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -912,11 +912,10 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * rights for @pkey to @init_val. */ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val) + unsigned long init_val) { - u32 old_pkru; - int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); - u32 new_pkru_bits = 0; + u32 old_pkru, new_pkru_bits = 0; + int pkey_shift; /* * This check implies XSAVE support. OSPKE only gets @@ -930,7 +929,8 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * values originating from in-kernel users. Complain * if a bad value is observed. 
*/ - WARN_ON_ONCE(pkey >= arch_max_pkey()); + if (WARN_ON_ONCE(pkey >= arch_max_pkey())) + return -EINVAL; /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) @@ -939,6 +939,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, new_pkru_bits |= PKRU_WD_BIT; /* Shift the bits in to the correct place in PKRU for pkey: */ + pkey_shift = pkey * PKRU_BITS_PER_PKEY; new_pkru_bits <<= pkey_shift; /* Get old PKRU and mask off any old bits in place: */ -- Gitee From 23eb334da2ebebda782037b47939d5cc444df072 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:51 +0200 Subject: [PATCH 028/188] x86/fpu: Get rid of copy_supervisor_to_kernel() mainline inclusion from mainline-v5.14-rc1 commit 1f3171252dc586745bb548d48f3bcedfea34b58d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1f3171252dc5 x86/fpu: Get rid of copy_supervisor_to_kernel(). -------------------------------- If the fast path of restoring the FPU state on sigreturn fails or is not taken and the current task's FPU is active then the FPU has to be deactivated for the slow path to allow a safe update of the tasks FPU memory state. With supervisor states enabled, this requires to save the supervisor state in the memory state first. Supervisor states require XSAVES so saving only the supervisor state requires to reshuffle the memory buffer because XSAVES uses the compacted format and therefore stores the supervisor states at the beginning of the memory state. That's just an overengineered optimization. Get rid of it and save the full state for this case. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.734561971@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/kernel/fpu/signal.c | 13 +++++--- arch/x86/kernel/fpu/xstate.c | 55 ------------------------------- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 6e5ba42ffd42..6611e069e834 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -104,7 +104,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); -void copy_supervisor_to_kernel(struct xregs_state *xsave); void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 888c8e0d2ff1..501059579066 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -401,15 +401,18 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * the optimisation). */ fpregs_lock(); - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - /* - * Supervisor states are not modified by user space input. Save - * current supervisor states first and invalidate the FPU regs. + * If supervisor states are available then save the + * hardware state in current's fpstate so that the + * supervisor state is preserved. Save the full state for + * simplicity. There is no point in optimizing this by only + * saving the supervisor states and then shuffle them to + * the right place in memory. 
This is the slow path and the + * above XRSTOR failed or ia32_fxstate is true. Shrug. */ if (xfeatures_mask_supervisor()) - copy_supervisor_to_kernel(&fpu->state.xsave); + copy_xregs_to_kernel(&fpu->state.xsave); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 440b0d8c15e6..0c65e3d817c9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1204,61 +1204,6 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) return 0; } -/* - * Save only supervisor states to the kernel buffer. This blows away all - * old states, and is intended to be used only in __fpu__restore_sig(), where - * user states are restored from the user buffer. - */ -void copy_supervisor_to_kernel(struct xregs_state *xstate) -{ - struct xstate_header *header; - u64 max_bit, min_bit; - u32 lmask, hmask; - int err, i; - - if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES))) - return; - - if (!xfeatures_mask_supervisor()) - return; - - max_bit = __fls(xfeatures_mask_supervisor()); - min_bit = __ffs(xfeatures_mask_supervisor()); - - lmask = xfeatures_mask_supervisor(); - hmask = xfeatures_mask_supervisor() >> 32; - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - if (WARN_ON_FPU(err)) - return; - - /* - * At this point, the buffer has only supervisor states and must be - * converted back to normal kernel format. - */ - header = &xstate->header; - header->xcomp_bv |= xfeatures_mask_all; - - /* - * This only moves states up in the buffer. Start with - * the last state and move backwards so that states are - * not overwritten until after they are moved. Note: - * memmove() allows overlapping src/dst buffers. 
- */ - for (i = max_bit; i >= min_bit; i--) { - u8 *xbuf = (u8 *)xstate; - - if (!((header->xfeatures >> i) & 1)) - continue; - - /* Move xfeature 'i' into its normal location */ - memmove(xbuf + xstate_comp_offsets[i], - xbuf + xstate_supervisor_only_offsets[i], - xstate_sizes[i]); - } -} - /** * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to * an xsave area -- Gitee From aa18f937e9b7cb98925cd913c72d74af7dbf68d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:52 +0200 Subject: [PATCH 029/188] x86/fpu: Rename copy_xregs_to_kernel() and copy_kernel_to_xregs() mainline inclusion from mainline-v5.14-rc1 commit b16313f71c1050ad5c92548925e0e9cec26989ab category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b16313f71c10 x86/fpu: Rename copy_xregs_to_kernel() and copy_kernel_to_xregs(). -------------------------------- The function names for xsave[s]/xrstor[s] operations are horribly named and a permanent source of confusion. Rename: copy_xregs_to_kernel() to os_xsave() copy_kernel_to_xregs() to os_xrstor() These are truly low level wrappers around the actual instructions XSAVE[OPT]/XRSTOR and XSAVES/XRSTORS with the twist that the selection based on the available CPU features happens with an alternative to avoid conditionals all over the place and to provide the best performance for hot paths. The os_ prefix tells that this is the OS selected mechanism. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.830239347@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 17 +++++++++++------ arch/x86/kernel/fpu/core.c | 7 +++---- arch/x86/kernel/fpu/signal.c | 21 +++++++++++---------- arch/x86/kernel/fpu/xstate.c | 2 +- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 63b7709df887..c56561f28e72 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -266,7 +266,7 @@ static inline void fxsave(struct fxregs_state *fx) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +static inline void os_xrstor_booting(struct xregs_state *xstate) { u64 mask = -1; u32 lmask = mask; @@ -289,8 +289,11 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) /* * Save processor xstate to xsave area. + * + * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features + * and command line options. The choice is permanent until the next reboot. */ -static inline void copy_xregs_to_kernel(struct xregs_state *xstate) +static inline void os_xsave(struct xregs_state *xstate) { u64 mask = xfeatures_mask_all; u32 lmask = mask; @@ -307,8 +310,10 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) /* * Restore processor xstate from xsave area. + * + * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. 
*/ -static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +static inline void os_xrstor(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -369,13 +374,13 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) * Restore xstate from kernel space xsave area, return an error code instead of * an exception. */ -static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; int err; - if (static_cpu_has(X86_FEATURE_XSAVES)) + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -388,7 +393,7 @@ extern int copy_fpregs_to_fpstate(struct fpu *fpu); static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, mask); + os_xrstor(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 22437011f4f0..bfdcf7fd63e3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(irq_fpu_usable); int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - copy_xregs_to_kernel(&fpu->state.xsave); + os_xsave(&fpu->state.xsave); /* * AVX512 state is tracked here because its use is @@ -314,7 +314,7 @@ void fpu__drop(struct fpu *fpu) static inline void copy_init_fpstate_to_fpregs(u64 features_mask) { if (use_xsave()) - copy_kernel_to_xregs(&init_fpstate.xsave, features_mask); + os_xrstor(&init_fpstate.xsave, features_mask); else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); else @@ -345,8 +345,7 @@ static void fpu__clear(struct fpu *fpu, bool user_only) if (user_only) { if (!fpregs_state_valid(fpu, 
smp_processor_id()) && xfeatures_mask_supervisor()) - copy_kernel_to_xregs(&fpu->state.xsave, - xfeatures_mask_supervisor()); + os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); copy_init_fpstate_to_fpregs(xfeatures_mask_user()); } else { copy_init_fpstate_to_fpregs(xfeatures_mask_all); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 501059579066..33675b3dad0e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -261,14 +261,14 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) r = copy_user_to_fxregs(buf); if (!r) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate.xsave, init_bv); return r; } else { init_bv = xfeatures_mask_user() & ~xbv; r = copy_user_to_xregs(buf, xbv); if (!r && unlikely(init_bv)) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate.xsave, init_bv); return r; } } else if (use_fxsr()) { @@ -356,9 +356,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * has been copied to the kernel one. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && - xfeatures_mask_supervisor()) - copy_kernel_to_xregs(&fpu->state.xsave, - xfeatures_mask_supervisor()); + xfeatures_mask_supervisor()) { + os_xrstor(&fpu->state.xsave, + xfeatures_mask_supervisor()); + } fpregs_mark_activate(); fpregs_unlock(); return 0; @@ -412,7 +413,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * above XRSTOR failed or ia32_fxstate is true. Shrug. 
*/ if (xfeatures_mask_supervisor()) - copy_xregs_to_kernel(&fpu->state.xsave); + os_xsave(&fpu->state.xsave); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); @@ -430,14 +431,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_lock(); if (unlikely(init_bv)) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate.xsave, init_bv); /* * Restore previously saved supervisor xstates along with * copied-in user xstates. */ - ret = copy_kernel_to_xregs_err(&fpu->state.xsave, - user_xfeatures | xfeatures_mask_supervisor()); + ret = os_xrstor_safe(&fpu->state.xsave, + user_xfeatures | xfeatures_mask_supervisor()); } else if (use_fxsr()) { ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); @@ -454,7 +455,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) u64 init_bv; init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate.xsave, init_bv); } ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0c65e3d817c9..d79f6f272a16 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -400,7 +400,7 @@ static void __init setup_init_fpu_buf(void) /* * Init all the features state with header.xfeatures being 0x0 */ - copy_kernel_to_xregs_booting(&init_fpstate.xsave); + os_xrstor_booting(&init_fpstate.xsave); /* * All components are now in init state. 
Read the state back so -- Gitee From a25fe1b2de0d8ceb68be5d71c8fc6860103686be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:53 +0200 Subject: [PATCH 030/188] x86/fpu: Rename copy_user_to_xregs() and copy_xregs_to_user() mainline inclusion from mainline-v5.14-rc1 commit 6b862ba18214 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6b862ba18214 x86/fpu: Rename copy_user_to_xregs() and copy_xregs_to_user(). -------------------------------- The function names for xsave[s]/xrstor[s] operations are horribly named and a permanent source of confusion. Rename: copy_xregs_to_user() to xsave_to_user_sigframe() copy_user_to_xregs() to xrstor_from_user_sigframe() so it's entirely clear what this is about. This is also a clear indicator of the potentially different storage format because this is user ABI and cannot use compacted format. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121453.924266705@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/kernel/fpu/signal.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index c56561f28e72..6797d136d807 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -331,7 +331,7 @@ static inline void os_xrstor(struct xregs_state *xstate, u64 mask) * backward compatibility for old applications which don't understand * compacted format of xsave area. */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { u64 mask = xfeatures_mask_user(); u32 lmask = mask; @@ -356,7 +356,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) /* * Restore xstate from user space xsave area.
*/ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) { struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 33675b3dad0e..4fe632f56697 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -129,7 +129,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) int err; if (use_xsave()) - err = copy_xregs_to_user(buf); + err = xsave_to_user_sigframe(buf); else if (use_fxsr()) err = copy_fxregs_to_user((struct fxregs_state __user *) buf); else @@ -266,7 +266,7 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) } else { init_bv = xfeatures_mask_user() & ~xbv; - r = copy_user_to_xregs(buf, xbv); + r = xrstor_from_user_sigframe(buf, xbv); if (!r && unlikely(init_bv)) os_xrstor(&init_fpstate.xsave, init_bv); return r; -- Gitee From 462214817d95b02836f0d5ba073d3e4337eb7969 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:54 +0200 Subject: [PATCH 031/188] x86/fpu: Rename fxregs-related copy functions mainline inclusion from mainline-v5.14-rc1 commit 16dcf4385933a02bb21d0af86a04439d151ad42a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 16dcf4385933 x86/fpu: Rename fxregs-related copy functions. -------------------------------- The function names for fxsave/fxrstor operations are horribly named and a permanent source of confusion. Rename: copy_fxregs_to_kernel() to fxsave() copy_kernel_to_fxregs() to fxrstor() copy_fxregs_to_user() to fxsave_to_user_sigframe() copy_user_to_fxregs() to fxrstor_from_user_sigframe() so it's clear what these are doing. All these functions are really low level wrappers around the equally named instructions, so mapping to the documentation is just natural. 
While at it, replace the static_cpu_has(X86_FEATURE_FXSR) with use_fxsr() to be consistent with the rest of the code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.017863494@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 18 +++++------------- arch/x86/kernel/fpu/core.c | 6 +++--- arch/x86/kernel/fpu/signal.c | 10 +++++----- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6797d136d807..055661d6aa43 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -130,7 +130,7 @@ static inline int copy_fregs_to_user(struct fregs_state __user *fx) return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } -static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) +static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); @@ -139,7 +139,7 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) } -static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) +static inline void fxrstor(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -147,7 +147,7 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +static inline int fxrstor_safe(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -155,7 +155,7 @@ static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fxregs(struct fxregs_state __user 
*fx) +static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -178,14 +178,6 @@ static inline int copy_user_to_fregs(struct fregs_state __user *fx) return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_fxregs_to_kernel(struct fpu *fpu) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); -} - static inline void fxsave(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) @@ -396,7 +388,7 @@ static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask os_xrstor(&fpstate->xsave, mask); } else { if (use_fxsr()) - copy_kernel_to_fxregs(&fpstate->fxsave); + fxrstor(&fpstate->fxsave); else copy_kernel_to_fregs(&fpstate->fsave); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index bfdcf7fd63e3..035487dcb29e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -107,7 +107,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) } if (likely(use_fxsr())) { - copy_fxregs_to_kernel(fpu); + fxsave(&fpu->state.fxsave); return 1; } @@ -315,8 +315,8 @@ static inline void copy_init_fpstate_to_fpregs(u64 features_mask) { if (use_xsave()) os_xrstor(&init_fpstate.xsave, features_mask); - else if (static_cpu_has(X86_FEATURE_FXSR)) - copy_kernel_to_fxregs(&init_fpstate.fxsave); + else if (use_fxsr()) + fxrstor(&init_fpstate.fxsave); else copy_kernel_to_fregs(&init_fpstate.fsave); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 4fe632f56697..05f8445eb676 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -64,7 +64,7 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - copy_fxregs_to_kernel(&tsk->thread.fpu); + 
fxsave(&tsk->thread.fpu.state.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); @@ -131,7 +131,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) if (use_xsave()) err = xsave_to_user_sigframe(buf); else if (use_fxsr()) - err = copy_fxregs_to_user((struct fxregs_state __user *) buf); + err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else err = copy_fregs_to_user((struct fregs_state __user *) buf); @@ -259,7 +259,7 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) if (fx_only) { init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; - r = copy_user_to_fxregs(buf); + r = fxrstor_from_user_sigframe(buf); if (!r) os_xrstor(&init_fpstate.xsave, init_bv); return r; @@ -272,7 +272,7 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) return r; } } else if (use_fxsr()) { - return copy_user_to_fxregs(buf); + return fxrstor_from_user_sigframe(buf); } else return copy_user_to_fregs(buf); } @@ -458,7 +458,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) os_xrstor(&init_fpstate.xsave, init_bv); } - ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave); + ret = fxrstor_safe(&fpu->state.fxsave); } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) -- Gitee From aaefc990a0c18e80ff0703c9135e0ecd038a9180 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:55 +0200 Subject: [PATCH 032/188] x86/math-emu: Rename frstor() mainline inclusion from mainline-v5.14-rc1 commit 872c65dbf669b3b471b3d8656391a6b4f736d22b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 872c65dbf669 x86/math-emu: Rename frstor(). -------------------------------- This is in the way of renaming the low level hardware accessors to match the instruction name. Prepend it with FPU_ which is consistent vs. the rest of the emulation code. No functional change. 
[ bp: Correct the Reported-by: ] Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.111665161@linutronix.de Signed-off-by: Lin Wang --- arch/x86/math-emu/fpu_proto.h | 2 +- arch/x86/math-emu/load_store.c | 2 +- arch/x86/math-emu/reg_ld_str.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index 70d35c200945..94c4023092f3 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -144,7 +144,7 @@ extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); extern int FPU_round_to_int(FPU_REG *r, u_char tag); extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); -extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); +extern void FPU_frstor(fpu_addr_modes addr_modes, u_char __user *data_address); extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); extern int FPU_tagof(FPU_REG *ptr); diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c index f15263e158e8..4092df79de4f 100644 --- a/arch/x86/math-emu/load_store.c +++ b/arch/x86/math-emu/load_store.c @@ -240,7 +240,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, fix-up operations. */ return 1; case 022: /* frstor m94/108byte */ - frstor(addr_modes, (u_char __user *) data_address); + FPU_frstor(addr_modes, (u_char __user *) data_address); /* Ensure that the values just loaded are not changed by fix-up operations. 
*/ return 1; diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index fe6246ff9887..2de1094ed4d7 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -1117,7 +1117,7 @@ u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) return s; } -void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) +void FPU_frstor(fpu_addr_modes addr_modes, u_char __user *data_address) { int i, regnr; u_char __user *s = fldenv(addr_modes, data_address); -- Gitee From 3f050fa09a2066d2dd94bca88e44dc920f212e37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:56 +0200 Subject: [PATCH 033/188] x86/fpu: Rename fregs-related copy functions mainline inclusion from mainline-v5.14-rc1 commit 6fdc908cb56123591baa4259400cfb0787582b11 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6fdc908cb561 x86/fpu: Rename fregs-related copy functions. -------------------------------- The function names for fnsave/frstor operations are horribly named and a permanent source of confusion. Rename: copy_kernel_to_fregs() to frstor() copy_fregs_to_user() to fnsave_to_user_sigframe() copy_user_to_fregs() to frstor_from_user_sigframe() so it's clear what these are doing. All these functions are really low level wrappers around the equally named instructions, so mapping to the documentation is just natural. No functional change.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.223594101@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 10 +++++----- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/signal.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 055661d6aa43..ee65b84ac4f5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -125,7 +125,7 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {} _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ : output : input) -static inline int copy_fregs_to_user(struct fregs_state __user *fx) +static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } @@ -163,17 +163,17 @@ static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_kernel_to_fregs(struct fregs_state *fx) +static inline void frstor(struct fregs_state *fx) { kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +static inline int frstor_safe(struct fregs_state *fx) { return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fregs(struct fregs_state __user *fx) +static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } @@ -390,7 +390,7 @@ static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask if (use_fxsr()) fxrstor(&fpstate->fxsave); else - copy_kernel_to_fregs(&fpstate->fsave); + frstor(&fpstate->fsave); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 
035487dcb29e..1d2587607a7f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -318,7 +318,7 @@ static inline void copy_init_fpstate_to_fpregs(u64 features_mask) else if (use_fxsr()) fxrstor(&init_fpstate.fxsave); else - copy_kernel_to_fregs(&init_fpstate.fsave); + frstor(&init_fpstate.fsave); if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 05f8445eb676..430c66dc2218 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -133,7 +133,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) else if (use_fxsr()) err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else - err = copy_fregs_to_user((struct fregs_state __user *) buf); + err = fnsave_to_user_sigframe((struct fregs_state __user *) buf); if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) err = -EFAULT; @@ -274,7 +274,7 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); } else - return copy_user_to_fregs(buf); + return frstor_from_user_sigframe(buf); } static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) @@ -465,7 +465,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) goto out; fpregs_lock(); - ret = copy_kernel_to_fregs_err(&fpu->state.fsave); + ret = frstor_safe(&fpu->state.fsave); } if (!ret) fpregs_mark_activate(); -- Gitee From 1f35040b020fa81dbfd9a7ca8af5d3dfaaa850b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:57 +0200 Subject: [PATCH 034/188] x86/fpu: Rename xstate copy functions which are related to UABI mainline inclusion from mainline-v5.14-rc1 commit 1cc34413ff3f18c30e5df89fefd95cc0f3b3292e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1cc34413ff3f x86/fpu: Rename 
xstate copy functions which are related to UABI. -------------------------------- Rename them to reflect that these functions deal with user space format XSAVE buffers. copy_kernel_to_xstate() -> copy_uabi_from_kernel_to_xstate() copy_user_to_xstate() -> copy_sigframe_from_user_to_xstate() Again a clear statement that these functions deal with user space ABI. Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.318485015@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 6611e069e834..00e1a2ac5239 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -102,8 +102,8 @@ extern void __init update_regset_xstate_info(unsigned int size, void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); -int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ddc290d9bf8e..892aec1dd822 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -166,7 +166,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = 
copy_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); out: vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 430c66dc2218..fd4b58d7b72e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -422,7 +422,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (use_xsave() && !fx_only) { u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; - ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) goto out; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d79f6f272a16..c97a96852fbb 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1099,7 +1099,7 @@ static inline bool mxcsr_valid(struct xstate_header *hdr, const u32 *mxcsr) * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] format * and copy to the target thread. This is called from xstateregs_set(). */ -int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { unsigned int offset, size; int i; @@ -1154,7 +1154,8 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. 
*/ -int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) +int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, + const void __user *ubuf) { unsigned int offset, size; int i; -- Gitee From 1a304e05fe6ae18c1bef5384d490b730b0d58704 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:58 +0200 Subject: [PATCH 035/188] x86/fpu: Deduplicate copy_uabi_from_user/kernel_to_xstate() mainline inclusion from mainline-v5.14-rc1 commit 522e92743b35351bda1b6a9136560f833a9c2490 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 522e92743b35 x86/fpu: Deduplicate copy_uabi_from_user/kernel_to_xstate(). -------------------------------- copy_uabi_from_user_to_xstate() and copy_uabi_from_kernel_to_xstate() are almost identical except for the copy function. Unify them. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20210623121454.414215896@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 137 ++++++++++++----------------------- 1 file changed, 47 insertions(+), 90 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c97a96852fbb..98492f6d1d40 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -953,23 +953,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ -/* - * Weird legacy quirk: SSE and YMM states store information in the - * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP - * area is marked as unused in the xfeatures header, we need to copy - * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use. 
- */ -static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) -{ - if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) - return false; - - if (xfeatures & XFEATURE_MASK_FP) - return false; - - return true; -} - static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, void *init_xstate, unsigned int size) { @@ -1082,39 +1065,53 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, membuf_zero(&to, to.left); } -static inline bool mxcsr_valid(struct xstate_header *hdr, const u32 *mxcsr) +static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, + const void *kbuf, const void __user *ubuf) { - u64 mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; - - /* Only check if it is in use */ - if (hdr->xfeatures & mask) { - /* Reserved bits in MXCSR must be zero. */ - if (*mxcsr & ~mxcsr_feature_mask) - return false; + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; } - return true; + return 0; } -/* - * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] format - * and copy to the target thread. This is called from xstateregs_set(). 
- */ -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) + +static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, + const void __user *ubuf) { unsigned int offset, size; - int i; struct xstate_header hdr; + u64 mask; + int i; offset = offsetof(struct xregs_state, header); - size = sizeof(hdr); - - memcpy(&hdr, kbuf + offset, size); + if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf)) + return -EFAULT; if (validate_user_xstate_header(&hdr)) return -EINVAL; - if (!mxcsr_valid(&hdr, kbuf + offsetof(struct fxregs_state, mxcsr))) - return -EINVAL; + /* Validate MXCSR when any of the related features is in use */ + mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; + if (hdr.xfeatures & mask) { + u32 mxcsr[2]; + + offset = offsetof(struct fxregs_state, mxcsr); + if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf)) + return -EFAULT; + + /* Reserved bits in MXCSR must be zero. */ + if (mxcsr[0] & ~mxcsr_feature_mask) + return -EINVAL; + + /* SSE and YMM require MXCSR even when FP is not in use. */ + if (!(hdr.xfeatures & XFEATURE_MASK_FP)) { + xsave->i387.mxcsr = mxcsr[0]; + xsave->i387.mxcsr_mask = mxcsr[1]; + } + } for (i = 0; i < XFEATURE_MAX; i++) { u64 mask = ((u64)1 << i); @@ -1125,16 +1122,11 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) offset = xstate_offsets[i]; size = xstate_sizes[i]; - memcpy(dst, kbuf + offset, size); + if (copy_from_buffer(dst, offset, size, kbuf, ubuf)) + return -EFAULT; } } - if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - memcpy(&xsave->i387.mxcsr, kbuf + offset, size); - } - /* * The state that came in from userspace was user-state only. 
* Mask all the user states out of 'xfeatures': @@ -1149,6 +1141,16 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) return 0; } +/* + * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] + * format and copy to the target thread. This is called from + * xstateregs_set(). + */ +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) +{ + return copy_uabi_to_xstate(xsave, kbuf, NULL); +} + /* * Convert from a sigreturn standard-format user-space buffer to kernel * XSAVE[S] format and copy to the target thread. This is called from the @@ -1157,52 +1159,7 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) { - unsigned int offset, size; - int i; - struct xstate_header hdr; - - offset = offsetof(struct xregs_state, header); - size = sizeof(hdr); - - if (copy_from_user(&hdr, ubuf + offset, size)) - return -EFAULT; - - if (validate_user_xstate_header(&hdr)) - return -EINVAL; - - for (i = 0; i < XFEATURE_MAX; i++) { - u64 mask = ((u64)1 << i); - - if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, i); - - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - if (copy_from_user(dst, ubuf + offset, size)) - return -EFAULT; - } - } - - if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - if (copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) - return -EFAULT; - } - - /* - * The state that came in from userspace was user-state only. 
- * Mask all the user states out of 'xfeatures': - */ - xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL; - - /* - * Add back in the features that came in from userspace: - */ - xsave->header.xfeatures |= hdr.xfeatures; - - return 0; + return copy_uabi_to_xstate(xsave, NULL, ubuf); } /** -- Gitee From 88e21209c2fbe9e9ec8d0ca1343ef948cbb75844 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:01:59 +0200 Subject: [PATCH 036/188] x86/fpu: Rename copy_fpregs_to_fpstate() to save_fpregs_to_fpstate() mainline inclusion from mainline-v5.14-rc1 commit ebe7234b08a42d69bae94c4062a84777ea26ef99 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ebe7234b08a4 x86/fpu: Rename copy_fpregs_to_fpstate() to save_fpregs_to_fpstate(). -------------------------------- A copy is guaranteed to leave the source intact, which is not the case when FNSAVE is used as that reinitializes the registers. Save does not make such guarantees and it matches what this is about, i.e. to save the state for a later restore. Rename it to save_fpregs_to_fpstate(). 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.508853062@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/kernel/fpu/core.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index ee65b84ac4f5..b9776f6bf25b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -380,7 +380,7 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) return err; } -extern int copy_fpregs_to_fpstate(struct fpu *fpu); +extern int save_fpregs_to_fpstate(struct fpu *fpu); static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { @@ -512,7 +512,7 @@ static inline void __fpregs_load_activate(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - if (!copy_fpregs_to_fpstate(old_fpu)) + if (!save_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else old_fpu->last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1d2587607a7f..dcf4d6b58180 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL(irq_fpu_usable); * Modern FPU state can be kept in registers, if there are * no pending FP exceptions. 
*/ -int copy_fpregs_to_fpstate(struct fpu *fpu) +int save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { os_xsave(&fpu->state.xsave); @@ -119,7 +119,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -EXPORT_SYMBOL(copy_fpregs_to_fpstate); +EXPORT_SYMBOL(save_fpregs_to_fpstate); void kernel_fpu_begin_mask(unsigned int kfpu_mask) { @@ -137,7 +137,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) * Ignore return value -- we don't care if reg state * is clobbered. */ - copy_fpregs_to_fpstate(&current->thread.fpu); + save_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); @@ -172,7 +172,7 @@ void fpu__save(struct fpu *fpu) trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - if (!copy_fpregs_to_fpstate(fpu)) { + if (!save_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } } @@ -255,7 +255,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) if (test_thread_flag(TIF_NEED_FPU_LOAD)) memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); - else if (!copy_fpregs_to_fpstate(dst_fpu)) + else if (!save_fpregs_to_fpstate(dst_fpu)) copy_kernel_to_fpregs(&dst_fpu->state); fpregs_unlock(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dc1d4ea948fd..a7c901dc9256 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9457,7 +9457,7 @@ static void kvm_save_current_fpu(struct fpu *fpu) memcpy(&fpu->state, &current->thread.fpu.state, fpu_kernel_xstate_size); else - copy_fpregs_to_fpstate(fpu); + save_fpregs_to_fpstate(fpu); } /* Swap (qemu) user FPU context for the guest FPU context. 
*/ -- Gitee From f7172c186afb07f10f9b6936ec4ae934a0dea4f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:00 +0200 Subject: [PATCH 037/188] x86/fpu: Get rid of the FNSAVE optimization mainline inclusion from mainline-v5.14-rc1 commit 08ded2cd18a09749e67a14426aa7fd1b04ab1dc0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 08ded2cd18a0 x86/fpu: Get rid of the FNSAVE optimization. -------------------------------- The FNSAVE support requires conditionals in quite some call paths because FNSAVE reinitializes the FPU hardware. If the save has to preserve the FPU register state then the caller has to conditionally restore it from memory when FNSAVE is in use. This also requires a conditional in context switch because the restore avoidance optimization cannot work with FNSAVE. As this only affects 20+ years old CPUs there is really no reason to keep this optimization effective for FNSAVE. It's about time to not optimize for antiques anymore. Just unconditionally FRSTOR the save content to the registers and clean up the conditionals all over the place. 
Suggested-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.617369268@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 18 ++++++---- arch/x86/kernel/fpu/core.c | 54 ++++++++++++----------------- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b9776f6bf25b..7c07155c7443 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -83,6 +83,7 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif +extern void save_fpregs_to_fpstate(struct fpu *fpu); /* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ #define user_insn(insn, output, input...) \ @@ -380,8 +381,6 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) return err; } -extern int save_fpregs_to_fpstate(struct fpu *fpu); - static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { @@ -512,12 +511,17 @@ static inline void __fpregs_load_activate(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - if (!save_fpregs_to_fpstate(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; - /* But leave fpu_fpregs_owner_ctx! 
*/ trace_x86_fpu_regs_deactivated(old_fpu); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index dcf4d6b58180..c290ba27ffef 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -83,16 +83,20 @@ bool irq_fpu_usable(void) EXPORT_SYMBOL(irq_fpu_usable); /* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact and we can - * keep registers active. + * Save the FPU register state in fpu->state. The register state is + * preserved. * - * The legacy FNSAVE instruction cleared all FPU state - * unconditionally, so registers are essentially destroyed. - * Modern FPU state can be kept in registers, if there are - * no pending FP exceptions. + * Must be called with fpregs_lock() held. + * + * The legacy FNSAVE instruction clears all FPU state unconditionally, so + * register state has to be reloaded. That might be a pointless exercise + * when the FPU is going to be used by another task right after that. But + * this only affects 20+ years old 32bit systems and avoids conditionals all + * over the place. + * + * FXSAVE and all XSAVE variants preserve the FPU register state. */ -int save_fpregs_to_fpstate(struct fpu *fpu) +void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { os_xsave(&fpu->state.xsave); @@ -103,21 +107,20 @@ int save_fpregs_to_fpstate(struct fpu *fpu) */ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) fpu->avx512_timestamp = jiffies; - return 1; + return; } if (likely(use_fxsr())) { fxsave(&fpu->state.fxsave); - return 1; + return; } /* * Legacy FPU register saving, FNSAVE always clears FPU registers, - * so we have to mark them inactive: + * so we have to reload them from the memory state. 
*/ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - - return 0; + frstor(&fpu->state.fsave); } EXPORT_SYMBOL(save_fpregs_to_fpstate); @@ -133,10 +136,6 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) if (!(current->flags & PF_KTHREAD) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ save_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); @@ -171,11 +170,8 @@ void fpu__save(struct fpu *fpu) fpregs_lock(); trace_x86_fpu_before_save(fpu); - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - if (!save_fpregs_to_fpstate(fpu)) { - copy_kernel_to_fpregs(&fpu->state); - } - } + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + save_fpregs_to_fpstate(fpu); trace_x86_fpu_after_save(fpu); fpregs_unlock(); @@ -244,20 +240,16 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* - * If the FPU registers are not current just memcpy() the state. - * Otherwise save current FPU registers directly into the child's FPU - * context, without any memory-to-memory copying. - * - * ( The function 'fails' in the FNSAVE case, which destroys - * register contents so we have to load them back. ) + * If the FPU registers are not owned by current just memcpy() the + * state. Otherwise save the FPU registers directly into the + * child's FPU context, without any memory-to-memory copying. 
*/ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); - else if (!save_fpregs_to_fpstate(dst_fpu)) - copy_kernel_to_fpregs(&dst_fpu->state); - + else + save_fpregs_to_fpstate(dst_fpu); fpregs_unlock(); set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); -- Gitee From 5f696e95af0ed12cc75c78d47cd2950970dfdd03 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:01 +0200 Subject: [PATCH 038/188] x86/fpu: Rename copy_kernel_to_fpregs() to restore_fpregs_from_fpstate() mainline inclusion from mainline-v5.14-rc1 commit 1c61fada304c125c3f8a2b8eb1896406e4098a05 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1c61fada304c x86/fpu: Rename copy_kernel_to_fpregs() to restore_fpregs_from_fpstate(). -------------------------------- This is not a copy functionality. It restores the register state from the supplied kernel buffer. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.716058365@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 8 ++++---- arch/x86/kvm/x86.c | 4 ++-- arch/x86/mm/extable.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7c07155c7443..56ac9b6901e5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -381,7 +381,7 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) return err; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) +static inline void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { os_xrstor(&fpstate->xsave, mask); @@ -393,7 +393,7 @@ static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask } } -static 
inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -408,7 +408,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate, -1); + __restore_fpregs_from_fpstate(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -479,7 +479,7 @@ static inline void __fpregs_load_activate(void) return; if (!fpregs_state_valid(fpu, cpu)) { - copy_kernel_to_fpregs(&fpu->state); + restore_fpregs_from_fpstate(&fpu->state); fpregs_activate(fpu); fpu->last_cpu = cpu; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a7c901dc9256..9b59c0ba4e9f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9468,7 +9468,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_save_current_fpu(vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops.run. 
*/ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + __restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); fpregs_mark_activate(); @@ -9484,7 +9484,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) kvm_save_current_fpu(vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); + restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); fpregs_unlock(); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7f..7d7bd39080dc 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -65,7 +65,7 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); - __copy_kernel_to_fpregs(&init_fpstate, -1); + __restore_fpregs_from_fpstate(&init_fpstate, -1); return true; } EXPORT_SYMBOL_GPL(ex_handler_fprestore); -- Gitee From c999cb8085287a816a8e573d5d0666a49c9bb6ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:02 +0200 Subject: [PATCH 039/188] x86/fpu: Rename initstate copy functions mainline inclusion from mainline-v5.14-rc1 commit b76411b1b568311bfd89d03acc587ffc1548c26f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b76411b1b568 x86/fpu: Rename initstate copy functions. -------------------------------- Again, this is not a copy. It's restoring register state from kernel memory. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121454.816581630@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c290ba27ffef..4a59e0fbcfd8 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -303,7 +303,7 @@ void fpu__drop(struct fpu *fpu) * Clear FPU registers by setting them up from the init fpstate. * Caller must do fpregs_[un]lock() around it. */ -static inline void copy_init_fpstate_to_fpregs(u64 features_mask) +static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) os_xrstor(&init_fpstate.xsave, features_mask); @@ -338,9 +338,9 @@ static void fpu__clear(struct fpu *fpu, bool user_only) if (!fpregs_state_valid(fpu, smp_processor_id()) && xfeatures_mask_supervisor()) os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); - copy_init_fpstate_to_fpregs(xfeatures_mask_user()); + restore_fpregs_from_init_fpstate(xfeatures_mask_user()); } else { - copy_init_fpstate_to_fpregs(xfeatures_mask_all); + restore_fpregs_from_init_fpstate(xfeatures_mask_all); } fpregs_mark_activate(); -- Gitee From ee2ffd0de5a2b0e9ca19dc9eb1d130b5da1f0126 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 23 Jun 2021 14:02:03 +0200 Subject: [PATCH 040/188] x86/fpu: Rename "dynamic" XSTATEs to "independent" mainline inclusion from mainline-v5.14-rc1 commit 01707b66535872f7a0d87f66078fd018d1814be0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 01707b665358 x86/fpu: Rename "dynamic" XSTATEs to "independent". -------------------------------- The salient feature of "dynamic" XSTATEs is that they are not part of the main task XSTATE buffer. 
The fact that they are dynamically allocated is irrelevant and will become quite confusing when user math XSTATEs start being dynamically allocated. Rename them to "independent" because they are independent of the main XSTATE code. This is just a search-and-replace with some whitespace updates to keep things aligned. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/1eecb0e4f3e07828ebe5d737ec77dc3b708fad2d.1623388344.git.luto@kernel.org Link: https://lkml.kernel.org/r/20210623121454.911450390@linutronix.de Signed-off-by: Lin Wang --- arch/x86/events/intel/lbr.c | 6 +-- arch/x86/include/asm/fpu/xstate.h | 22 +++++------ arch/x86/kernel/fpu/xstate.c | 62 +++++++++++++++---------------- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index bd8516e6c353..9f7597a458db 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -491,7 +491,7 @@ static void intel_pmu_arch_lbr_xrstors(void *ctx) { struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; - copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR); + copy_kernel_to_independent_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR); } static __always_inline bool lbr_is_reset_in_cstate(void *ctx) @@ -576,7 +576,7 @@ static void intel_pmu_arch_lbr_xsaves(void *ctx) { struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; - copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR); + copy_independent_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR); } static void __intel_pmu_lbr_save(void *ctx) @@ -993,7 +993,7 @@ static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) intel_pmu_store_lbr(cpuc, NULL); return; } - copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR); + copy_independent_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR); intel_pmu_store_lbr(cpuc, 
xsave->lbr.entries); } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 00e1a2ac5239..a55bd5cabb59 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -42,21 +42,21 @@ * and its size may be huge. Saving/restoring such supervisor state components * at each context switch can cause high CPU and space overhead, which should * be avoided. Such supervisor state components should only be saved/restored - * on demand. The on-demand dynamic supervisor features are set in this mask. + * on demand. The on-demand supervisor features are set in this mask. * - * Unlike the existing supported supervisor features, a dynamic supervisor + * Unlike the existing supported supervisor features, an independent supervisor * feature does not allocate a buffer in task->fpu, and the corresponding * supervisor state component cannot be saved/restored at each context switch. * - * To support a dynamic supervisor feature, a developer should follow the + * To support an independent supervisor feature, a developer should follow the * dos and don'ts as below: * - Do dynamically allocate a buffer for the supervisor state component. * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the * state component to/from the buffer. - * - Don't set the bit corresponding to the dynamic supervisor feature in + * - Don't set the bit corresponding to the independent supervisor feature in * IA32_XSS at run time, since it has been set at boot time. */ -#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR) +#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) /* * Unsupported supervisor features. When a supervisor feature in this mask is @@ -66,7 +66,7 @@ /* All supervisor states including supported and unsupported states. 
*/ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ - XFEATURE_MASK_DYNAMIC | \ + XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 @@ -87,12 +87,12 @@ static inline u64 xfeatures_mask_user(void) return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } -static inline u64 xfeatures_mask_dynamic(void) +static inline u64 xfeatures_mask_independent(void) { if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR; + return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; - return XFEATURE_MASK_DYNAMIC; + return XFEATURE_MASK_INDEPENDENT; } extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; @@ -104,8 +104,8 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); -void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); -void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); +void copy_independent_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); +void copy_kernel_to_independent_supervisor(struct xregs_state *xstate, u64 mask); enum xstate_copy_mode { XSTATE_COPY_FP, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 98492f6d1d40..0aed464ff572 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -151,7 +151,7 @@ void fpu__init_cpu_xstate(void) */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | - xfeatures_mask_dynamic()); + xfeatures_mask_independent()); } } @@ -551,7 +551,7 @@ static void check_xstate_against_struct(int nr) * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. 
* - * Dynamic XSAVE features allocate their own buffers and are not + * Independent XSAVE features allocate their own buffers and are not * covered by these checks. Only the size of the buffer for task->fpu * is checked here. */ @@ -617,18 +617,18 @@ static unsigned int __init get_xsaves_size(void) } /* - * Get the total size of the enabled xstates without the dynamic supervisor + * Get the total size of the enabled xstates without the independent supervisor * features. */ -static unsigned int __init get_xsaves_size_no_dynamic(void) +static unsigned int __init get_xsaves_size_no_independent(void) { - u64 mask = xfeatures_mask_dynamic(); + u64 mask = xfeatures_mask_independent(); unsigned int size; if (!mask) return get_xsaves_size(); - /* Disable dynamic features. */ + /* Disable independent features. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); /* @@ -637,7 +637,7 @@ static unsigned int __init get_xsaves_size_no_dynamic(void) */ size = get_xsaves_size(); - /* Re-enable dynamic features so XSAVES will work on them again. */ + /* Re-enable independent features so XSAVES will work on them again. 
*/ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); return size; @@ -680,7 +680,7 @@ static int __init init_xstate_size(void) xsave_size = get_xsave_size(); if (boot_cpu_has(X86_FEATURE_XSAVES)) - possible_xstate_size = get_xsaves_size_no_dynamic(); + possible_xstate_size = get_xsaves_size_no_independent(); else possible_xstate_size = xsave_size; @@ -837,7 +837,7 @@ void fpu__resume_cpu(void) */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | - xfeatures_mask_dynamic()); + xfeatures_mask_independent()); } } @@ -1163,34 +1163,34 @@ int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, } /** - * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to - * an xsave area + * copy_independent_supervisor_to_kernel() - Save independent supervisor states to + * an xsave area * @xstate: A pointer to an xsave area - * @mask: Represent the dynamic supervisor features saved into the xsave area + * @mask: Represent the independent supervisor features saved into the xsave area * - * Only the dynamic supervisor states sets in the mask are saved into the xsave - * area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic - * supervisor feature). Besides the dynamic supervisor states, the legacy + * Only the independent supervisor states sets in the mask are saved into the xsave + * area (See the comment in XFEATURE_MASK_INDEPENDENT for the details of independent + * supervisor feature). Besides the independent supervisor states, the legacy * region and XSAVE header are also saved into the xsave area. The supervisor * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved. * * The xsave area must be 64-bytes aligned. 
*/ -void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask) +void copy_independent_supervisor_to_kernel(struct xregs_state *xstate, u64 mask) { - u64 dynamic_mask = xfeatures_mask_dynamic() & mask; + u64 independent_mask = xfeatures_mask_independent() & mask; u32 lmask, hmask; int err; if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) return; - if (WARN_ON_FPU(!dynamic_mask)) + if (WARN_ON_FPU(!independent_mask)) return; - lmask = dynamic_mask; - hmask = dynamic_mask >> 32; + lmask = independent_mask; + hmask = independent_mask >> 32; XSTATE_OP(XSAVES, xstate, lmask, hmask, err); @@ -1199,34 +1199,34 @@ void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask) } /** - * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from - * an xsave area + * copy_kernel_to_independent_supervisor() - Restore independent supervisor states from + * an xsave area * @xstate: A pointer to an xsave area - * @mask: Represent the dynamic supervisor features restored from the xsave area + * @mask: Represent the independent supervisor features restored from the xsave area * - * Only the dynamic supervisor states sets in the mask are restored from the - * xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of - * dynamic supervisor feature). Besides the dynamic supervisor states, the + * Only the independent supervisor states sets in the mask are restored from the + * xsave area (See the comment in XFEATURE_MASK_INDEPENDENT for the details of + * independent supervisor feature). Besides the independent supervisor states, the * legacy region and XSAVE header are also restored from the xsave area. The * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored. * * The xsave area must be 64-bytes aligned. 
*/ -void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask) +void copy_kernel_to_independent_supervisor(struct xregs_state *xstate, u64 mask) { - u64 dynamic_mask = xfeatures_mask_dynamic() & mask; + u64 independent_mask = xfeatures_mask_independent() & mask; u32 lmask, hmask; int err; if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) return; - if (WARN_ON_FPU(!dynamic_mask)) + if (WARN_ON_FPU(!independent_mask)) return; - lmask = dynamic_mask; - hmask = dynamic_mask >> 32; + lmask = independent_mask; + hmask = independent_mask >> 32; XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); -- Gitee From 0ff210500135631de72b38630c7955d022e003da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:04 +0200 Subject: [PATCH 041/188] x86/fpu/xstate: Sanitize handling of independent features mainline inclusion from mainline-v5.14-rc1 commit a75c52896b6d42d6600db4d4dd9f7e4bde9218db category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit a75c52896b6d x86/fpu/xstate: Sanitize handling of independent features. -------------------------------- The copy functions for the independent features are horribly named and the supervisor and independent part is just overengineered. The point is that the supplied mask has either to be a subset of the independent features or a subset of the task->fpu.xstate managed features. Rewrite it so it checks for invalid overlaps of these areas in the caller supplied feature mask. Rename it so it follows the new naming convention for these operations. Mop up the function documentation. This allows to use that function for other purposes as well. 
Suggested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Tested-by: Kan Liang Link: https://lkml.kernel.org/r/20210623121455.004880675@linutronix.de Signed-off-by: Lin Wang --- arch/x86/events/intel/lbr.c | 6 +- arch/x86/include/asm/fpu/xstate.h | 5 +- arch/x86/kernel/fpu/xstate.c | 98 +++++++++++++++---------------- 3 files changed, 54 insertions(+), 55 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9f7597a458db..1d517fd08542 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -491,7 +491,7 @@ static void intel_pmu_arch_lbr_xrstors(void *ctx) { struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; - copy_kernel_to_independent_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR); + xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); } static __always_inline bool lbr_is_reset_in_cstate(void *ctx) @@ -576,7 +576,7 @@ static void intel_pmu_arch_lbr_xsaves(void *ctx) { struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; - copy_independent_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR); + xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); } static void __intel_pmu_lbr_save(void *ctx) @@ -993,7 +993,7 @@ static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) intel_pmu_store_lbr(cpuc, NULL); return; } - copy_independent_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR); + xsaves(&xsave->xsave, XFEATURE_MASK_LBR); intel_pmu_store_lbr(cpuc, xsave->lbr.entries); } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index a55bd5cabb59..7de2384628e2 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -104,8 +104,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user 
*ubuf); -void copy_independent_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); -void copy_kernel_to_independent_supervisor(struct xregs_state *xstate, u64 mask); + +void xsaves(struct xregs_state *xsave, u64 mask); +void xrstors(struct xregs_state *xsave, u64 mask); enum xstate_copy_mode { XSTATE_COPY_FP, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0aed464ff572..dea986921a4f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1162,76 +1162,74 @@ int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, return copy_uabi_to_xstate(xsave, NULL, ubuf); } +static bool validate_xsaves_xrstors(u64 mask) +{ + u64 xchk; + + if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) + return false; + /* + * Validate that this is either a task->fpstate related component + * subset or an independent one. + */ + if (mask & xfeatures_mask_independent()) + xchk = ~xfeatures_mask_independent(); + else + xchk = ~xfeatures_mask_all; + + if (WARN_ON_ONCE(!mask || mask & xchk)) + return false; + + return true; +} + /** - * copy_independent_supervisor_to_kernel() - Save independent supervisor states to - * an xsave area - * @xstate: A pointer to an xsave area - * @mask: Represent the independent supervisor features saved into the xsave area + * xsaves - Save selected components to a kernel xstate buffer + * @xstate: Pointer to the buffer + * @mask: Feature mask to select the components to save * - * Only the independent supervisor states sets in the mask are saved into the xsave - * area (See the comment in XFEATURE_MASK_INDEPENDENT for the details of independent - * supervisor feature). Besides the independent supervisor states, the legacy - * region and XSAVE header are also saved into the xsave area. The supervisor - * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and - * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved. 
+ * The @xstate buffer must be 64 byte aligned and correctly initialized as + * XSAVES does not write the full xstate header. Before first use the + * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer + * can #GP. * - * The xsave area must be 64-bytes aligned. + * The feature mask must either be a subset of the independent features or + * a subset of the task->fpstate related features. */ -void copy_independent_supervisor_to_kernel(struct xregs_state *xstate, u64 mask) +void xsaves(struct xregs_state *xstate, u64 mask) { - u64 independent_mask = xfeatures_mask_independent() & mask; - u32 lmask, hmask; int err; - if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) + if (!validate_xsaves_xrstors(mask)) return; - if (WARN_ON_FPU(!independent_mask)) - return; - - lmask = independent_mask; - hmask = independent_mask >> 32; - - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - - /* Should never fault when copying to a kernel buffer */ - WARN_ON_FPU(err); + XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); + WARN_ON_ONCE(err); } /** - * copy_kernel_to_independent_supervisor() - Restore independent supervisor states from - * an xsave area - * @xstate: A pointer to an xsave area - * @mask: Represent the independent supervisor features restored from the xsave area + * xrstors - Restore selected components from a kernel xstate buffer + * @xstate: Pointer to the buffer + * @mask: Feature mask to select the components to restore + * + * The @xstate buffer must be 64 byte aligned and correctly initialized + * otherwise XRSTORS from that buffer can #GP. * - * Only the independent supervisor states sets in the mask are restored from the - * xsave area (See the comment in XFEATURE_MASK_INDEPENDENT for the details of - * independent supervisor feature). Besides the independent supervisor states, the - * legacy region and XSAVE header are also restored from the xsave area. 
The - * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and - * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored. + * Proper usage is to restore the state which was saved with + * xsaves() into @xstate. * - * The xsave area must be 64-bytes aligned. + * The feature mask must either be a subset of the independent features or + * a subset of the task->fpstate related features. */ -void copy_kernel_to_independent_supervisor(struct xregs_state *xstate, u64 mask) +void xrstors(struct xregs_state *xstate, u64 mask) { - u64 independent_mask = xfeatures_mask_independent() & mask; - u32 lmask, hmask; int err; - if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) + if (!validate_xsaves_xrstors(mask)) return; - if (WARN_ON_FPU(!independent_mask)) - return; - - lmask = independent_mask; - hmask = independent_mask >> 32; - - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - - /* Should never fault when copying from a kernel buffer */ - WARN_ON_FPU(err); + XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err); + WARN_ON_ONCE(err); } #ifdef CONFIG_PROC_PID_ARCH_STATUS -- Gitee From adf82a9d76cffee7e60f23f415fc49de63dd3fa4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 23 Jun 2021 14:02:05 +0200 Subject: [PATCH 042/188] x86/pkeys: Move read_pkru() and write_pkru() mainline inclusion from mainline-v5.14-rc1 commit 784a46618f634973a17535b7d3d03cd4ebc0ccbd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 784a46618f63 x86/pkeys: Move read_pkru() and write_pkru(). -------------------------------- write_pkru() was originally used just to write to the PKRU register. It was mercifully short and sweet and was not out of place in pgtable.h with some other pkey-related code. But, later work included a requirement to also modify the task XSAVE buffer when updating the register. This really is more related to the XSAVE architecture than to paging. Move the read/write_pkru() to asm/pkru.h. 
pgtable.h won't miss them. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.102647114@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 1 + arch/x86/include/asm/pgtable.h | 57 +---------------------------- arch/x86/include/asm/pkru.h | 61 +++++++++++++++++++++++++++++++ arch/x86/kernel/process_64.c | 1 + arch/x86/kvm/svm/sev.c | 2 + arch/x86/kvm/x86.c | 1 + arch/x86/mm/pkeys.c | 1 + 7 files changed, 68 insertions(+), 56 deletions(-) create mode 100644 arch/x86/include/asm/pkru.h diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 7de2384628e2..5764cbe39014 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -6,6 +6,7 @@ #include #include +#include #include /* Bit 63 of XCR0 is reserved for future expansion */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9bacde3ff514..788b3a0120dc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,7 +23,7 @@ #ifndef __ASSEMBLY__ #include -#include +#include #include #include @@ -126,35 +126,6 @@ static inline int pte_dirty(pte_t pte) return pte_flags(pte) & _PAGE_DIRTY; } - -static inline u32 read_pkru(void) -{ - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return rdpkru(); - return 0; -} - -static inline void write_pkru(u32 pkru) -{ - struct pkru_state *pk; - - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU); - - /* - * The PKRU value in xstate needs to be in sync with the value that is - * written to the CPU. The FPU restore on return to userland would - * otherwise load the previous value again.
- */ - fpregs_lock(); - if (pk) - pk->pkru = pkru; - __write_pkru(pkru); - fpregs_unlock(); -} - static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; @@ -1360,32 +1331,6 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ -#define PKRU_AD_BIT 0x1u -#define PKRU_WD_BIT 0x2u -#define PKRU_BITS_PER_PKEY 2 - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -extern u32 init_pkru_value; -#else -#define init_pkru_value 0 -#endif - -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); -} - -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); -} - static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000000..35ffcfd6e403 --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include + +#define PKRU_AD_BIT 0x1 +#define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#else +#define init_pkru_value 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. 
+ */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (boot_cpu_has(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + struct pkru_state *pk; + + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU); + + /* + * The PKRU value in xstate needs to be in sync with the value that is + * written to the CPU. The FPU restore on return to userland would + * otherwise load the previous value again. + */ + fpregs_lock(); + if (pk) + pk->pkru = pkru; + __write_pkru(pkru); + fpregs_unlock(); +} + +#endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index df342bedea88..34d60788fcb8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7828b36d67c1..31488ece627e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -15,6 +15,8 @@ #include #include +#include + #include "x86.h" #include "svm.h" diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b59c0ba4e9f..9e3153d10f39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include /* Ugh! */ #include diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 379c39612793..14c98bcb1083 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -10,6 +10,7 @@ #include /* boot_cpu_has, ...
*/ #include /* vma_pkey() */ +#include /* read/write_pkru() */ int __execute_only_pkey(struct mm_struct *mm) { -- Gitee From 79fc48491d857ed1a468bbd8db2dfa9b734b7242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:06 +0200 Subject: [PATCH 043/188] x86/fpu: Rename and sanitize fpu__save/copy() mainline inclusion from mainline-v5.14-rc1 commit b2681e791dbcee6acb1dca7a5076a0285109ac4c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b2681e791dbc x86/fpu: Rename and sanitize fpu__save/copy(). -------------------------------- Both function names are a misnomer. fpu__save() is actually about synchronizing the hardware register state into the task's memory state so that either coredump or a math exception handler can inspect the state at the time where the problem happens. The function guarantees to preserve the register state, while "save" is a common terminology for saving the current state so it can be modified and restored later. This is clearly not the case here. Rename it to fpu_sync_fpstate(). fpu__copy() is used to clone the current task's FPU state when duplicating task_struct. While the register state is a copy the rest of the FPU state is not. Name it accordingly and remove the really pointless @src argument along with the warning which comes along with it. Nothing can ever copy the FPU state of a non-current task. It's clearly just a consequence of arch_dup_task_struct(), but it makes no sense to proliferate that further. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.196727450@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 6 ++++-- arch/x86/kernel/fpu/core.c | 17 ++++++++--------- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/process.c | 3 +-- arch/x86/kernel/traps.c | 5 +++-- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 56ac9b6901e5..c4e0609721ff 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,14 +26,16 @@ /* * High level FPU state handling functions: */ -extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear_user_states(struct fpu *fpu); extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); + +extern int fpu_clone(struct task_struct *dst); + /* * Boot time FPU initialization functions: */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4a59e0fbcfd8..8762b1a8966a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -159,11 +159,10 @@ void kernel_fpu_end(void) EXPORT_SYMBOL_GPL(kernel_fpu_end); /* - * Save the FPU state (mark it for reload if necessary): - * - * This only ever gets called for the current task. + * Sync the FPU register state to current's memory register state when the + * current task owns the FPU. The hardware register state is preserved. 
*/ -void fpu__save(struct fpu *fpu) +void fpu_sync_fpstate(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); @@ -221,18 +220,18 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -int fpu__copy(struct task_struct *dst, struct task_struct *src) +/* Clone current's FPU state on fork */ +int fpu_clone(struct task_struct *dst) { + struct fpu *src_fpu = &current->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; - struct fpu *src_fpu = &src->thread.fpu; + /* The new task's FPU state cannot be valid in the hardware. */ dst_fpu->last_cpu = -1; - if (!static_cpu_has(X86_FEATURE_FPU)) + if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; - WARN_ON_FPU(src_fpu != &current->thread.fpu); - /* * Don't let 'init optimized' areas of the XSAVE area * leak into the child task: diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 892aec1dd822..4575796d547b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -41,7 +41,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r static void sync_fpstate(struct fpu *fpu) { if (fpu == &current->thread.fpu) - fpu__save(fpu); + fpu_sync_fpstate(fpu); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca588a3ac01b..55539ce3c7d0 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -93,8 +93,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif - - return fpu__copy(dst, src); + return fpu_clone(dst); } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 696ec85164e6..4c0fcbea60ad 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1089,9 +1089,10 @@ static void math_error(struct pt_regs *regs, int trapnr) } /* - * Save the info for the exception handler and clear the error. + * Synchronize the FPU register state to the memory register state + * if necessary.
This allows the exception handler to inspect it. */ - fpu__save(fpu); + fpu_sync_fpstate(fpu); task->thread.trap_nr = trapnr; task->thread.error_code = 0; -- Gitee From 35770a90e97b5f69a8356671aa474a394c39e4a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:07 +0200 Subject: [PATCH 044/188] x86/cpu: Sanitize X86_FEATURE_OSPKE mainline inclusion from mainline-v5.14-rc1 commit 8a1dc55a3f3ef0a723c3c117a567e7b5dd2c1793 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 8a1dc55a3f3e x86/cpu: Sanitize X86_FEATURE_OSPKE. -------------------------------- X86_FEATURE_OSPKE is enabled first on the boot CPU and the feature flag is set. Secondary CPUs have to enable CR4.PKE as well and set their per CPU feature flag. That's ineffective because all call sites have checks for boot_cpu_data. Make it smarter and force the feature flag when PKU is enabled on the boot cpu which allows then to use cpu_feature_enabled(X86_FEATURE_OSPKE) all over the place. That either compiles the code out when PKEY support is disabled in Kconfig or uses a static_cpu_has() for the feature check which makes a significant difference in hotpaths, e.g. context switch. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.305113644@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/pkeys.h | 8 ++++---- arch/x86/include/asm/pkru.h | 4 ++-- arch/x86/kernel/cpu/common.c | 24 +++++++++++------------- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/mm/fault.c | 2 +- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 2ff9b98812b7..4128f647c755 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -9,14 +9,14 @@ * will be necessary to ensure that the types that store key * numbers and masks have sufficient capacity. */ -#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { - return boot_cpu_has(X86_FEATURE_OSPKE); + return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* @@ -26,7 +26,7 @@ static inline bool arch_pkeys_enabled(void) extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); @@ -37,7 +37,7 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 35ffcfd6e403..ec8dd2878dc9 100644 --- 
a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -32,7 +32,7 @@ static inline bool __pkru_allows_write(u32 pkru, u16 pkey) static inline u32 read_pkru(void) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) return rdpkru(); return 0; } @@ -41,7 +41,7 @@ static inline void write_pkru(u32 pkru) { struct pkru_state *pk; - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5780924492e..526ceeb53699 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -468,22 +468,20 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { - /* check the boot processor, plus compile options for PKU: */ - if (!cpu_feature_enabled(X86_FEATURE_PKU)) - return; - /* checks the actual processor's cpuid bits: */ - if (!cpu_has(c, X86_FEATURE_PKU)) - return; - if (pku_disabled) + if (c == &boot_cpu_data) { + if (pku_disabled || !cpu_feature_enabled(X86_FEATURE_PKU)) + return; + /* + * Setting CR4.PKE will cause the X86_FEATURE_OSPKE cpuid + * bit to be set. Enforce it. + */ + setup_force_cpu_cap(X86_FEATURE_OSPKE); + + } else if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) { return; + } cr4_set_bits(X86_CR4_PKE); - /* - * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE - * cpuid bit to be set. We need to ensure that we - * update that bit in this CPU's "cpu_info".
- */ - set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8762b1a8966a..3866954354a4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -311,7 +311,7 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) else frstor(&init_fpstate.fsave); - if (boot_cpu_has(X86_FEATURE_OSPKE)) + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index dea986921a4f..de4ed9153835 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -921,7 +921,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * This check implies XSAVE support. OSPKE only gets * set if we enable XSAVE and we enable PKU in XCR0. */ - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return -EINVAL; /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 34d60788fcb8..07283f92c9a5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -137,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (boot_cpu_has(X86_FEATURE_OSPKE)) + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 058ff3f6944c..931b79d3bb4f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -885,7 +885,7 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, /* This code is always called on the current mm */ bool foreign = false; - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return false; if (error_code & X86_PF_PK) return true; -- Gitee From ae983796e4bafcee9942b467632cf959f9bd2d34 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 
14:02:08 +0200 Subject: [PATCH 045/188] x86/pkru: Provide pkru_get_init_value() mainline inclusion from mainline-v5.14-rc1 commit 739e2eec0f4849eb411567407d61491f923db405 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 739e2eec0f48 x86/pkru: Provide pkru_get_init_value(). -------------------------------- When CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS is disabled then the following code fails to compile: if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { u32 pkru = READ_ONCE(init_pkru_value); .. } because init_pkru_value is defined as '0' which makes READ_ONCE() upset. Provide an accessor macro to avoid #ifdeffery all over the place. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.404880646@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/pkru.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index ec8dd2878dc9..19d3d7b98465 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -10,8 +10,10 @@ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) #else #define init_pkru_value 0 +#define pkru_get_init_value() 0 #endif static inline bool __pkru_allows_read(u32 pkru, u16 pkey) -- Gitee From e4878a425c090b52e9d188b964e00cdc380db574 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:09 +0200 Subject: [PATCH 046/188] x86/pkru: Provide pkru_write_default() mainline inclusion from mainline-v5.14-rc1 commit ff7ebff47c595e747aa1bb10d8a30b2acb7d425b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ff7ebff47c59 x86/pkru: Provide pkru_write_default(). 
-------------------------------- Provide a simple and trivial helper which just writes the PKRU default value without trying to fiddle with the task's xsave buffer. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.513729794@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/pkru.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 19d3d7b98465..7e4550911c51 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -60,4 +60,12 @@ static inline void write_pkru(u32 pkru) fpregs_unlock(); } +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + #endif -- Gitee From cebaffb426b2cb4ddd732d5a4cfc7e850491fdf1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:10 +0200 Subject: [PATCH 047/188] x86/cpu: Write the default PKRU value when enabling PKE mainline inclusion from mainline-v5.14-rc1 commit fa8c84b77a54bf3cf351c8b4b26a5aca27a14013 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit fa8c84b77a54 x86/cpu: Write the default PKRU value when enabling PKE. -------------------------------- In preparation of making the PKRU management more independent from XSTATES, write the default PKRU value into the hardware right after enabling PKRU in CR4. This ensures that switch_to() and copy_thread() have the correct setting for init task and the per CPU idle threads right away. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.622983906@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 526ceeb53699..186e3014a306 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -482,6 +482,8 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) } cr4_set_bits(X86_CR4_PKE); + /* Load the default PKRU value */ + pkru_write_default(); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -- Gitee From f156f021f01a63b206cb797371e0474c8ccdd539 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:11 +0200 Subject: [PATCH 048/188] x86/fpu: Use pkru_write_default() in copy_init_fpstate_to_fpregs() mainline inclusion from mainline-v5.14-rc1 commit 371071131cd1032c1e9172c51234a2a324841cab category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 371071131cd1 x86/fpu: Use pkru_write_default() in copy_init_fpstate_to_fpregs(). -------------------------------- There is no point in using copy_init_pkru_to_fpregs() which in turn calls write_pkru(). write_pkru() tries to fiddle with the task's xstate buffer for nothing because the XRSTOR[S](init_fpstate) just cleared the xfeature flag in the xstate header which makes get_xsave_addr() fail. It's a useless exercise anyway because the reinitialization activates the FPU so before the task's xstate buffer can be used again a XRSTOR[S] must happen which in turn dumps the PKRU value. Get rid of the now unused copy_init_pkru_to_fpregs(). 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.732508792@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/pkeys.h | 1 - arch/x86/kernel/fpu/core.c | 3 +-- arch/x86/mm/pkeys.c | 17 ----------------- include/linux/pkeys.h | 4 ---- 4 files changed, 1 insertion(+), 24 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 4128f647c755..5c7bcaa79623 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -124,7 +124,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); -extern void copy_init_pkru_to_fpregs(void); static inline int vma_pkey(struct vm_area_struct *vma) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3866954354a4..fedadcb04ba2 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -311,8 +311,7 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) else frstor(&init_fpstate.fsave); - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) - copy_init_pkru_to_fpregs(); + pkru_write_default(); } /* diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 14c98bcb1083..fb171a5d7f33 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -10,7 +10,6 @@ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/mmu_context.h> /* vma_pkey() */ -#include <asm/fpu/internal.h> /* read/write_pkru() */ int __execute_only_pkey(struct mm_struct *mm) { @@ -125,22 +124,6 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) | PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15); -/* - * Called from the FPU code when creating a fresh set of FPU - * registers. This is called from a very specific context where - * we know the FPU regstiers are safe for use and we can use PKRU - * directly.
- */ -void copy_init_pkru_to_fpregs(void) -{ - u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value); - /* - * Override the PKRU state that came from 'init_fpstate' - * with the baseline from the process. - */ - write_pkru(init_pkru_value_snapshot); -} - static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 2955ba976048..6beb26b7151d 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -44,10 +44,6 @@ static inline bool arch_pkeys_enabled(void) return false; } -static inline void copy_init_pkru_to_fpregs(void) -{ -} - #endif /* ! CONFIG_ARCH_HAS_PKEYS */ #endif /* _LINUX_PKEYS_H */ -- Gitee From 066a85041e6fc34f525bf91b338d6a579775cfb6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:12 +0200 Subject: [PATCH 049/188] x86/fpu: Rename fpu__clear_all() to fpu_flush_thread() mainline inclusion from mainline-v5.14-rc1 commit e7ecad17c84d0f6bef635c20d02bbe4096eea700 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit e7ecad17c84d x86/fpu: Rename fpu__clear_all() to fpu_flush_thread(). -------------------------------- Make it clear what the function is about. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.827979263@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 3 ++- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/process.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index c4e0609721ff..549119a951b4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -29,12 +29,13 @@ extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern void fpu__clear_user_states(struct fpu *fpu); -extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); +/* Clone and exit operations */ extern int fpu_clone(struct task_struct *dst); +extern void fpu_flush_thread(void); /* * Boot time FPU initialization functions: diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index fedadcb04ba2..4b69be9bea55 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -350,9 +350,9 @@ void fpu__clear_user_states(struct fpu *fpu) fpu__clear(fpu, true); } -void fpu__clear_all(struct fpu *fpu) +void fpu_flush_thread(void) { - fpu__clear(fpu, false); + fpu__clear(&current->thread.fpu, false); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 55539ce3c7d0..d75617ee7074 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -195,7 +195,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - fpu__clear_all(&tsk->thread.fpu); + fpu_flush_thread(); } void disable_TSC(void) -- Gitee From 8deb715efdc2db85bb986ae38accb6e42ee764d4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 23 Jun 2021 14:02:13 +0200
Subject: [PATCH 050/188] x86/fpu: Clean up the fpu__clear() variants mainline inclusion from mainline-v5.14-rc1 commit 33344368cb08f8d6bf55a32aa052318d3a69ea84 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 33344368cb08 x86/fpu: Clean up the fpu__clear() variants. -------------------------------- fpu__clear() currently resets both register state and kernel XSAVE buffer state. It has two modes: one for all state (supervisor and user) and another for user state only. fpu__clear_all() uses the "all state" (user_only=0) mode, while a number of signal paths use the user_only=1 mode. Make fpu__clear() work only for user state (user_only=1) and remove the "all state" (user_only=0) code. Rename it to match so it can be used by the signal paths. Replace the "all state" (user_only=0) fpu__clear() functionality. Use the TIF_NEED_FPU_LOAD functionality instead of making any actual hardware registers changes in this path. Instead of invoking fpu__initialize() just memcpy() init_fpstate into the task's FPU state because that has already the correct format and in case of PKRU also contains the default PKRU value. Move the actual PKRU write out into flush_thread() where it belongs and where it will end up anyway when PKRU and XSTATE have been untangled. For bisectability a workaround is required which stores the PKRU value in the xstate memory until PKRU is untangled from XSTATE for context switching and return to user. 
[ Dave Hansen: Polished changelog ] [ tglx: Fixed the PKRU fallout ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121455.922729522@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 113 +++++++++++++++++++++++++------------ arch/x86/kernel/process.c | 10 ++++ 2 files changed, 86 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4b69be9bea55..aa7e808b9d1e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -259,19 +259,6 @@ int fpu_clone(struct task_struct *dst) return 0; } -/* - * Activate the current task's in-memory FPU context, - * if it has not been used before: - */ -static void fpu__initialize(struct fpu *fpu) -{ - WARN_ON_FPU(fpu != &current->thread.fpu); - - set_thread_flag(TIF_NEED_FPU_LOAD); - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); -} - /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents @@ -314,47 +301,99 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) pkru_write_default(); } +static inline unsigned int init_fpstate_copy_size(void) +{ + if (!use_xsave()) + return fpu_kernel_xstate_size; + + /* XSAVE(S) just needs the legacy and the xstate header part */ + return sizeof(init_fpstate.xsave); +} + +/* Temporary workaround. Will be removed once PKRU and XSTATE are untangled. */ +static inline void pkru_set_default_in_xstate(struct xregs_state *xsave) +{ + struct pkru_state *pk; + + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * Force XFEATURE_PKRU to be set in the header otherwise + * get_xsave_addr() does not work and it also needs to be set to + * make XRSTOR(S) load it. + */ + xsave->header.xfeatures |= XFEATURE_MASK_PKRU; + pk = get_xsave_addr(xsave, XFEATURE_PKRU); + pk->pkru = pkru_get_init_value(); +} + /* - * Clear the FPU state back to init state.
- * - * Called by sys_execve(), by the signal handler code and by various - * error paths. + * Reset current->fpu memory state to the init values. + */ +static void fpu_reset_fpstate(void) +{ + struct fpu *fpu = &current->thread.fpu; + + fpregs_lock(); + fpu__drop(fpu); + /* + * This does not change the actual hardware registers. It just + * resets the memory image and sets TIF_NEED_FPU_LOAD so a + * subsequent return to usermode will reload the registers from the + * task's memory image. + * + * Do not use fpstate_init() here. Just copy init_fpstate which has + * the correct content already except for PKRU. + */ + memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size()); + pkru_set_default_in_xstate(&fpu->state.xsave); + set_thread_flag(TIF_NEED_FPU_LOAD); + fpregs_unlock(); +} + +/* + * Reset current's user FPU states to the init states. current's + * supervisor states, if any, are not modified by this function. The + * caller guarantees that the XSTATE header in memory is intact. */ -static void fpu__clear(struct fpu *fpu, bool user_only) +void fpu__clear_user_states(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) { - fpu__drop(fpu); - fpu__initialize(fpu); + fpregs_lock(); + if (!cpu_feature_enabled(X86_FEATURE_FPU)) { + fpu_reset_fpstate(); + fpregs_unlock(); return; } - fpregs_lock(); - - if (user_only) { - if (!fpregs_state_valid(fpu, smp_processor_id()) && - xfeatures_mask_supervisor()) - os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); - restore_fpregs_from_init_fpstate(xfeatures_mask_user()); - } else { - restore_fpregs_from_init_fpstate(xfeatures_mask_all); + /* + * Ensure that current's supervisor states are loaded into their + * corresponding registers. + */ + if (xfeatures_mask_supervisor() && + !fpregs_state_valid(fpu, smp_processor_id())) { + os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); } /* Reset user states in registers.
*/ + restore_fpregs_from_init_fpstate(xfeatures_mask_user()); + + /* + * Now all FPU registers have their desired values. Inform the FPU + * state machine that current's FPU registers are in the hardware + * registers. The memory image does not need to be updated because + * any operation relying on it has to save the registers first when + * current's FPU is marked active. + */ fpregs_mark_activate(); fpregs_unlock(); } -void fpu__clear_user_states(struct fpu *fpu) -{ - fpu__clear(fpu, true); -} - void fpu_flush_thread(void) { - fpu__clear(&current->thread.fpu, false); + fpu_reset_fpstate(); } - /* * Load FPU context before returning to userspace. */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d75617ee7074..1e69b4db7e2b 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -188,6 +188,15 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, return ret; } +static void pkru_flush_thread(void) +{ + /* + * If PKRU is enabled the default PKRU value has to be loaded into + * the hardware right here (similar to context switch). + */ + pkru_write_default(); +} + void flush_thread(void) { struct task_struct *tsk = current; @@ -196,6 +205,7 @@ void flush_thread(void) memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); fpu_flush_thread(); + pkru_flush_thread(); } void disable_TSC(void) -- Gitee From d152315e71c25dc76cb5a1e7d8cba39850aa80d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:14 +0200 Subject: [PATCH 051/188] x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs() mainline inclusion from mainline-v5.14-rc1 commit 727d01100e15b18c67f05fb697779ad2a6c99b63 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 727d01100e15 x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs().
-------------------------------- Rename it so that it becomes entirely clear what this function is about. It's purpose is to restore the FPU registers to the state which was saved in the task's FPU memory state either at context switch or by an in kernel FPU user. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.018867925@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 6 ++---- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/signal.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 549119a951b4..2c32d4eb9fae 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -470,10 +470,8 @@ static inline void fpregs_activate(struct fpu *fpu) trace_x86_fpu_regs_activated(fpu); } -/* - * Internal helper, do not use directly. Use switch_fpu_return() instead. 
- */ -static inline void __fpregs_load_activate(void) +/* Internal helper for switch_fpu_return() and signal frame setup */ +static inline void fpregs_restore_userregs(void) { struct fpu *fpu = &current->thread.fpu; int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index aa7e808b9d1e..6babf1876389 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -402,7 +402,7 @@ void switch_fpu_return(void) if (!static_cpu_has(X86_FEATURE_FPU)) return; - __fpregs_load_activate(); + fpregs_restore_userregs(); } EXPORT_SYMBOL_GPL(switch_fpu_return); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index fd4b58d7b72e..b12665c38a30 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -188,7 +188,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) - __fpregs_load_activate(); + fpregs_restore_userregs(); pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx); -- Gitee From 2e391a4d150824828a817a33d6248eb1fe9b6b50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:15 +0200 Subject: [PATCH 052/188] x86/fpu: Move FXSAVE_LEAK quirk into __copy_kernel_to_fpregs() mainline inclusion from mainline-v5.14-rc1 commit 1d9bffab116fadfe1594f5fea2b50ab280d81d30 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1d9bffab116f x86/fpu: Move FXSAVE_LEAK quirk into __copy_kernel_to_fpregs(). -------------------------------- copy_kernel_to_fpregs() restores all xfeatures but it is also the place where the AMD FXSAVE_LEAK bug is handled. That prevents fpregs_restore_userregs() to limit the restored features, which is required to untangle PKRU and XSTATE handling and also for the upcoming supervisor state management.
Move the FXSAVE_LEAK quirk into __copy_kernel_to_fpregs() and deinline that function which has become rather fat. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.114271278@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 25 +------------------------ arch/x86/kernel/fpu/core.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 2c32d4eb9fae..881a32ad6606 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -384,33 +384,10 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) return err; } -static inline void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) -{ - if (use_xsave()) { - os_xrstor(&fpstate->xsave, mask); - } else { - if (use_fxsr()) - fxrstor(&fpstate->fxsave); - else - frstor(&fpstate->fsave); - } -} +extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) { - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. 
- */ - if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpstate)); - } - __restore_fpregs_from_fpstate(fpstate, -1); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 6babf1876389..afd0deee8cfd 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -124,6 +124,33 @@ void save_fpregs_to_fpstate(struct fpu *fpu) } EXPORT_SYMBOL(save_fpregs_to_fpstate); +void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) +{ + /* + * AMD K7/K8 and later CPUs up to Zen don't save/restore + * FDP/FIP/FOP unless an exception is pending. Clear the x87 state + * here by setting it to fixed values. "m" is a random variable + * that should be in L1. + */ + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { + asm volatile( + "fnclex\n\t" + "emms\n\t" + "fildl %P[addr]" /* set F?P to defined value */ + : : [addr] "m" (fpstate)); + } + + if (use_xsave()) { + os_xrstor(&fpstate->xsave, mask); + } else { + if (use_fxsr()) + fxrstor(&fpstate->fxsave); + else + frstor(&fpstate->fsave); + } +} +EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate); + void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); -- Gitee From 740c9a4c56b46412f58daac3035d48dcfbb33f5a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:16 +0200 Subject: [PATCH 053/188] x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi() mainline inclusion from mainline-v5.14-rc1 commit 65e952102122bf89f0e4f1bebf8664e32587aaed category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 65e952102122 x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi(). -------------------------------- Rename it so it's clear that this is about user ABI features which can differ from the feature set which the kernel saves and restores because the kernel handles e.g. 
PKRU differently. But the user ABI (ptrace, signal frame) expects it to be there. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.211585137@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 7 ++++++- arch/x86/include/asm/fpu/xstate.h | 6 +++++- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/signal.c | 10 +++++----- arch/x86/kernel/fpu/xstate.c | 18 +++++++++--------- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 881a32ad6606..71b3bd1fdfaa 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -329,7 +329,12 @@ static inline void os_xrstor(struct xregs_state *xstate, u64 mask) */ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { - u64 mask = xfeatures_mask_user(); + /* + * Include the features which are not xsaved/rstored by the kernel + * internally, e.g. PKRU. That's user space ABI and also required + * to allow the signal handler to modify PKRU. + */ + u64 mask = xfeatures_mask_uabi(); u32 lmask = mask; u32 hmask = mask >> 32; int err; diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 5764cbe39014..af9ea134bf5e 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -83,7 +83,11 @@ static inline u64 xfeatures_mask_supervisor(void) return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } -static inline u64 xfeatures_mask_user(void) +/* + * The xfeatures which are enabled in XCR0 and expected to be in ptrace + * buffers and signal frames. 
+ */ +static inline u64 xfeatures_mask_uabi(void) { return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index afd0deee8cfd..12437383ff79 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -404,7 +404,7 @@ void fpu__clear_user_states(struct fpu *fpu) } /* Reset user states in registers. */ - restore_fpregs_from_init_fpstate(xfeatures_mask_user()); + restore_fpregs_from_init_fpstate(xfeatures_mask_uabi()); /* * Now all FPU registers have their desired values. Inform the FPU diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index b12665c38a30..a42bc9d0b1cc 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -257,14 +257,14 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) if (use_xsave()) { if (fx_only) { - init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; + init_bv = xfeatures_mask_uabi() & ~XFEATURE_MASK_FPSSE; r = fxrstor_from_user_sigframe(buf); if (!r) os_xrstor(&init_fpstate.xsave, init_bv); return r; } else { - init_bv = xfeatures_mask_user() & ~xbv; + init_bv = xfeatures_mask_uabi() & ~xbv; r = xrstor_from_user_sigframe(buf, xbv); if (!r && unlikely(init_bv)) @@ -420,7 +420,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); if (use_xsave() && !fx_only) { - u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; + u64 init_bv = xfeatures_mask_uabi() & ~user_xfeatures; ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) @@ -454,7 +454,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (use_xsave()) { u64 init_bv; - init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; + init_bv = xfeatures_mask_uabi() & ~XFEATURE_MASK_FPSSE; os_xrstor(&init_fpstate.xsave, init_bv); } @@ -549,7 +549,7 @@ void fpu__init_prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = 
FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask_user(); + fx_sw_reserved.xfeatures = xfeatures_mask_uabi(); fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (IS_ENABLED(CONFIG_IA32_EMULATION) || diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index de4ed9153835..0de7c63ef753 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -144,7 +144,7 @@ void fpu__init_cpu_xstate(void) * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user()); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. @@ -453,7 +453,7 @@ int xfeature_size(int xfeature_nr) static int validate_user_xstate_header(const struct xstate_header *hdr) { /* No unknown or supervisor features may be set */ - if (hdr->xfeatures & ~xfeatures_mask_user()) + if (hdr->xfeatures & ~xfeatures_mask_uabi()) return -EINVAL; /* Userspace must use the uncompacted format */ @@ -756,7 +756,7 @@ void __init fpu__init_system_xstate(void) cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); xfeatures_mask_all |= ecx + ((u64)edx << 32); - if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + if ((xfeatures_mask_uabi() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected happened * with the enumeration. 
Disable XSAVE and try to continue @@ -791,7 +791,7 @@ void __init fpu__init_system_xstate(void) * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ - update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user()); + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_uabi()); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); @@ -828,14 +828,14 @@ void fpu__resume_cpu(void) /* * Restore XCR0 on xsave capable CPUs: */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user()); + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); /* * Restore IA32_XSS. The same CPUID bit enumerates support * of XSAVES and MSR_IA32_XSS. */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } @@ -993,7 +993,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= xfeatures_mask_user(); + header.xfeatures &= xfeatures_mask_uabi(); break; } @@ -1038,7 +1038,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, * compacted init_fpstate. The gap tracking will zero this * later. */ - if (!(xfeatures_mask_user() & BIT_ULL(i))) + if (!(xfeatures_mask_uabi() & BIT_ULL(i))) continue; /* -- Gitee From a150eb5f790cbd294018c6e4d25d637222e50e2f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:17 +0200 Subject: [PATCH 054/188] x86/fpu: Dont restore PKRU in fpregs_restore_userspace() mainline inclusion from mainline-v5.14-rc1 commit 2ebe81c6d800576e1213f9d7cf0068017ae610c1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2ebe81c6d800 x86/fpu: Dont restore PKRU in fpregs_restore_userspace(). 
-------------------------------- switch_to() and flush_thread() write the task's PKRU value eagerly so the PKRU value of current is always valid in the hardware. That means there is no point in restoring PKRU on exit to user or when reactivating the task's FPU registers in the signal frame setup path. This allows to remove all the xstate buffer updates with PKRU values once the PKRU state is stored in thread struct while a task is scheduled out. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.303919033@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 16 +++++++++++++++- arch/x86/include/asm/fpu/xstate.h | 19 +++++++++++++++++++ arch/x86/kernel/fpu/core.c | 2 +- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 71b3bd1fdfaa..a22f9f1e3099 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -462,7 +462,21 @@ static inline void fpregs_restore_userregs(void) return; if (!fpregs_state_valid(fpu, cpu)) { - restore_fpregs_from_fpstate(&fpu->state); + u64 mask; + + /* + * This restores _all_ xstate which has not been + * established yet. + * + * If PKRU is enabled, then the PKRU value is already + * correct because it was either set in switch_to() or in + * flush_thread(). So it is excluded because it might be + * not up to date in current->thread.fpu.xsave state. 
+ */ + mask = xfeatures_mask_restore_user() | + xfeatures_mask_supervisor(); + __restore_fpregs_from_fpstate(&fpu->state, mask); + fpregs_activate(fpu); fpu->last_cpu = cpu; } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index af9ea134bf5e..6a0aaafb93ec 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -35,6 +35,14 @@ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR) +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) @@ -92,6 +100,17 @@ static inline u64 xfeatures_mask_uabi(void) return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } +/* + * The xfeatures which are restored by the kernel when returning to user + * mode. This is not necessarily the same as xfeatures_mask_uabi() as the + * kernel does not manage all XCR0 enabled features via xsave/xrstor as + * some of them have to be switched eagerly on context switch and exec(). + */ +static inline u64 xfeatures_mask_restore_user(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; +} + static inline u64 xfeatures_mask_independent(void) { if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 12437383ff79..470576ced907 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -404,7 +404,7 @@ void fpu__clear_user_states(struct fpu *fpu) } /* Reset user states in registers. */ - restore_fpregs_from_init_fpstate(xfeatures_mask_uabi()); + restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user()); /* * Now all FPU registers have their desired values. 
Inform the FPU -- Gitee From 71b20f3bb5de14149f9f6dc062c0ee990a97e3df Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 23 Jun 2021 14:02:18 +0200 Subject: [PATCH 055/188] x86/fpu: Add PKRU storage outside of task XSAVE buffer mainline inclusion from mainline-v5.14-rc1 commit 9782a712eb971ce483442076e79eb1d8d608646e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9782a712eb97 x86/fpu: Add PKRU storage outside of task XSAVE buffer. -------------------------------- PKRU is currently partly XSAVE-managed and partly not. It has space in the task XSAVE buffer and is context-switched by XSAVE/XRSTOR. However, it is switched more eagerly than FPU because there may be a need for PKRU to be up-to-date for things like copy_to/from_user() since PKRU affects user-permission memory accesses, not just accesses from userspace itself. This leaves PKRU in a very odd position. XSAVE brings very little value to the table for how Linux uses PKRU except for signal related XSTATE handling. Prepare to move PKRU away from being XSAVE-managed. Allocate space in the thread_struct for it and save/restore it in the context-switch path separately from the XSAVE-managed features. task->thread_struct.pkru is only valid when the task is scheduled out. For the current task the authoritative source is the hardware, i.e. it has to be retrieved via rdpkru(). Leave the XSAVE code in place for now to ensure bisectability. 
Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.399107624@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/processor.h | 9 +++++++++ arch/x86/kernel/process.c | 7 +++++++ arch/x86/kernel/process_64.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a3d0152a4361..8ca33a5e214d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -537,6 +537,15 @@ struct thread_struct { unsigned int iopl_warn:1; unsigned int sig_on_uaccess_err:1; + /* + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the tasks's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. + */ + u32 pkru; + /* Floating point and extended processor state */ struct fpu fpu; /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1e69b4db7e2b..836c3c96a36b 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -163,11 +163,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { + p->thread.pkru = pkru_get_init_value(); memset(childregs, 0, sizeof(struct pt_regs)); kthread_frame_init(frame, sp, arg); return 0; } + /* + * Clone current's PKRU value from hardware. tsk->thread.pkru + * is only valid when scheduled out. 
+ */ + p->thread.pkru = read_pkru(); + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 07283f92c9a5..c79452adff59 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -340,6 +340,29 @@ static __always_inline void load_seg_legacy(unsigned short prev_index, } } +/* + * Store prev's PKRU value and load next's PKRU value if they differ. PKRU + * is not XSTATE managed on context switch because that would require a + * lookup in the task's FPU xsave buffer and require to keep that updated + * in various places. + */ +static __always_inline void x86_pkru_load(struct thread_struct *prev, + struct thread_struct *next) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Stash the prev task's value: */ + prev->pkru = rdpkru(); + + /* + * PKRU writes are slightly expensive. Avoid them when not + * strictly necessary: + */ + if (prev->pkru != next->pkru) + wrpkru(next->pkru); +} + static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, struct thread_struct *next) { @@ -590,6 +613,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) x86_fsgsbase_load(prev, next); + x86_pkru_load(prev, next); + /* * Switch the PDA and FPU contexts. */ -- Gitee From 9853aca3813843660c5df78a6dff3922a952ab07 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 23 Jun 2021 14:02:19 +0200 Subject: [PATCH 056/188] x86/fpu: Hook up PKRU into ptrace() mainline inclusion from mainline-v5.14-rc1 commit e84ba47e313dbc097bf859bb6e4f9219883d5f78 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit e84ba47e313d x86/fpu: Hook up PKRU into ptrace(). -------------------------------- One nice thing about having PKRU be XSAVE-managed is that it gets naturally exposed into the XSAVE-using ABIs. 
Now that XSAVE will not be used to manage PKRU, these ABIs need to be manually enabled to deal with PKRU. ptrace() uses copy_xstate_to_uabi_buf() to collect the tracee's XSTATE. As PKRU is not in the task's XSTATE buffer, use task->thread.pkru for filling in the ptrace buffer. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.508770763@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 10 ++++------ arch/x86/kernel/fpu/xstate.c | 25 ++++++++++++++++++------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 6a0aaafb93ec..4ff4a0093a48 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -139,7 +139,7 @@ enum xstate_copy_mode { }; struct membuf; -void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 4575796d547b..66ed317ebc0d 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -78,7 +78,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, sizeof(fpu->state.fxsave)); } - copy_xstate_to_uabi_buf(to, &fpu->state.xsave, XSTATE_COPY_FX); + copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX); return 0; } @@ -126,14 +126,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, int xstateregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - struct fpu *fpu = &target->thread.fpu; - if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; - sync_fpstate(fpu); + sync_fpstate(&target->thread.fpu); - copy_xstate_to_uabi_buf(to,
&fpu->state.xsave, XSTATE_COPY_XSAVE); + copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE); return 0; } @@ -336,7 +334,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf mb = { .p = &fxsave, .left = sizeof(fxsave) }; /* Handle init state optimized xstate correctly */ - copy_xstate_to_uabi_buf(mb, &fpu->state.xsave, XSTATE_COPY_FP); + copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP); fx = &fxsave; } else { fx = &fpu->state.fxsave; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0de7c63ef753..f977f62142e7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -962,7 +962,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, /** * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor - * @xsave: The kernel xstate buffer to copy from + * @tsk: The task from which to copy the saved xstate * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming @@ -971,10 +971,11 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * * It supports partial copy but @to.pos always starts from zero. 
*/ -void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct xregs_state *xinit = &init_fpstate.xsave; struct xstate_header header; unsigned int zerofrom; @@ -1048,11 +1049,21 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, if (zerofrom < xstate_offsets[i]) membuf_zero(&to, xstate_offsets[i] - zerofrom); - copy_feature(header.xfeatures & BIT_ULL(i), &to, - __raw_xsave_addr(xsave, i), - __raw_xsave_addr(xinit, i), - xstate_sizes[i]); - + if (i == XFEATURE_PKRU) { + struct pkru_state pkru = {0}; + /* + * PKRU is not necessarily up to date in the + * thread's XSAVE buffer. Fill this part from the + * per-thread storage. + */ + pkru.pkru = tsk->thread.pkru; + membuf_write(&to, &pkru, sizeof(pkru)); + } else { + copy_feature(header.xfeatures & BIT_ULL(i), &to, + __raw_xsave_addr(xsave, i), + __raw_xsave_addr(xinit, i), + xstate_sizes[i]); + } /* * Keep track of the last copied state in the non-compacted * target buffer for gap zeroing. -- Gitee From fb7f5559902a67585babf2a0a284e5edab9c79cf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:20 +0200 Subject: [PATCH 057/188] x86/fpu: Mask PKRU from kernel XRSTOR[S] operations mainline inclusion from mainline-v5.14-rc1 commit 30a304a138738d71a09c730ca8044e9662de0dbf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 30a304a13873 x86/fpu: Mask PKRU from kernel XRSTOR[S] operations. -------------------------------- As the PKRU state is managed separately restoring it from the xstate buffer would be counterproductive as it might either restore a stale value or reinit the PKRU state to 0. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.606745195@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/include/asm/fpu/xstate.h | 10 ++++++++++ arch/x86/kernel/fpu/xstate.c | 1 + arch/x86/mm/extable.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a22f9f1e3099..24b83fdecb57 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -264,7 +264,7 @@ static inline void fxsave(struct fxregs_state *fx) */ static inline void os_xrstor_booting(struct xregs_state *xstate) { - u64 mask = -1; + u64 mask = xfeatures_mask_fpstate(); u32 lmask = mask; u32 hmask = mask >> 32; int err; @@ -393,7 +393,7 @@ extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) { - __restore_fpregs_from_fpstate(fpstate, -1); + __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4ff4a0093a48..109dfcc75299 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -111,6 +111,16 @@ static inline u64 xfeatures_mask_restore_user(void) return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; } +/* + * Like xfeatures_mask_restore_user() but additionally restors the + * supported supervisor states. 
+ */ +static inline u64 xfeatures_mask_fpstate(void) +{ + return xfeatures_mask_all & \ + (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); +} + static inline u64 xfeatures_mask_independent(void) { if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f977f62142e7..121a111d8928 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -60,6 +60,7 @@ static short xsave_cpuid_features[] __initdata = { * XSAVE buffer, both supervisor and user xstates. */ u64 xfeatures_mask_all __ro_after_init; +EXPORT_SYMBOL_GPL(xfeatures_mask_all); static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 7d7bd39080dc..ad63a2d99949 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -65,7 +65,7 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); - __restore_fpregs_from_fpstate(&init_fpstate, -1); + __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); return true; } EXPORT_SYMBOL_GPL(ex_handler_fprestore); -- Gitee From 6f507c911646ddbe5e391f5ba9b2ef590db52c45 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:21 +0200 Subject: [PATCH 058/188] x86/fpu: Remove PKRU handling from switch_fpu_finish() mainline inclusion from mainline-v5.14-rc1 commit 954436989cc550dd91aab98363240c9c0a4b7e23 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 954436989cc5 x86/fpu: Remove PKRU handling from switch_fpu_finish(). -------------------------------- PKRU is already updated and the xstate is no longer the proper source of information.
[ bp: Use cpu_feature_enabled() ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.708180184@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 34 ++++------------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 24b83fdecb57..5a18694a89b2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,39 +528,13 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Load PKRU from the FPU context if available. Delay loading of the - * complete FPU state until the return to userland. + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. */ static inline void switch_fpu_finish(struct fpu *new_fpu) { - u32 pkru_val = init_pkru_value; - struct pkru_state *pk; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return; - - set_thread_flag(TIF_NEED_FPU_LOAD); - - if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) - return; - - /* - * PKRU state is switched eagerly because it needs to be valid before we - * return to userland e.g. for a copy_to_user() operation. - */ - if (!(current->flags & PF_KTHREAD)) { - /* - * If the PKRU bit in xsave.header.xfeatures is not set, - * then the PKRU component was in init state, which means - * XRSTOR will set PKRU to 0. If the bit is not set then - * get_xsave_addr() will return NULL because the PKRU value - * in memory is not valid. This means pkru_val has to be - * set to 0 and not to init_pkru_value. - */ - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - pkru_val = pk ? 
pk->pkru : 0; - } - __write_pkru(pkru_val); + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); } #endif /* _ASM_X86_FPU_INTERNAL_H */ -- Gitee From 88cae4296eb10dbac3efe92c48e8a9e6028b71a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:22 +0200 Subject: [PATCH 059/188] x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate() mainline inclusion from mainline-v5.14-rc1 commit 0e8c54f6b2c8b1037cef9276e451522ee90ed969 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 0e8c54f6b2c8 x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate(). -------------------------------- PKRU for a task is stored in task->thread.pkru when the task is scheduled out. For 'current' the authoritative source of PKRU is the hardware. fpu_reset_fpstate() has two callers: 1) fpu__clear_user_states() for !FPU systems. For those PKRU is irrelevant 2) fpu_flush_thread() which is invoked from flush_thread(). flush_thread() resets the hardware to the kernel restrictive default value. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.802850233@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 470576ced907..5295cbafc92e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -337,23 +337,6 @@ static inline unsigned int init_fpstate_copy_size(void) return sizeof(init_fpstate.xsave); } -/* Temporary workaround. Will be removed once PKRU and XSTATE are untangled. 
*/ -static inline void pkru_set_default_in_xstate(struct xregs_state *xsave) -{ - struct pkru_state *pk; - - if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) - return; - /* - * Force XFEATURE_PKRU to be set in the header otherwise - * get_xsave_addr() does not work and it also needs to be set to - * make XRSTOR(S) load it. - */ - xsave->header.xfeatures |= XFEATURE_MASK_PKRU; - pk = get_xsave_addr(xsave, XFEATURE_PKRU); - pk->pkru = pkru_get_init_value(); -} - /* * Reset current->fpu memory state to the init values. */ @@ -371,9 +354,12 @@ static void fpu_reset_fpstate(void) * * Do not use fpstate_init() here. Just copy init_fpstate which has * the correct content already except for PKRU. + * + * PKRU handling does not rely on the xstate when restoring for + * user space as PKRU is eagerly written in switch_to() and + * flush_thread(). */ memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size()); - pkru_set_default_in_xstate(&fpu->state.xsave); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } -- Gitee From a26e2255f1a70e84b7399ea86fc7959aced9e73b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:23 +0200 Subject: [PATCH 060/188] x86/pkru: Remove xstate fiddling from write_pkru() mainline inclusion from mainline-v5.14-rc1 commit 72a6c08c44e4460e39315ca828f60b8d5afd6b19 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 72a6c08c44e4 x86/pkru: Remove xstate fiddling from write_pkru(). -------------------------------- The PKRU value of a task is stored in task->thread.pkru when the task is scheduled out. PKRU is restored on schedule in from there. So keeping the XSAVE buffer up to date is a pointless exercise. Remove the xstate fiddling and cleanup all related functions. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.897372712@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/pkru.h | 17 ++++------------- arch/x86/include/asm/special_insns.h | 14 +------------- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 7e4550911c51..ccc539faa5bb 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -41,23 +41,14 @@ static inline u32 read_pkru(void) static inline void write_pkru(u32 pkru) { - struct pkru_state *pk; - if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; - - pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU); - /* - * The PKRU value in xstate needs to be in sync with the value that is - * written to the CPU. The FPU restore on return to userland would - * otherwise load the previous value again. + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. */ - fpregs_lock(); - if (pk) - pk->pkru = pkru; - __write_pkru(pkru); - fpregs_unlock(); + if (pkru != rdpkru()) + wrpkru(pkru); } static inline void pkru_write_default(void) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 415693f5d909..940959a40443 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -104,25 +104,13 @@ static inline void wrpkru(u32 pkru) : : "a" (pkru), "c"(ecx), "d"(edx)); } -static inline void __write_pkru(u32 pkru) -{ - /* - * WRPKRU is relatively expensive compared to RDPKRU. - * Avoid WRPKRU when it would not change the value.
- */ - if (pkru == rdpkru()) - return; - - wrpkru(pkru); -} - #else static inline u32 rdpkru(void) { return 0; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { } #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9e3153d10f39..1d46232904d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -946,7 +946,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) && vcpu->arch.pkru != vcpu->arch.host_pkru) - __write_pkru(vcpu->arch.pkru); + write_pkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); @@ -957,7 +957,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { vcpu->arch.pkru = rdpkru(); if (vcpu->arch.pkru != vcpu->arch.host_pkru) - __write_pkru(vcpu->arch.host_pkru); + write_pkru(vcpu->arch.host_pkru); } if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { -- Gitee From dab153b116aaff8e8d4511ec81786d86ba3352e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:24 +0200 Subject: [PATCH 061/188] x86/fpu: Mark init_fpstate __ro_after_init mainline inclusion from mainline-v5.14-rc1 commit bf68a7d98922e1665019b8bf0c4791500837c857 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit bf68a7d98922 x86/fpu: Mark init_fpstate __ro_after_init. -------------------------------- Nothing has to write into that state after init. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121456.992342060@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 5295cbafc92e..7ada7bd03a32 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -23,7 +23,7 @@ * Represents the initial FPU state. 
It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ -union fpregs_state init_fpstate __read_mostly; +union fpregs_state init_fpstate __ro_after_init; /* * Track whether the kernel is using the FPU state -- Gitee From 414a0bf00acc3d86179816289b779a42192f2337 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:25 +0200 Subject: [PATCH 062/188] x86/fpu/signal: Move initial checks into fpu__restore_sig() mainline inclusion from mainline-v5.14-rc1 commit 99a5901951b70251965b0d1542d4a8c616842a99 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 99a5901951b7 x86/fpu/signal: Move initial checks into fpu__restore_sig(). -------------------------------- __fpu__restore_sig() is convoluted and some of the basic checks can trivially be done in the calling function as well as the final error handling of clearing user state. [ bp: Fixup typos. ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.086336154@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 76 +++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a42bc9d0b1cc..42e85c3fe9de 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -277,11 +277,11 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) return frstor_from_user_sigframe(buf); } -static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) +static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, + bool ia32_fxstate) { struct user_i387_ia32_struct *envp = NULL; int state_size = fpu_kernel_xstate_size; - int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; @@ -289,26 +289,6 @@ static int 
__fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int fx_only = 0; int ret = 0; - ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || - IS_ENABLED(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu__clear_user_states(fpu); - return 0; - } - - if (!access_ok(buf, size)) { - ret = -EACCES; - goto out; - } - - if (!static_cpu_has(X86_FEATURE_FPU)) { - ret = fpregs_soft_set(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), - NULL, buf); - goto out; - } - if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { @@ -391,7 +371,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ ret = __copy_from_user(&env, buf, sizeof(env)); if (ret) - goto out; + return ret; envp = &env; } @@ -424,7 +404,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) - goto out; + return ret; sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, fx_only); @@ -442,10 +422,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else if (use_fxsr()) { ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); - if (ret) { - ret = -EFAULT; - goto out; - } + if (ret) + return -EFAULT; sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, fx_only); @@ -462,7 +440,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) - goto out; + return ret; fpregs_lock(); ret = frstor_safe(&fpu->state.fsave); @@ -472,10 +450,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) else fpregs_deactivate(fpu); fpregs_unlock(); - -out: - if (ret) - fpu__clear_user_states(fpu); return ret; } @@ -490,15 +464,47 @@ static inline int xstate_sigframe_size(void) */ int fpu__restore_sig(void __user *buf, int ia32_frame) { + 
unsigned int size = xstate_sigframe_size(); + struct fpu *fpu = &current->thread.fpu; void __user *buf_fx = buf; - int size = xstate_sigframe_size(); + bool ia32_fxstate = false; + int ret; + + if (unlikely(!buf)) { + fpu__clear_user_states(fpu); + return 0; + } + ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || + IS_ENABLED(CONFIG_IA32_EMULATION)); + + /* + * Only FXSR enabled systems need the FX state quirk. + * FRSTOR does not need it and can use the fast path. + */ if (ia32_frame && use_fxsr()) { buf_fx = buf + sizeof(struct fregs_state); size += sizeof(struct fregs_state); + ia32_fxstate = true; } - return __fpu__restore_sig(buf, buf_fx, size); + if (!access_ok(buf, size)) { + ret = -EACCES; + goto out; + } + + if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { + ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + } else { + ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); + } + +out: + if (unlikely(ret)) + fpu__clear_user_states(fpu); + return ret; } unsigned long -- Gitee From 90b6ac2c49dc7443ffb4c88025164e0ec955305c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:26 +0200 Subject: [PATCH 063/188] x86/fpu/signal: Remove the legacy alignment check mainline inclusion from mainline-v5.14-rc1 commit 9ba589f9cdbd8906465b108bc7ec0fc1519a06d3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9ba589f9cdbd x86/fpu/signal: Remove the legacy alignment check. -------------------------------- Checking for the XSTATE buffer being 64-byte aligned, and if not, deciding just to restore the FXSR state is daft. If user space provides an unaligned math frame and has the extended state magic set in the FX software reserved bytes, then it really can keep the pieces. If the frame is unaligned and the FX software magic is not set, then fx_only is already set and the restore will use fxrstor. Remove it.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.184149902@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 42e85c3fe9de..8a327c05bb86 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -306,9 +306,6 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, } } - if ((unsigned long)buf_fx % 64) - fx_only = 1; - if (!ia32_fxstate) { /* * Attempt to restore the FPU registers directly from user -- Gitee From aebd32f1b5dc2bd7906bc0253130d73577a967d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:27 +0200 Subject: [PATCH 064/188] x86/fpu/signal: Sanitize the xstate check on sigframe mainline inclusion from mainline-v5.14-rc1 commit 1258a8c896044564514c1b53795ba3033b1e9fd6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1258a8c89604 x86/fpu/signal: Sanitize the xstate check on sigframe. -------------------------------- Utilize the check for the extended state magic in the FX software reserved bytes and set the parameters for restoring fx_only in the relevant members of fw_sw_user. This allows further cleanups on top because the data is consistent. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.277738268@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 70 +++++++++++++++++------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8a327c05bb86..d55241038871 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -15,29 +15,30 @@ #include #include -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; +static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; +static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -static inline int check_for_xstate(struct fxregs_state __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) +static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); + void __user *fpstate = fxbuf; unsigned int magic2; - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; + if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) + return -EFAULT; /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || fx_sw->xstate_size > fpu_user_xstate_size || fx_sw->xstate_size > fx_sw->extended_size) - return -1; + goto setfx; /* * Check for the presence of second magic word at the end of memory @@ -45,10 +46,18 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, * fpstate layout with out copying the extended state information * in the memory layout. 
*/ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) + return -EFAULT; + if (likely(magic2 == FP_XSTATE_MAGIC2)) + return 0; +setfx: + trace_x86_fpu_xstate_check_failed(&current->thread.fpu); + + /* Set the parameters for fx only state */ + fx_sw->magic1 = 0; + fx_sw->xstate_size = sizeof(struct fxregs_state); + fx_sw->xfeatures = XFEATURE_MASK_FPSSE; return 0; } @@ -213,21 +222,15 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) static inline void sanitize_restored_user_xstate(union fpregs_state *state, - struct user_i387_ia32_struct *ia32_env, - u64 user_xfeatures, int fx_only) + struct user_i387_ia32_struct *ia32_env, u64 mask) { struct xregs_state *xsave = &state->xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { /* - * Clear all feature bits which are not set in - * user_xfeatures and clear all extended features - * for fx_only mode. - */ - u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; - - /* + * Clear all feature bits which are not set in mask. + * * Supervisor state has to be preserved. The sigframe * restore can only modify user features, i.e. @mask * cannot contain them. @@ -286,24 +289,19 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; u64 user_xfeatures = 0; - int fx_only = 0; + bool fx_only = false; int ret = 0; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state.
- */ - state_size = sizeof(struct fxregs_state); - fx_only = 1; - trace_x86_fpu_xstate_check_failed(fpu); - } else { - state_size = fx_sw_user.xstate_size; - user_xfeatures = fx_sw_user.xfeatures; - } + + ret = check_xstate_in_sigframe(buf_fx, &fx_sw_user); + if (unlikely(ret)) + return ret; + + fx_only = !fx_sw_user.magic1; + state_size = fx_sw_user.xstate_size; + user_xfeatures = fx_sw_user.xfeatures; } if (!ia32_fxstate) { @@ -403,8 +401,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (ret) return ret; - sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, - fx_only); + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures); fpregs_lock(); if (unlikely(init_bv)) @@ -422,8 +419,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (ret) return -EFAULT; - sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, - fx_only); + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures); fpregs_lock(); if (use_xsave()) { -- Gitee From f568582e2732d4a5b605b894123e12dbd65b9f2a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:28 +0200 Subject: [PATCH 065/188] x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing() mainline inclusion from mainline-v5.14-rc1 commit cdcec1b77001e7f2cd10dccfc6d9b6d5d3f1f3ea category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit cdcec1b77001 x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing(). -------------------------------- Now that user_xfeatures is correctly set when xsave is enabled, remove the duplicated initialization of components. Rename the function while at it. Intel-SIG: commit cdcec1b77001 x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing(). 
Intel Advanced Matrix Extensions(AMX) is one of the key features on Sapphire Rapids(SPR) and it depends on xsave, this patch is part of Thomas Gleixner's x86 FPU code cleanup that upstream AMX implementation is building on, and is also key to all code for being developed in upstream that depends on xsave. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.377341297@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index d55241038871..a1a70134e1fe 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -251,33 +251,27 @@ sanitize_restored_user_xstate(union fpregs_state *state, } /* - * Restore the extended state if present. Otherwise, restore the FP/SSE state. + * Restore the FPU state directly from the userspace signal frame. 
*/ -static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { - u64 init_bv; - int r; - if (use_xsave()) { - if (fx_only) { - init_bv = xfeatures_mask_uabi() & ~XFEATURE_MASK_FPSSE; + u64 init_bv = xfeatures_mask_uabi() & ~xrestore; + int ret; - r = fxrstor_from_user_sigframe(buf); - if (!r) - os_xrstor(&init_fpstate.xsave, init_bv); - return r; - } else { - init_bv = xfeatures_mask_uabi() & ~xbv; - - r = xrstor_from_user_sigframe(buf, xbv); - if (!r && unlikely(init_bv)) - os_xrstor(&init_fpstate.xsave, init_bv); - return r; - } + if (likely(!fx_only)) + ret = xrstor_from_user_sigframe(buf, xrestore); + else + ret = fxrstor_from_user_sigframe(buf); + + if (!ret && unlikely(init_bv)) + os_xrstor(&init_fpstate.xsave, init_bv); + return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); - } else + } else { return frstor_from_user_sigframe(buf); + } } static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, @@ -314,7 +308,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, */ fpregs_lock(); pagefault_disable(); - ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only); + ret = restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); pagefault_enable(); if (!ret) { -- Gitee From 34d072efece867ed12220693147490c87bf6e7ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:29 +0200 Subject: [PATCH 066/188] x86/fpu/signal: Split out the direct restore code mainline inclusion from mainline-v5.14-rc1 commit 0a6c2e9ec91c96bde1e8ce063180ac6e05e680f7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 0a6c2e9ec91c x86/fpu/signal: Split out the direct restore code. -------------------------------- Prepare for smarter failure handling of the direct restore. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.493455414@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 112 ++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a1a70134e1fe..aa268d9cf228 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -250,10 +250,8 @@ sanitize_restored_user_xstate(union fpregs_state *state, } } -/* - * Restore the FPU state directly from the userspace signal frame. - */ -static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) +static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, + bool fx_only) { if (use_xsave()) { u64 init_bv = xfeatures_mask_uabi() & ~xrestore; @@ -274,6 +272,57 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only } } +static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) +{ + struct fpu *fpu = &current->thread.fpu; + int ret; + + fpregs_lock(); + pagefault_disable(); + ret = __restore_fpregs_from_user(buf, xrestore, fx_only); + pagefault_enable(); + + if (unlikely(ret)) { + /* + * The above did an FPU restore operation, restricted to + * the user portion of the registers, and failed, but the + * microcode might have modified the FPU registers + * nevertheless. + * + * If the FPU registers do not belong to current, then + * invalidate the FPU register state otherwise the task + * might preempt current and return to user space with + * corrupted FPU registers. + * + * In case current owns the FPU registers then no further + * action is required. The fixup in the slow path will + * handle it correctly.
+ */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __cpu_invalidate_fpregs_state(); + fpregs_unlock(); + return ret; + } + + /* + * Restore supervisor states: previous context switch etc has done + * XSAVES and saved the supervisor states in the kernel buffer from + * which they can be restored now. + * + * It would be optimal to handle this with a single XRSTORS, but + * this does not work because the rest of the FPU registers have + * been restored from a user buffer directly. The single XRSTORS + * happens below, when the user buffer has been copied to the + * kernel one. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) + os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); + + fpregs_mark_activate(); + fpregs_unlock(); + return 0; +} + static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { @@ -298,61 +347,16 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, user_xfeatures = fx_sw_user.xfeatures; } - if (!ia32_fxstate) { + if (likely(!ia32_fxstate)) { /* * Attempt to restore the FPU registers directly from user - * memory. For that to succeed, the user access cannot cause - * page faults. If it does, fall back to the slow path below, - * going through the kernel buffer with the enabled pagefault - * handler. + * memory. For that to succeed, the user access cannot cause page + * faults. If it does, fall back to the slow path below, going + * through the kernel buffer with the enabled pagefault handler. */ - fpregs_lock(); - pagefault_disable(); ret = restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); - pagefault_enable(); - if (!ret) { - - /* - * Restore supervisor states: previous context switch - * etc has done XSAVES and saved the supervisor states - * in the kernel buffer from which they can be restored - * now. 
- * - * We cannot do a single XRSTORS here - which would - * be nice - because the rest of the FPU registers are - * being restored from a user buffer directly. The - * single XRSTORS happens below, when the user buffer - * has been copied to the kernel one. - */ - if (test_thread_flag(TIF_NEED_FPU_LOAD) && - xfeatures_mask_supervisor()) { - os_xrstor(&fpu->state.xsave, - xfeatures_mask_supervisor()); - } - fpregs_mark_activate(); - fpregs_unlock(); + if (likely(!ret)) return 0; - } - - /* - * The above did an FPU restore operation, restricted to - * the user portion of the registers, and failed, but the - * microcode might have modified the FPU registers - * nevertheless. - * - * If the FPU registers do not belong to current, then - * invalidate the FPU register state otherwise the task might - * preempt current and return to user space with corrupted - * FPU registers. - * - * In case current owns the FPU registers then no further - * action is required. The fixup below will handle it - * correctly. - */ - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - __cpu_invalidate_fpregs_state(); - - fpregs_unlock(); } else { /* * For 32-bit frames with fxstate, copy the fxstate so it can -- Gitee From 0303856cdaf2c73182deabf1ab62a11ad57a3ab3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:31 +0200 Subject: [PATCH 067/188] x86/fpu/signal: Handle #PF in the direct restore path mainline inclusion from mainline-v5.14-rc1 commit fcb3635f5018e53024c6be3c3213737f469f74ff category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit fcb3635f5018 x86/fpu/signal: Handle #PF in the direct restore path. -------------------------------- If *RSTOR raises an exception, then the slow path is taken. That's wrong because if the reason was not #PF then going through the slow path is waste of time because that will end up with the same conclusion that the data is invalid. 
Now that the wrapper around *RSTOR returns a negative error code, which is the negated trap number, it's possible to differentiate. If the *RSTOR raised #PF then handle it directly in the fast path and if it was some other exception, e.g. #GP, then give up and do not try the fast path. This removes the legacy frame FRSTOR code from the slow path because FRSTOR is not an ia32_fxstate frame and is therefore handled in the fast path. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.696022863@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 67 ++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index aa268d9cf228..4c252d0c8e6a 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -272,11 +272,17 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, } } -static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) +/* + * Attempt to restore the FPU registers directly from user memory. + * Pagefaults are handled and any errors returned are fatal. + */ +static int restore_fpregs_from_user(void __user *buf, u64 xrestore, + bool fx_only, unsigned int size) { struct fpu *fpu = &current->thread.fpu; int ret; +retry: fpregs_lock(); pagefault_disable(); ret = __restore_fpregs_from_user(buf, xrestore, fx_only); @@ -293,14 +299,18 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only * invalidate the FPU register state otherwise the task * might preempt current and return to user space with * corrupted FPU registers. - * - * In case current owns the FPU registers then no further - * action is required. The fixup in the slow path will - * handle it correctly.
*/ if (test_thread_flag(TIF_NEED_FPU_LOAD)) __cpu_invalidate_fpregs_state(); fpregs_unlock(); + + /* Try to handle #PF, but anything else is fatal. */ + if (ret != -EFAULT) + return -EINVAL; + + ret = fault_in_pages_readable(buf, size); + if (!ret) + goto retry; return ret; } @@ -311,9 +321,7 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only * * It would be optimal to handle this with a single XRSTORS, but * this does not work because the rest of the FPU registers have - * been restored from a user buffer directly. The single XRSTORS - * happens below, when the user buffer has been copied to the - * kernel one. + * been restored from a user buffer directly. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); @@ -326,14 +334,13 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { - struct user_i387_ia32_struct *envp = NULL; int state_size = fpu_kernel_xstate_size; struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; u64 user_xfeatures = 0; bool fx_only = false; - int ret = 0; + int ret; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -354,20 +361,19 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * faults. If it does, fall back to the slow path below, going * through the kernel buffer with the enabled pagefault handler. */ - ret = restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); - if (likely(!ret)) - return 0; - } else { - /* - * For 32-bit frames with fxstate, copy the fxstate so it can - * be reconstructed later. 
- */ - ret = __copy_from_user(&env, buf, sizeof(env)); - if (ret) - return ret; - envp = &env; + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, + state_size); } + /* + * Copy the legacy state because the FP portion of the FX frame has + * to be ignored for histerical raisins. The legacy state is folded + * in once the larger state has been copied. + */ + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) + return ret; + /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is * not modified on context switch and that the xstate is considered @@ -382,8 +388,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * supervisor state is preserved. Save the full state for * simplicity. There is no point in optimizing this by only * saving the supervisor states and then shuffle them to - * the right place in memory. This is the slow path and the - * above XRSTOR failed or ia32_fxstate is true. Shrug. + * the right place in memory. It's ia32 mode. Shrug. 
*/ if (xfeatures_mask_supervisor()) os_xsave(&fpu->state.xsave); @@ -399,7 +404,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (ret) return ret; - sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures); + sanitize_restored_user_xstate(&fpu->state, &env, user_xfeatures); fpregs_lock(); if (unlikely(init_bv)) @@ -412,12 +417,12 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, ret = os_xrstor_safe(&fpu->state.xsave, user_xfeatures | xfeatures_mask_supervisor()); - } else if (use_fxsr()) { + } else { ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); if (ret) return -EFAULT; - sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures); + sanitize_restored_user_xstate(&fpu->state, &env, user_xfeatures); fpregs_lock(); if (use_xsave()) { @@ -428,14 +433,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, } ret = fxrstor_safe(&fpu->state.fxsave); - } else { - ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); - if (ret) - return ret; - - fpregs_lock(); - ret = frstor_safe(&fpu->state.fsave); } + if (!ret) fpregs_mark_activate(); else -- Gitee From 7b6001a1fbf327ecfda40039c21c8ebe4819fe0d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Jun 2021 14:02:32 +0200 Subject: [PATCH 068/188] x86/fpu/signal: Let xrstor handle the features to init mainline inclusion from mainline-v5.14-rc1 commit 6f9866a166cd1ad3ebb2dcdb3874aa8fee8dea2f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6f9866a166cd x86/fpu/signal: Let xrstor handle the features to init. -------------------------------- There is no reason to do an extra XRSTOR from init_fpstate for feature bits which have been cleared by user space in the FX magic xfeatures storage. Just clear them in the task's XSTATE header and do a full restore which will put these cleared features into init state. 
There is no real difference in performance because the current code already does a full restore when the xfeatures bits are preserved as the signal frame setup has stored them, which is the full UABI feature set. [ bp: Use the negated mxcsr_feature_mask in the MXCSR check. ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210623121457.804115017@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 89 +++++++++++++----------------------- 1 file changed, 31 insertions(+), 58 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 4c252d0c8e6a..445c57c9c539 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -220,36 +220,6 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) return 0; } -static inline void -sanitize_restored_user_xstate(union fpregs_state *state, - struct user_i387_ia32_struct *ia32_env, u64 mask) -{ - struct xregs_state *xsave = &state->xsave; - struct xstate_header *header = &xsave->header; - - if (use_xsave()) { - /* - * Clear all feature bits which are not set in mask. - * - * Supervisor state has to be preserved. The sigframe - * restore can only modify user features, i.e. @mask - * cannot contain them. - */ - header->xfeatures &= mask | xfeatures_mask_supervisor(); - } - - if (use_fxsr()) { - /* - * mscsr reserved bits must be masked to zero for security - * reasons. 
- */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - if (ia32_env) - convert_to_fxsr(&state->fxsave, ia32_env); - } -} - static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { @@ -352,6 +322,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, fx_only = !fx_sw_user.magic1; state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; + } else { + user_xfeatures = XFEATURE_MASK_FPSSE; } if (likely(!ia32_fxstate)) { @@ -395,54 +367,55 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); + __cpu_invalidate_fpregs_state(); fpregs_unlock(); if (use_xsave() && !fx_only) { - u64 init_bv = xfeatures_mask_uabi() & ~user_xfeatures; - ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) return ret; + } else { + if (__copy_from_user(&fpu->state.fxsave, buf_fx, + sizeof(fpu->state.fxsave))) + return -EFAULT; - sanitize_restored_user_xstate(&fpu->state, &env, user_xfeatures); + /* Reject invalid MXCSR values. */ + if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; - fpregs_lock(); - if (unlikely(init_bv)) - os_xrstor(&init_fpstate.xsave, init_bv); + /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ + if (use_xsave()) + fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; + } + + /* Fold the legacy FP storage */ + convert_to_fxsr(&fpu->state.fxsave, &env); + fpregs_lock(); + if (use_xsave()) { /* - * Restore previously saved supervisor xstates along with - * copied-in user xstates. + * Remove all UABI feature bits not set in user_xfeatures + * from the memory xstate header which makes the full + * restore below bring them into init state. This works for + * fx_only mode as well because that has only FP and SSE + * set in user_xfeatures. + * + * Preserve supervisor states! 
*/ - ret = os_xrstor_safe(&fpu->state.xsave, - user_xfeatures | xfeatures_mask_supervisor()); + u64 mask = user_xfeatures | xfeatures_mask_supervisor(); + fpu->state.xsave.header.xfeatures &= mask; + ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all); } else { - ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); - if (ret) - return -EFAULT; - - sanitize_restored_user_xstate(&fpu->state, &env, user_xfeatures); - - fpregs_lock(); - if (use_xsave()) { - u64 init_bv; - - init_bv = xfeatures_mask_uabi() & ~XFEATURE_MASK_FPSSE; - os_xrstor(&init_fpstate.xsave, init_bv); - } - ret = fxrstor_safe(&fpu->state.fxsave); } - if (!ret) + if (likely(!ret)) fpregs_mark_activate(); - else - fpregs_deactivate(fpu); + fpregs_unlock(); return ret; } - static inline int xstate_sigframe_size(void) { return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : -- Gitee From f741716ebafcbad1067df0aef9786d8b480a6049 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Jun 2021 17:09:18 +0200 Subject: [PATCH 069/188] x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again mainline inclusion from mainline-v5.14-rc1 commit 93c2cdc975aab53c222472c5b96c2d41dbeb350c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 93c2cdc975aa x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again. -------------------------------- The change which made copy_xstate_to_uabi_buf() usable for [x]fpregs_get() removed the zeroing of the header which means the header, which is copied to user space later, contains except for the xfeatures member, random stack content. Add the memset() back to zero it before usage. 
Fixes: eb6f51723f03 ("x86/fpu: Make copy_xstate_to_kernel() usable for [x]fpregs_get()") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/875yy3wb8h.ffs@nanos.tec.linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 121a111d8928..705a265325fe 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -982,6 +982,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, unsigned int zerofrom; int i; + memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; /* Mask out the feature bits depending on copy mode */ -- Gitee From 90145d163b735489d6d7244c0edbdfdac5416b78 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 6 Oct 2021 18:33:52 +0200 Subject: [PATCH 070/188] x86/fpu: Restore the masking out of reserved MXCSR bits mainline inclusion from mainline-v5.15-rc5 commit d298b03506d3e161f7492c440babb0bfae35e650 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d298b03506d3 x86/fpu: Restore the masking out of reserved MXCSR bits. -------------------------------- Ser Olmy reported a boot failure: init[1] bad frame in sigreturn frame:(ptrval) ip:b7c9fbe6 sp:bf933310 orax:ffffffff \ in libc-2.33.so[b7bed000+156000] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b CPU: 0 PID: 1 Comm: init Tainted: G W 5.14.9 #1 Hardware name: Hewlett-Packard HP PC/HP Board, BIOS JD.00.06 12/06/2001 Call Trace: dump_stack_lvl dump_stack panic do_exit.cold do_group_exit get_signal arch_do_signal_or_restart ? force_sig_info_to_task ? 
force_sig exit_to_user_mode_prepare syscall_exit_to_user_mode do_int80_syscall_32 entry_INT80_32 on an old 32-bit Intel CPU: vendor_id : GenuineIntel cpu family : 6 model : 6 model name : Celeron (Mendocino) stepping : 5 microcode : 0x3 Ser bisected the problem to the commit in Fixes. tglx suggested reverting the rejection of invalid MXCSR values which this commit introduced and replacing it with what the old code did - simply masking them out to zero. Further debugging confirmed his suggestion: fpu->state.fxsave.mxcsr: 0xb7be13b4, mxcsr_feature_mask: 0xffbf WARNING: CPU: 0 PID: 1 at arch/x86/kernel/fpu/signal.c:384 __fpu_restore_sig+0x51f/0x540 so restore the original behavior only for 32-bit kernels where you have ancient machines with buggy hardware. For 32-bit programs on 64-bit kernels, user space which supplies wrong MXCSR values is considered malicious so fail the sigframe restoration there. Fixes: 6f9866a166cd ("x86/fpu/signal: Let xrstor handle the features to init") Reported-by: Ser Olmy Signed-off-by: Borislav Petkov Tested-by: Ser Olmy Cc: Link: https://lkml.kernel.org/r/YVtA67jImg3KlBTw@zn.tnic Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 445c57c9c539..fa17a27390ab 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -379,9 +379,14 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, sizeof(fpu->state.fxsave))) return -EFAULT; - /* Reject invalid MXCSR values. */ - if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) - return -EINVAL; + if (IS_ENABLED(CONFIG_X86_64)) { + /* Reject invalid MXCSR values. */ + if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; + } else { + /* Mask invalid bits out for historical reasons (broken hardware). 
*/ + fpu->state.fxsave.mxcsr &= ~mxcsr_feature_mask; + } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) -- Gitee From 4cb0f0d58820a60e71fe96ff46faba31951ba8a5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 15 Oct 2021 12:46:25 +0200 Subject: [PATCH 071/188] x86/fpu: Mask out the invalid MXCSR bits properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v5.15-rc6 commit b2381acd3fd9bacd2c63f53b2c610c89959b31cc category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b2381acd3fd9 x86/fpu: Mask out the invalid MXCSR bits properly. -------------------------------- This is a fix for the fix (yeah, /facepalm). The correct mask to use is not the negation of the MXCSR_MASK but the actual mask which contains the supported bits in the MXCSR register. Reported and debugged by Ville Syrjälä Fixes: d298b03506d3 ("x86/fpu: Restore the masking out of reserved MXCSR bits") Signed-off-by: Borislav Petkov Tested-by: Ville Syrjälä Tested-by: Ser Olmy Cc: Link: https://lore.kernel.org/r/YWgYIYXLriayyezv@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index fa17a27390ab..831b25c5e705 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -385,7 +385,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, return -EINVAL; } else { /* Mask invalid bits out for historical reasons (broken hardware). 
*/ - fpu->state.fxsave.mxcsr &= ~mxcsr_feature_mask; + fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ -- Gitee From 2ea4a9c3e9a1c3475c37fb255ffb8fbea027b4cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:12 +0200 Subject: [PATCH 072/188] x86/extable: Tidy up redundant handler functions mainline inclusion from mainline-v5.16-rc1 commit 326b567f82df0c4c8f50092b9af9a3014616fb3c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 326b567f82df x86/extable: Tidy up redundant handler functions. -------------------------------- No need to have the same code all over the place. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132524.963232825@linutronix.de Signed-off-by: Lin Wang --- arch/x86/mm/extable.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index ad63a2d99949..6a0a5608d6aa 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -39,9 +39,8 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, unsigned long error_code, unsigned long fault_addr) { - regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL_GPL(ex_handler_fault); @@ -76,8 +75,7 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. 
Non-canonical address?"); - regs->ip = ex_fixup_addr(fixup); - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_uaccess); @@ -87,9 +85,7 @@ __visible bool ex_handler_copy(const struct exception_table_entry *fixup, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - regs->ip = ex_fixup_addr(fixup); - regs->ax = trapnr; - return true; + return ex_handler_fault(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_copy); @@ -103,10 +99,9 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. */ - regs->ip = ex_fixup_addr(fixup); regs->ax = 0; regs->dx = 0; - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); @@ -121,8 +116,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the write succeeded. */ - regs->ip = ex_fixup_addr(fixup); - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); -- Gitee From 2fea76adc252bd747bdb4bf4bc4ea02dfc698cd4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:13 +0200 Subject: [PATCH 073/188] x86/extable: Get rid of redundant macros mainline inclusion from mainline-v5.16-rc1 commit 32fd8b59f91fcd3bf9459aa72d90345735cc2588 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 32fd8b59f91f x86/extable: Get rid of redundant macros. -------------------------------- No point in defining the identical macros twice depending on C or assembly mode. They are still identical. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.023659534@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/asm.h | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 0603c7423aca..7dadcf9c3980 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -129,18 +129,6 @@ .long (handler) - . ; \ .popsection -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - # ifdef CONFIG_KPROBES # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ @@ -161,18 +149,6 @@ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ " .popsection\n" -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - /* For C file, we already have NOKPROBE_SYMBOL macro */ /* @@ -185,4 +161,16 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif /* __ASSEMBLY__ */ +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + +#define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, 
ex_handler_fault) + #endif /* _ASM_X86_ASM_H */ -- Gitee From 5781990e397e7a623af57d39201701282bbf7a26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:15 +0200 Subject: [PATCH 074/188] x86/mce: Deduplicate exception handling mainline inclusion from mainline-v5.16-rc1 commit e42404afc4ca856c48f1e05752541faa3587c472 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit e42404afc4ca x86/mce: Deduplicate exception handling. -------------------------------- Prepare code for further simplification. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.096452100@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/cpu/mce/core.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ed49a4abd20e..67d33c3aa37a 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -382,13 +382,16 @@ static int msr_to_offset(u32 msr) return -1; } -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { - pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + if (wrmsr) { + pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, + regs->ip, (void *)regs->ip); + } else { + pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + } show_stack_regs(regs); @@ -396,7 +399,14 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, while 
(true) cpu_relax(); +} +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + ex_handler_msr_mce(regs, false); return true; } @@ -441,17 +451,7 @@ __visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, unsigned long error_code, unsigned long fault_addr) { - pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, - regs->ip, (void *)regs->ip); - - show_stack_regs(regs); - - panic("MCA architectural violation!\n"); - - while (true) - cpu_relax(); - + ex_handler_msr_mce(regs, true); return true; } -- Gitee From 0f56096091175a7820cc6cb420fd25e2ae99ba16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:16 +0200 Subject: [PATCH 075/188] x86/mce: Get rid of stray semicolons mainline inclusion from mainline-v5.16-rc1 commit 083b32d6f4fa26abaf585721abeee73c92ea5376 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 083b32d6f4fa x86/mce: Get rid of stray semicolons. -------------------------------- and the random number of tabs. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.154428878@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/cpu/mce/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 88dcc79cfb07..95099225defc 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -61,7 +61,7 @@ static inline void cmci_disable_bank(int bank) { } static inline void intel_init_cmci(void) { } static inline void intel_init_lmce(void) { } static inline void intel_clear_lmce(void) { } -static inline bool intel_filter_mce(struct mce *m) { return false; }; +static inline bool intel_filter_mce(struct mce *m) { return false; } #endif void mce_timer_kick(unsigned long interval); @@ -183,7 +183,7 @@ extern bool filter_mce(struct mce *m); #ifdef CONFIG_X86_MCE_AMD extern bool amd_filter_mce(struct mce *m); #else -static inline bool amd_filter_mce(struct mce *m) { return false; }; +static inline bool amd_filter_mce(struct mce *m) { return false; } #endif __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, -- Gitee From ac549590f5c6221cad0241e54df50510373a3092 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:18 +0200 Subject: [PATCH 076/188] x86/extable: Rework the exception table mechanics mainline inclusion from mainline-v5.16-rc1 commit 46d28947d9876fc0f8f93d3c69813ef6e9852595 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 46d28947d987 x86/extable: Rework the exception table mechanics. -------------------------------- The exception table entries contain the instruction address, the fixup address and the handler address. All addresses are relative. 
Storing the handler address has a few downsides: 1) Most handlers need to be exported 2) Handlers can be defined everywhere and there is no overview about the handler types 3) MCE needs to check the handler type to decide whether an in-kernel #MC can be recovered. The functionality of the handler itself is not in any way special, but for these checks there need to be separate functions which in the worst case have to be exported. Some of these 'recoverable' exception fixups are pretty obscure and just reuse some other handler to spare code. That obfuscates e.g. the #MC safe copy functions. Cleaning that up would require more handlers and exports. Rework the exception fixup mechanics by storing a fixup type number instead of the handler address and invoking the proper handler for each fixup type. Also teach the extable sort to leave the type field alone. This makes most handlers static except for special cases like the MCE MSR fixup and the BPF fixup. This allows adding more types for cleaning up the obscure places without adding more handler code and exports. There is a marginal code size reduction for a production config and it removes _eight_ exported symbols. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Alexei Starovoitov Link: https://lkml.kernel.org/r/20210908132525.211958725@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/asm.h | 29 ++--- arch/x86/include/asm/extable.h | 44 +++++--- arch/x86/include/asm/extable_fixup_types.h | 19 ++++ arch/x86/include/asm/fpu/internal.h | 4 +- arch/x86/include/asm/msr.h | 4 +- arch/x86/include/asm/segment.h | 2 +- arch/x86/kernel/cpu/mce/core.c | 24 +--- arch/x86/kernel/cpu/mce/internal.h | 10 -- arch/x86/kernel/cpu/mce/severity.c | 21 ++-- arch/x86/mm/extable.c | 123 +++++++++------------ arch/x86/net/bpf_jit_comp.c | 11 +- scripts/sorttable.c | 4 +- 12 files changed, 137 insertions(+), 158 deletions(-) create mode 100644 arch/x86/include/asm/extable_fixup_types.h diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7dadcf9c3980..a521b5dd8052 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -119,14 +119,17 @@ # define CC_OUT(c) [_cc_ ## c] "=qm" #endif +# include + /* Exception table entry */ #ifdef __ASSEMBLY__ -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ .long (from) - . ; \ .long (to) - . ; \ - .long (handler) - . ; \ + .long type ; \ .popsection # ifdef CONFIG_KPROBES @@ -140,13 +143,13 @@ # endif #else /* ! 
__ASSEMBLY__ */ -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .long " __stringify(type) " \n" \ " .popsection\n" /* For C file, we already have NOKPROBE_SYMBOL macro */ @@ -161,16 +164,16 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif /* __ASSEMBLY__ */ -#define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) -#define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) -#define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) +#define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY) -#define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 1f0cbc52937c..93f400eb728f 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_EXTABLE_H #define _ASM_X86_EXTABLE_H + +#include + /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. 
+ * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -15,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, handler; + int insn, fixup, type; }; struct pt_regs; @@ -25,21 +31,27 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ } while (0) -enum handler_type { - EX_HANDLER_NONE, - EX_HANDLER_FAULT, - EX_HANDLER_UACCESS, - EX_HANDLER_OTHER -}; - extern int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr); extern int fixup_bug(struct pt_regs *regs, int trapnr); -extern enum handler_type ex_get_fault_handler_type(unsigned long ip); +extern int ex_get_fixup_type(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); +#ifdef CONFIG_X86_MCE +extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { } +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + #endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 
index 000000000000..0adc117618e6 --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +#define EX_TYPE_COPY 4 +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_WRMSR 7 +#define EX_TYPE_RDMSR 8 +#define EX_TYPE_BPF 9 + +#define EX_TYPE_WRMSR_IN_MCE 10 +#define EX_TYPE_RDMSR_IN_MCE 11 + +#endif diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5a18694a89b2..ce6fc4f8d1d1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -126,7 +126,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ : output : input) static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) @@ -253,7 +253,7 @@ static inline void fxsave(struct fxregs_state *fx) XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ "3:\n" \ - _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ + _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e16cccdd0420..55495778cb58 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr) asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); @@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { 
asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) : : "c" (msr), "a"(low), "d" (high) : "memory"); } diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 7fdd4facfce7..4be36ead91a2 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -346,7 +346,7 @@ static inline void __loadsegment_fs(unsigned short value) "1: movw %0, %%fs \n" "2: \n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) : : "rm" (value) : "memory"); } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 67d33c3aa37a..00ee1756ec92 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -382,7 +382,7 @@ static int msr_to_offset(u32 msr) return -1; } -static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { if (wrmsr) { pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", @@ -401,15 +401,6 @@ static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) cpu_relax(); } -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - ex_handler_msr_mce(regs, false); - return true; -} - /* MSR access wrappers used for error injection */ static noinstr u64 mce_rdmsrl(u32 msr) { @@ -439,22 +430,13 @@ static noinstr u64 mce_rdmsrl(u32 msr) */ asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } -__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - 
ex_handler_msr_mce(regs, true); - return true; -} - static noinstr void mce_wrmsrl(u32 msr, u64 v) { u32 low, high; @@ -479,7 +461,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v) /* See comment in mce_rdmsrl() */ asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE) : : "c" (msr), "a"(low), "d" (high) : "memory"); } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 95099225defc..3463f8cedb32 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -186,14 +186,4 @@ extern bool amd_filter_mce(struct mce *m); static inline bool amd_filter_mce(struct mce *m) { return false; } #endif -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr); - -__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr); - #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 83df991314c5..38b747dfd263 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -269,25 +269,24 @@ static bool is_copy_from_user(struct pt_regs *regs) */ static int error_context(struct mce *m, struct pt_regs *regs) { - enum handler_type t; - if ((m->cs & 3) == 3) return IN_USER; if (!mc_recoverable(m->mcgstatus)) return IN_KERNEL; - t = ex_get_fault_handler_type(m->ip); - if (t == EX_HANDLER_FAULT) { - m->kflags |= MCE_IN_KERNEL_RECOV; - return IN_KERNEL_RECOV; - } - if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) { - m->kflags |= MCE_IN_KERNEL_RECOV; + switch (ex_get_fixup_type(m->ip)) { + case EX_TYPE_UACCESS: + case EX_TYPE_COPY: + if (!regs || !is_copy_from_user(regs)) + return IN_KERNEL; m->kflags 
|= MCE_IN_KERNEL_COPYIN; + fallthrough; + case EX_TYPE_FAULT: + m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; + default: + return IN_KERNEL; } - - return IN_KERNEL; } static int mce_severity_amd_smca(struct mce *m, enum context err_ctx) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 6a0a5608d6aa..ed8322bfd3e7 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -9,40 +9,25 @@ #include #include -typedef bool (*ex_handler_t)(const struct exception_table_entry *, - struct pt_regs *, int, unsigned long, - unsigned long); - static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } -static inline ex_handler_t -ex_fixup_handler(const struct exception_table_entry *x) -{ - return (ex_handler_t)((unsigned long)&x->handler + x->handler); -} -__visible bool ex_handler_default(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); return true; } -EXPORT_SYMBOL(ex_handler_default); -__visible bool ex_handler_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ax = trapnr; - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL_GPL(ex_handler_fault); /* * Handler for when we fail to restore a task's FPU state. We should never get @@ -54,10 +39,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault); * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. 
*/ -__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_fprestore(const struct exception_table_entry *fixup, + struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); @@ -67,32 +50,23 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); return true; } -EXPORT_SYMBOL_GPL(ex_handler_fprestore); -__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_uaccess(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_uaccess); -__visible bool ex_handler_copy(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_copy(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. 
Non-canonical address?"); - return ex_handler_fault(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_fault(fixup, regs, trapnr); } -EXPORT_SYMBOL(ex_handler_copy); -__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) @@ -101,14 +75,11 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup /* Pretend that the read succeeded and returned 0. */ regs->ax = 0; regs->dx = 0; - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); -__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, @@ -116,44 +87,29 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the write succeeded. 
*/ - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); -__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); asm volatile ("mov %0, %%fs" : : "rm" (0)); - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_clear_fs); -enum handler_type ex_get_fault_handler_type(unsigned long ip) +int ex_get_fixup_type(unsigned long ip) { - const struct exception_table_entry *e; - ex_handler_t handler; + const struct exception_table_entry *e = search_exception_tables(ip); - e = search_exception_tables(ip); - if (!e) - return EX_HANDLER_NONE; - handler = ex_fixup_handler(e); - if (handler == ex_handler_fault) - return EX_HANDLER_FAULT; - else if (handler == ex_handler_uaccess || handler == ex_handler_copy) - return EX_HANDLER_UACCESS; - else - return EX_HANDLER_OTHER; + return e ? 
e->type : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; - ex_handler_t handler; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -173,8 +129,33 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, if (!e) return 0; - handler = ex_fixup_handler(e); - return handler(e, regs, trapnr, error_code, fault_addr); + switch (e->type) { + case EX_TYPE_DEFAULT: + return ex_handler_default(e, regs); + case EX_TYPE_FAULT: + return ex_handler_fault(e, regs, trapnr); + case EX_TYPE_UACCESS: + return ex_handler_uaccess(e, regs, trapnr); + case EX_TYPE_COPY: + return ex_handler_copy(e, regs, trapnr); + case EX_TYPE_CLEAR_FS: + return ex_handler_clear_fs(e, regs); + case EX_TYPE_FPU_RESTORE: + return ex_handler_fprestore(e, regs); + case EX_TYPE_RDMSR: + return ex_handler_rdmsr_unsafe(e, regs); + case EX_TYPE_WRMSR: + return ex_handler_wrmsr_unsafe(e, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(e, regs); + case EX_TYPE_RDMSR_IN_MCE: + ex_handler_msr_mce(regs, false); + break; + case EX_TYPE_WRMSR_IN_MCE: + ex_handler_msr_mce(regs, true); + break; + } + BUG(); } extern unsigned int early_recursion_flag; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a0a7ead52698..62ed3710fc53 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -761,9 +761,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } -static bool ex_handler_bpf(const struct exception_table_entry *x, - struct pt_regs *regs, int trapnr, - unsigned long error_code, unsigned long fault_addr) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { u32 reg = x->fixup >> 8; @@ -1217,12 +1215,7 @@ st: if (is_imm8(insn->off)) } ex->insn = delta; - delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; - if (!is_simm32(delta)) { - 
pr_err("extable->handler doesn't fit into 32-bit\n"); - return -EFAULT; - } - ex->handler = delta; + ex->type = EX_TYPE_BPF; if (dst_reg > BPF_REG_9) { pr_err("verifier error\n"); diff --git a/scripts/sorttable.c b/scripts/sorttable.c index ac93e033b7cb..dc913d0c6788 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -236,7 +236,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size) w(r(loc) + i, loc); w(r(loc + 1) + i + 4, loc + 1); - w(r(loc + 2) + i + 8, loc + 2); + /* Don't touch the fixup type */ i += sizeof(uint32_t) * 3; } @@ -249,7 +249,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size) w(r(loc) - i, loc); w(r(loc + 1) - (i + 4), loc + 1); - w(r(loc + 2) - (i + 8), loc + 2); + /* Don't touch the fixup type */ i += sizeof(uint32_t) * 3; } -- Gitee From d7133166d4d78437f003ea04af98d1bce4cdc459 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:19 +0200 Subject: [PATCH 077/188] x86/extable: Provide EX_TYPE_DEFAULT_MCE_SAFE and EX_TYPE_FAULT_MCE_SAFE mainline inclusion from mainline-v5.16-rc1 commit 2cadf5248b9316d3c8af876e795d61c55476f6e9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2cadf5248b93 x86/extable: Provide EX_TYPE_DEFAULT_MCE_SAFE and EX_TYPE_FAULT_MCE_SAFE. -------------------------------- Provide exception fixup types which can be used to identify fixups which allow in kernel #MC recovery and make them invoke the existing handlers. These will be used at places where #MC recovery is handled correctly by the caller. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.269689153@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/extable_fixup_types.h | 3 +++ arch/x86/kernel/cpu/mce/severity.c | 2 ++ arch/x86/mm/extable.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 0adc117618e6..409524d5d2eb 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -16,4 +16,7 @@ #define EX_TYPE_WRMSR_IN_MCE 10 #define EX_TYPE_RDMSR_IN_MCE 11 +#define EX_TYPE_DEFAULT_MCE_SAFE 12 +#define EX_TYPE_FAULT_MCE_SAFE 13 + #endif diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 38b747dfd263..af893622d94a 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -282,6 +282,8 @@ static int error_context(struct mce *m, struct pt_regs *regs) m->kflags |= MCE_IN_KERNEL_COPYIN; fallthrough; case EX_TYPE_FAULT: + case EX_TYPE_FAULT_MCE_SAFE: + case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; default: diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index ed8322bfd3e7..33ba927eaafc 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -131,8 +131,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, switch (e->type) { case EX_TYPE_DEFAULT: + case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); case EX_TYPE_FAULT: + case EX_TYPE_FAULT_MCE_SAFE: return ex_handler_fault(e, regs, trapnr); case EX_TYPE_UACCESS: return ex_handler_uaccess(e, regs, trapnr); -- Gitee From ecc5c148a30ce5dd313ce388b881bf02c6247a3e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:21 +0200 Subject: [PATCH 078/188] x86/copy_mc: Use EX_TYPE_DEFAULT_MCE_SAFE for exception fixups mainline inclusion from 
mainline-v5.16-rc1 commit c1c97d175493ab32325df81133611ce8e4e05088 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit c1c97d175493 x86/copy_mc: Use EX_TYPE_DEFAULT_MCE_SAFE for exception fixups. -------------------------------- Nothing in that code uses the trap number which was stored by the exception fixup which is instantiated via _ASM_EXTABLE_FAULT(). Use _ASM_EXTABLE(... EX_TYPE_DEFAULT_MCE_SAFE) instead which just handles the IP fixup and the type indicates to the #MC handler that the call site can handle the abort caused by #MC correctly. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.328706042@linutronix.de Signed-off-by: Lin Wang --- arch/x86/lib/copy_mc_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 892d8915f609..4c8398182a1f 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -117,9 +117,9 @@ EXPORT_SYMBOL_GPL(copy_mc_fragile) .previous - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE) + _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE) + _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE) _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) _ASM_EXTABLE(.L_write_words, .E_write_words) _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) @@ -159,5 +159,5 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string) .previous - _ASM_EXTABLE_FAULT(.L_copy, .E_copy) + _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE) #endif /* !CONFIG_UML */ -- Gitee From f2ef27d4065452bb40b2c7f7918aee75295e76a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 
2021 15:29:23 +0200 Subject: [PATCH 079/188] x86/fpu: Use EX_TYPE_FAULT_MCE_SAFE for exception fixups mainline inclusion from mainline-v5.16-rc1 commit c6304556f3ae98c943bbb4042a30205c98e4f921 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit c6304556f3ae x86/fpu: Use EX_TYPE_FAULT_MCE_SAFE for exception fixups. -------------------------------- The macros used for restoring FPU state from a user space buffer can handle all exceptions including #MC. They need to return the trap number in the error case as the code which invokes them needs to distinguish the cause of the failure. It aborts the operation for anything except #PF. Use the new EX_TYPE_FAULT_MCE_SAFE exception table fixup type to document the nature of the fixup. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.387464538@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index ce6fc4f8d1d1..cb1ca602e848 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -102,7 +102,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); "3: negl %%eax\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ + _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err), output \ : "0"(0), input); \ err; \ @@ -209,7 +209,7 @@ static inline void fxsave(struct fxregs_state *fx) "3: negl %%eax\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ + _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -- Gitee From 384c2c3dd690dbe35597fa40143e180cf585016e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:24 +0200 Subject: [PATCH 080/188] x86/extable: 
Remove EX_TYPE_FAULT from MCE safe fixups mainline inclusion from mainline-v5.16-rc1 commit 0c2e62ba04cd0b7194b380bae4fc35c45bb2e46e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 0c2e62ba04cd x86/extable: Remove EX_TYPE_FAULT from MCE safe fixups. -------------------------------- Now that the MC safe copy and FPU have been converted to use the MCE safe fixup types remove EX_TYPE_FAULT from the list of types which MCE considers to be safe to be recovered in kernel. This removes the SGX exception handling of ENCLS from the #MC safe handling, but according to the SGX wizards the current SGX implementations cannot survive #MC on ENCLS: https://lore.kernel.org/r/YS+upEmTfpZub3s9@google.com The code relies on the trap number being stored if ENCLS raised an exception. That's still working, but it does no longer trick the MCE code into assuming that #MC is handled correctly for ENCLS. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.445255957@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/cpu/mce/severity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index af893622d94a..ef78d2d21bc4 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -281,7 +281,6 @@ static int error_context(struct mce *m, struct pt_regs *regs) return IN_KERNEL; m->kflags |= MCE_IN_KERNEL_COPYIN; fallthrough; - case EX_TYPE_FAULT: case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; -- Gitee From 62bb427e80a461eb4b882c03f2be21a4be98bf87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:26 +0200 Subject: [PATCH 081/188] x86/fpu/signal: Clarify exception handling in restore_fpregs_from_user() mainline inclusion from mainline-v5.16-rc1 commit 4339d0c63c2d5bea1fe6de4091ee2fe9eeea09a7 category: 
feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 4339d0c63c2d x86/fpu/signal: Clarify exception handling in restore_fpregs_from_user(). -------------------------------- FPU restore from a signal frame can trigger various exceptions. The exceptions are caught with an exception table entry. The handler of this entry stores the trap number in EAX. The FPU specific fixup negates that trap number to convert it into a negative error code. Any other exception than #PF is fatal and recovery is not possible. This relies on the fact that the #PF exception number is the same as EFAULT, but that's not really obvious. Remove the negation from the exception fixup as it really has no value and check for X86_TRAP_PF at the call site. There is still confusion due to the return code conversion for the error case which will be cleaned up separately. Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.506192488@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 21 ++++++++------------- arch/x86/kernel/fpu/signal.c | 5 +++-- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index cb1ca602e848..4cfd40dc3cb5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -88,7 +88,10 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif extern void save_fpregs_to_fpstate(struct fpu *fpu); -/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ +/* + * Returns 0 on success or the trap number when the operation raises an + * exception. + */ #define user_insn(insn, output, input...) 
\ ({ \ int err; \ @@ -98,11 +101,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); asm volatile(ASM_STAC "\n" \ "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: negl %%eax\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FAULT_MCE_SAFE) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err), output \ : "0"(0), input); \ err; \ @@ -198,18 +197,14 @@ static inline void fxsave(struct fxregs_state *fx) #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" /* - * After this @err contains 0 on success or the negated trap number when - * the operation raises an exception. For faults this results in -EFAULT. + * After this @err contains 0 on success or the trap number when the + * operation raises an exception. */ #define XSTATE_OP(op, st, lmask, hmask, err) \ asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ - ".pushsection .fixup,\"ax\"\n\t" \ - "3: negl %%eax\n\t" \ - "jmp 2b\n\t" \ - ".popsection\n\t" \ - _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FAULT_MCE_SAFE) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 831b25c5e705..cd96f5ce0ca7 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -13,6 +13,7 @@ #include #include +#include #include static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; @@ -275,7 +276,7 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, fpregs_unlock(); /* Try to handle #PF, but anything else is fatal. 
*/ - if (ret != -EFAULT) + if (ret != X86_TRAP_PF) return -EINVAL; ret = fault_in_pages_readable(buf, size); @@ -410,7 +411,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpu->state.xsave.header.xfeatures &= mask; - ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all); + ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all) ? -EINVAL : 0; } else { ret = fxrstor_safe(&fpu->state.fxsave); } -- Gitee From 8c78641a0f857090016714d9978d7985e3d5ad05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:29 +0200 Subject: [PATCH 082/188] x86/fpu/signal: Move header zeroing out of xsave_to_user_sigframe() mainline inclusion from mainline-v5.16-rc1 commit 4164a482a5d92c29eaf53d01755103f6bbce38f2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 4164a482a5d9 x86/fpu/signal: Move header zeroing out of xsave_to_user_sigframe(). -------------------------------- There is no reason to have the header zeroing in the pagefault disabled region. Do it upfront once. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.621674721@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 17 ++++++----------- arch/x86/kernel/fpu/signal.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4cfd40dc3cb5..c856ca481546 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -318,9 +318,12 @@ static inline void os_xrstor(struct xregs_state *xstate, u64 mask) * We don't use modified optimization because xrstor/xrstors might track * a different application. 
* - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. + * We don't use compacted format xsave area for backward compatibility for + * old applications which don't understand the compacted format of the + * xsave area. + * + * The caller has to zero buf::header before calling this because XSAVE* + * does not touch the reserved fields in the header. */ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { @@ -334,14 +337,6 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) u32 hmask = mask >> 32; int err; - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - stac(); XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index cd96f5ce0ca7..91c1e68e5fca 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -189,6 +189,18 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (!access_ok(buf, size)) return -EACCES; + + if (use_xsave()) { + struct xregs_state __user *xbuf = buf_fx; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + ret = __clear_user(&xbuf->header, sizeof(xbuf->header)); + if (unlikely(ret)) + return ret; + } retry: /* * Load the FPU registers if they are not valid for the current task. 
-- Gitee From 8638c2199fc48d8d59a8fe2e3024285965253850 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:30 +0200 Subject: [PATCH 083/188] x86/fpu/signal: Move xstate clearing out of copy_fpregs_to_sigframe() mainline inclusion from mainline-v5.16-rc1 commit fcfb7163329ce832aafef31f26345ef5e8642a17 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit fcfb7163329c x86/fpu/signal: Move xstate clearing out of copy_fpregs_to_sigframe(). -------------------------------- When the direct saving of the FPU registers to the user space sigframe fails, copy_fpregs_to_sigframe() attempts to clear the user buffer. The most likely reason for such a fail is a page fault. As copy_fpregs_to_sigframe() is invoked with pagefaults disabled the chance that __clear_user() succeeds is minuscule. Move the clearing out into the caller which replaces the fault_in_pages_writeable() in that error handling path. The return value confusion will be cleaned up separately. 
Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.679356300@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 91c1e68e5fca..d5d610ab7739 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -136,18 +136,12 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) { - int err; - if (use_xsave()) - err = xsave_to_user_sigframe(buf); - else if (use_fxsr()) - err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf); + return xsave_to_user_sigframe(buf); + if (use_fxsr()) + return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else - err = fnsave_to_user_sigframe((struct fregs_state __user *) buf); - - if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) - err = -EFAULT; - return err; + return fnsave_to_user_sigframe((struct fregs_state __user *) buf); } /* @@ -218,9 +212,9 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); if (ret) { - if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size)) + if (!__clear_user(buf_fx, fpu_user_xstate_size)) goto retry; - return -EFAULT; + return -1; } /* Save the fsave header for the 32-bit frames. 
*/ -- Gitee From 19a15734918109db0a6084c3a61d2cfa3869f65b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:32 +0200 Subject: [PATCH 084/188] x86/fpu/signal: Change return type of copy_fpstate_to_sigframe() to boolean mainline inclusion from mainline-v5.16-rc1 commit 052adee668284b67105375c0a524f16a423f1424 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 052adee66828 x86/fpu/signal: Change return type of copy_fpstate_to_sigframe() to boolean. -------------------------------- None of the call sites cares about the actual return code. Change the return type to boolean and return 'true' on success. Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.736773588@linutronix.de Signed-off-by: Lin Wang --- arch/x86/ia32/ia32_signal.c | 4 ++-- arch/x86/include/asm/fpu/internal.h | 2 +- arch/x86/kernel/fpu/signal.c | 20 ++++++++++---------- arch/x86/kernel/signal.c | 4 +--- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 81cf22398cd1..9b1667e5c25d 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -220,8 +220,8 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_32 __user *) sp; - if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, - math_size) < 0) + if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, + math_size)) return (void __user *) -1L; sp -= frame_size; diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index c856ca481546..74aa53eeedf5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -386,7 +386,7 @@ static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) 
__restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); } -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); /* * FPU context switch related helper methods: diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index d5d610ab7739..db5681339cd9 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -165,7 +165,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ -int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) +bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -176,13 +176,14 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (!static_cpu_has(X86_FEATURE_FPU)) { struct user_i387_ia32_struct fp; + fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, .left = sizeof(fp)}); - return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0; + return !copy_to_user(buf, &fp, sizeof(fp)); } if (!access_ok(buf, size)) - return -EACCES; + return false; if (use_xsave()) { struct xregs_state __user *xbuf = buf_fx; @@ -191,9 +192,8 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) * Clear the xsave header first, so that reserved fields are * initialized to zero. 
*/ - ret = __clear_user(&xbuf->header, sizeof(xbuf->header)); - if (unlikely(ret)) - return ret; + if (__clear_user(&xbuf->header, sizeof(xbuf->header))) + return false; } retry: /* @@ -214,17 +214,17 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (ret) { if (!__clear_user(buf_fx, fpu_user_xstate_size)) goto retry; - return -1; + return false; } /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; + return false; if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; + return false; - return 0; + return true; } static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e941dc4c46bd..27657e7efe11 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -244,7 +244,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long math_size = 0; unsigned long sp = regs->sp; unsigned long buf_fx = 0; - int ret; /* redzone */ if (IS_ENABLED(CONFIG_X86_64)) @@ -292,8 +291,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } /* save i387 and extended state */ - ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size); - if (ret < 0) + if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size)) return (void __user *)-1L; return (void __user *)sp; -- Gitee From 0aecb60b7ecf606298ff6bb84a0a684a99afbe87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:34 +0200 Subject: [PATCH 085/188] x86/fpu/signal: Change return type of copy_fpregs_to_sigframe() helpers to boolean mainline inclusion from mainline-v5.16-rc1 commit 2af07f3a6e9fb81331421ca24b26a96180d792dd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2af07f3a6e9f x86/fpu/signal: Change return type of 
copy_fpregs_to_sigframe() helpers to boolean. -------------------------------- Now that copy_fpregs_to_sigframe() returns boolean the individual return codes in the related helper functions do not make sense anymore. Change them to return boolean success/fail. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.794334915@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index db5681339cd9..2e69b1113e9e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -65,7 +65,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, /* * Signal frame handlers. */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; @@ -82,18 +82,19 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) if (__copy_to_user(buf, &env, sizeof(env)) || __put_user(xsave->i387.swd, &fp->status) || __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; + return false; } else { struct fregs_state __user *fp = buf; u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; + return false; } - return 0; + return true; } -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +static inline bool save_xstate_epilog(void __user *buf, int ia32_frame) { struct xregs_state __user *x = buf; struct _fpx_sw_bytes *sw_bytes; @@ -131,7 +132,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); - return err; + return !err; } static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) @@ -218,10 
+219,10 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) } /* Save the fsave header for the 32-bit frames. */ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate)) return false; return true; -- Gitee From cf699152052c90ffedad0f7a78ccb40969f9f96a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:35 +0200 Subject: [PATCH 086/188] x86/signal: Change return type of restore_sigcontext() to boolean mainline inclusion from mainline-v5.16-rc1 commit ee4ecdfbd28954086a09740dc931c10c93e39370 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ee4ecdfbd289 x86/signal: Change return type of restore_sigcontext() to boolean. -------------------------------- None of the call sites cares about the return code. All they are interested in is success or fail. Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.851280949@linutronix.de Signed-off-by: Lin Wang --- arch/x86/ia32/ia32_signal.c | 12 ++++++------ arch/x86/kernel/signal.c | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 9b1667e5c25d..3bfc9e1ad846 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -57,8 +57,8 @@ static inline void reload_segments(struct sigcontext_32 *sc) /* * Do a signal return; undo the signal stack. 
*/ -static int ia32_restore_sigcontext(struct pt_regs *regs, - struct sigcontext_32 __user *usc) +static bool ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_32 __user *usc) { struct sigcontext_32 sc; @@ -66,7 +66,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, current->restart_block.fn = do_no_restart_syscall; if (unlikely(copy_from_user(&sc, usc, sizeof(sc)))) - return -EFAULT; + return false; /* Get only the ia32 registers. */ regs->bx = sc.bx; @@ -94,7 +94,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, * normal case. */ reload_segments(&sc); - return fpu__restore_sig(compat_ptr(sc.fpstate), 1); + return !fpu__restore_sig(compat_ptr(sc.fpstate), 1); } COMPAT_SYSCALL_DEFINE0(sigreturn) @@ -111,7 +111,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); - if (ia32_restore_sigcontext(regs, &frame->sc)) + if (!ia32_restore_sigcontext(regs, &frame->sc)) goto badframe; return regs->ax; @@ -135,7 +135,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); - if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (!ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 27657e7efe11..4eadbcd46250 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -79,9 +79,9 @@ static void force_valid_ss(struct pt_regs *regs) # define CONTEXT_COPY_SIZE sizeof(struct sigcontext) #endif -static int restore_sigcontext(struct pt_regs *regs, - struct sigcontext __user *usc, - unsigned long uc_flags) +static bool restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *usc, + unsigned long uc_flags) { struct sigcontext sc; @@ -89,7 +89,7 @@ static int restore_sigcontext(struct pt_regs *regs, current->restart_block.fn = do_no_restart_syscall; if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE)) - return -EFAULT; + return false; #ifdef CONFIG_X86_32 
set_user_gs(regs, sc.gs); @@ -136,8 +136,8 @@ static int restore_sigcontext(struct pt_regs *regs, force_valid_ss(regs); #endif - return fpu__restore_sig((void __user *)sc.fpstate, - IS_ENABLED(CONFIG_X86_32)); + return !fpu__restore_sig((void __user *)sc.fpstate, + IS_ENABLED(CONFIG_X86_32)); } static __always_inline int @@ -641,7 +641,7 @@ SYSCALL_DEFINE0(sigreturn) * x86_32 has no uc_flags bits relevant to restore_sigcontext. * Save a few cycles by skipping the __get_user. */ - if (restore_sigcontext(regs, &frame->sc, 0)) + if (!restore_sigcontext(regs, &frame->sc, 0)) goto badframe; return regs->ax; @@ -669,7 +669,7 @@ SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) + if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) @@ -927,7 +927,7 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) + if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) -- Gitee From 39730d80a233cd791dc89bbbe2f893b14f0cbfc0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:37 +0200 Subject: [PATCH 087/188] x86/fpu/signal: Change return type of fpu__restore_sig() to boolean mainline inclusion from mainline-v5.16-rc1 commit f3305be5feecae62adfa5a6a1441a76493fe7412 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit f3305be5feec x86/fpu/signal: Change return type of fpu__restore_sig() to boolean. -------------------------------- None of the call sites cares about the error code. All they need to know is whether the function succeeded or not. 
Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.909065931@linutronix.de Signed-off-by: Lin Wang --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/include/asm/fpu/internal.h | 2 +- arch/x86/kernel/fpu/signal.c | 22 ++++++++++------------ arch/x86/kernel/signal.c | 4 ++-- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 3bfc9e1ad846..e3dd76cf909e 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -94,7 +94,7 @@ static bool ia32_restore_sigcontext(struct pt_regs *regs, * normal case. */ reload_segments(&sc); - return !fpu__restore_sig(compat_ptr(sc.fpstate), 1); + return fpu__restore_sig(compat_ptr(sc.fpstate), 1); } COMPAT_SYSCALL_DEFINE0(sigreturn) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 74aa53eeedf5..89960e479f87 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,7 +26,7 @@ /* * High level FPU state handling functions: */ -extern int fpu__restore_sig(void __user *buf, int ia32_frame); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern void fpu__clear_user_states(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 2e69b1113e9e..7bcb7b860b1f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -438,17 +438,17 @@ static inline int xstate_sigframe_size(void) /* * Restore FPU state from a sigframe: */ -int fpu__restore_sig(void __user *buf, int ia32_frame) +bool fpu__restore_sig(void __user *buf, int ia32_frame) { unsigned int size = xstate_sigframe_size(); struct fpu *fpu = &current->thread.fpu; void __user *buf_fx = buf; bool ia32_fxstate = false; - int ret; + bool success = false; if (unlikely(!buf)) 
{ fpu__clear_user_states(fpu); - return 0; + return true; } ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || @@ -464,23 +464,21 @@ int fpu__restore_sig(void __user *buf, int ia32_frame) ia32_fxstate = true; } - if (!access_ok(buf, size)) { - ret = -EACCES; + if (!access_ok(buf, size)) goto out; - } if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { - ret = fpregs_soft_set(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), - NULL, buf); + success = !fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); } else { - ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); + success = !__fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: - if (unlikely(ret)) + if (unlikely(!success)) fpu__clear_user_states(fpu); - return ret; + return success; } unsigned long diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4eadbcd46250..25d9c826d158 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -136,8 +136,8 @@ static bool restore_sigcontext(struct pt_regs *regs, force_valid_ss(regs); #endif - return !fpu__restore_sig((void __user *)sc.fpstate, - IS_ENABLED(CONFIG_X86_32)); + return fpu__restore_sig((void __user *)sc.fpstate, + IS_ENABLED(CONFIG_X86_32)); } static __always_inline int -- Gitee From 7c04e68c13f47262c65dfc57160570b0330172f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:38 +0200 Subject: [PATCH 088/188] x86/fpu/signal: Change return type of __fpu_restore_sig() to boolean mainline inclusion from mainline-v5.16-rc1 commit 1193f408cd5140f2cfd38c7e60a2d39d39cd485f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1193f408cd51 x86/fpu/signal: Change return type of __fpu_restore_sig() to boolean. -------------------------------- Now that fpu__restore_sig() returns a boolean get rid of the individual error codes in __fpu_restore_sig() as well. 
Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.966197097@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 41 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 7bcb7b860b1f..bc69085b9b8b 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -309,8 +309,8 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, return 0; } -static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, - bool ia32_fxstate) +static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, + bool ia32_fxstate) { int state_size = fpu_kernel_xstate_size; struct task_struct *tsk = current; @@ -318,14 +318,14 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; u64 user_xfeatures = 0; bool fx_only = false; - int ret; + bool success; + if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; - ret = check_xstate_in_sigframe(buf_fx, &fx_sw_user); - if (unlikely(ret)) - return ret; + if (check_xstate_in_sigframe(buf_fx, &fx_sw_user)) + return false; fx_only = !fx_sw_user.magic1; state_size = fx_sw_user.xstate_size; @@ -341,8 +341,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * faults. If it does, fall back to the slow path below, going * through the kernel buffer with the enabled pagefault handler. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return !restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, + state_size); } /* @@ -350,9 +350,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * to be ignored for histerical raisins. The legacy state is folded * in once the larger state has been copied. 
*/ - ret = __copy_from_user(&env, buf, sizeof(env)); - if (ret) - return ret; + if (__copy_from_user(&env, buf, sizeof(env))) + return false; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is @@ -379,18 +378,17 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs_unlock(); if (use_xsave() && !fx_only) { - ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); - if (ret) - return ret; + if (copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx)) + return false; } else { if (__copy_from_user(&fpu->state.fxsave, buf_fx, sizeof(fpu->state.fxsave))) - return -EFAULT; + return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. */ if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) - return -EINVAL; + return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). */ fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; @@ -418,17 +416,18 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpu->state.xsave.header.xfeatures &= mask; - ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all) ? -EINVAL : 0; + success = !os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all); } else { - ret = fxrstor_safe(&fpu->state.fxsave); + success = !fxrstor_safe(&fpu->state.fxsave); } - if (likely(!ret)) + if (likely(success)) fpregs_mark_activate(); fpregs_unlock(); - return ret; + return success; } + static inline int xstate_sigframe_size(void) { return use_xsave() ? 
fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : @@ -472,7 +471,7 @@ bool fpu__restore_sig(void __user *buf, int ia32_frame) sizeof(struct user_i387_ia32_struct), NULL, buf); } else { - success = !__fpu_restore_sig(buf, buf_fx, ia32_fxstate); + success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: -- Gitee From bcd36a259f3b69adad391374f0ce4901a8c750b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:40 +0200 Subject: [PATCH 089/188] x86/fpu/signal: Change return code of check_xstate_in_sigframe() to boolean mainline inclusion from mainline-v5.16-rc1 commit be0040144152ed834c369a7830487e5ee4f27080 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit be0040144152 x86/fpu/signal: Change return code of check_xstate_in_sigframe() to boolean. -------------------------------- __fpu_sig_restore() only needs success/fail information and no detailed error code. Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132526.024024598@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index bc69085b9b8b..bbf1e58bce87 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -23,8 +23,8 @@ static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. 
*/ -static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, - struct _fpx_sw_bytes *fx_sw) +static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); @@ -32,7 +32,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) - return -EFAULT; + return false; /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || @@ -48,10 +48,10 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, * in the memory layout. */ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) - return -EFAULT; + return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) - return 0; + return true; setfx: trace_x86_fpu_xstate_check_failed(&current->thread.fpu); @@ -59,7 +59,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, fx_sw->magic1 = 0; fx_sw->xstate_size = sizeof(struct fxregs_state); fx_sw->xfeatures = XFEATURE_MASK_FPSSE; - return 0; + return true; } /* @@ -324,7 +324,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; - if (check_xstate_in_sigframe(buf_fx, &fx_sw_user)) + if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user)) return false; fx_only = !fx_sw_user.magic1; -- Gitee From 0d8f48c38fdcb181de5bc8cab16b4c3e9613157a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:41 +0200 Subject: [PATCH 090/188] x86/fpu/signal: Change return code of restore_fpregs_from_user() to boolean mainline inclusion from mainline-v5.16-rc1 commit a2a8fd9a3efd8d22ee14a441e9e78cf5c998e69a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit a2a8fd9a3efd x86/fpu/signal: Change 
return code of restore_fpregs_from_user() to boolean. -------------------------------- __fpu_restore_sig() only needs information about success or fail and no real error code. This cleans up the confusing conversion of the trap number, which is returned by the *RSTOR() exception fixups, to an error code. Suggested-by: Al Viro Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132526.084109938@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index bbf1e58bce87..693321ebdc3c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -254,8 +254,8 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static int restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, + bool fx_only, unsigned int size) { struct fpu *fpu = &current->thread.fpu; int ret; @@ -284,12 +284,11 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, /* Try to handle #PF, but anything else is fatal. */ if (ret != X86_TRAP_PF) - return -EINVAL; + return false; - ret = fault_in_pages_readable(buf, size); - if (!ret) + if (!fault_in_pages_readable(buf, size)) goto retry; - return ret; + return false; } /* @@ -306,7 +305,7 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, fpregs_mark_activate(); fpregs_unlock(); - return 0; + return true; } static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, @@ -341,8 +340,8 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, * faults.
If it does, fall back to the slow path below, going * through the kernel buffer with the enabled pagefault handler. */ - return !restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, + state_size); } /* -- Gitee From 548452c1ddc9a0f11db411c669f233e9fa0fddc1 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 22 Sep 2021 22:09:01 +0200 Subject: [PATCH 091/188] x86/fpu/signal: Fix missed conversion to correct boolean retval in save_xstate_epilog() mainline inclusion from mainline-v5.16-rc1 commit 724fc0248d450224b19ef5b5ee41e392348f6704 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 724fc0248d45 x86/fpu/signal: Fix missed conversion. -------------------------------- Fix the missing return code polarity in save_xstate_epilog(). [ bp: Massage, use the right commit in the Fixes: tag ] Fixes: 2af07f3a6e9f ("x86/fpu/signal: Change return type of copy_fpregs_to_sigframe() helpers to boolean") Reported-by: Remi Duraffort Signed-off-by: Anders Roxell Signed-off-by: Borislav Petkov Tested-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/1461 Link: https://lkml.kernel.org/r/20210922200901.1823741-1-anders.roxell@linaro.org Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 693321ebdc3c..51c4915a35f0 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -106,7 +106,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame) err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); if (!use_xsave()) - return err; + return !err; err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + fpu_user_xstate_size)); -- Gitee From 439b022a5b8225b64d42b98f8e636b9a5c54d29d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner 
Date: Fri, 15 Oct 2021 03:15:54 +0200 Subject: [PATCH 092/188] x86/fpu: Remove pointless argument from switch_fpu_finish() mainline inclusion from mainline-v5.16-rc1 commit 9568bfb4f04bd9a280c592879ccd7a26a77c1390 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9568bfb4f04b x86/fpu: Remove pointless argument from switch_fpu_finish(). -------------------------------- Unused since the FPU switching rework. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.433135710@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 2 +- arch/x86/kernel/process_32.c | 3 +-- arch/x86/kernel/process_64.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 89960e479f87..1503750534f7 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -521,7 +521,7 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) * Delay loading of the complete FPU state until the return to userland. * PKRU is handled separately. */ -static inline void switch_fpu_finish(struct fpu *new_fpu) +static inline void switch_fpu_finish(void) { if (cpu_feature_enabled(X86_FEATURE_FPU)) set_thread_flag(TIF_NEED_FPU_LOAD); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4f2f54e1281c..d008e222a302 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -160,7 +160,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ @@ -213,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_fpu); + switch_fpu_finish(); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c79452adff59..ec407de7f47a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -560,7 +560,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && @@ -621,7 +620,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_fpu); + switch_fpu_finish(); /* Reload sp0. */ update_task_stack(next_p); -- Gitee From b055804bddcc82d4afa6223f64133954cdfd7a74 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:15:56 +0200 Subject: [PATCH 093/188] x86/fpu: Update stale comments mainline inclusion from mainline-v5.16-rc1 commit d2d926482cdfbd5517826eca4e39dcd8757f04d3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d2d926482cdf x86/fpu: Update stale comments. -------------------------------- copy_fpstate_to_sigframe() does not have a slow path anymore. Neither does the !ia32 restore in __fpu_restore_sig(). Update the comments accordingly. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.493570236@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 51c4915a35f0..e257805d3d3f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -155,10 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * Try to save it directly to the user frame with disabled page fault handler. - * If this fails then do the slow path where the FPU state is first saved to - * task's fpu->state and then copy it to the user frame pointed to by the - * aligned pointer 'buf_fx'. + * Save it directly to the user frame with disabled page fault handler. If + * that faults, try to clear the frame which handles the page fault. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -334,12 +332,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, } if (likely(!ia32_fxstate)) { - /* - * Attempt to restore the FPU registers directly from user - * memory. For that to succeed, the user access cannot cause page - * faults. If it does, fall back to the slow path below, going - * through the kernel buffer with the enabled pagefault handler. - */ + /* Restore the FPU registers directly from user memory. 
*/ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, state_size); } -- Gitee From b71d2192599415c897c6f577debf1f921be8b4cf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:15:57 +0200 Subject: [PATCH 094/188] x86/pkru: Remove useless include mainline inclusion from mainline-v5.16-rc1 commit b50854eca0e014c2d3738073b387ab8ec85118ab category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b50854eca0e0 x86/pkru: Remove useless include. -------------------------------- PKRU code does not need anything from FPU headers. Include cpufeature.h instead and fixup the resulting fallout in perf. This is a preparation for FPU changes in order to prevent recursive include hell. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.551522694@linutronix.de Signed-off-by: Lin Wang --- arch/x86/events/perf_event.h | 1 + arch/x86/include/asm/pkru.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0c3b8fa7e532..2c2a319da51d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,6 +14,7 @@ #include <linux/perf_event.h> +#include <asm/fpu/xstate.h> #include <asm/intel_ds.h> /* To enable MSR tracing please use the generic trace points.
*/ diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index ccc539faa5bb..4cd49afa0ca4 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PKRU_H #define _ASM_X86_PKRU_H -#include <asm/fpu/xstate.h> +#include <asm/cpufeature.h> #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 -- Gitee From 00f2424d71f739a56aeb0e826013c9179a2c999a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:15:59 +0200 Subject: [PATCH 095/188] x86/fpu: Restrict xsaves()/xrstors() to independent states mainline inclusion from mainline-v5.16-rc1 commit f5daf836f292f795f9cf8f36e036bf47adcbc3a3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit f5daf836f292 x86/fpu: Restrict xsaves()/xrstors() to independent states. -------------------------------- These interfaces are really only valid for features which are independently managed and not part of the task context state for various reasons. Tighten the checks and adjust the misleading comments. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.608492174@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 705a265325fe..4c91435940c7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1175,20 +1175,14 @@ int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, return copy_uabi_to_xstate(xsave, NULL, ubuf); } -static bool validate_xsaves_xrstors(u64 mask) +static bool validate_independent_components(u64 mask) { u64 xchk; if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) return false; - /* - * Validate that this is either a task->fpstate related component - * subset or an independent one.
- */ - if (mask & xfeatures_mask_independent()) - xchk = ~xfeatures_mask_independent(); - else - xchk = ~xfeatures_mask_all; + + xchk = ~xfeatures_mask_independent(); if (WARN_ON_ONCE(!mask || mask & xchk)) return false; @@ -1206,14 +1200,13 @@ static bool validate_xsaves_xrstors(u64 mask) * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer * can #GP. * - * The feature mask must either be a subset of the independent features or - * a subset of the task->fpstate related features. + * The feature mask must be a subset of the independent features. */ void xsaves(struct xregs_state *xstate, u64 mask) { int err; - if (!validate_xsaves_xrstors(mask)) + if (!validate_independent_components(mask)) return; XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); @@ -1231,14 +1224,13 @@ void xsaves(struct xregs_state *xstate, u64 mask) * Proper usage is to restore the state which was saved with * xsaves() into @xstate. * - * The feature mask must either be a subset of the independent features or - * a subset of the task->fpstate related features. + * The feature mask must be a subset of the independent features. */ void xrstors(struct xregs_state *xstate, u64 mask) { int err; - if (!validate_xsaves_xrstors(mask)) + if (!validate_independent_components(mask)) return; XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err); -- Gitee From 2ee4e6922dbebf2b269de3dd2d98e3c046315c98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:01 +0200 Subject: [PATCH 096/188] x86/fpu: Cleanup the on_boot_cpu clutter mainline inclusion from mainline-v5.16-rc1 commit dc2f39fd1bf23eee644d409b84e8e435606997bf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit dc2f39fd1bf2 x86/fpu: Cleanup the on_boot_cpu clutter. -------------------------------- Defensive programming is useful, but this on_boot_cpu debug is really silly. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.665080855@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/init.c | 16 ---------------- arch/x86/kernel/fpu/xstate.c | 9 --------- 2 files changed, 25 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 64e29927cc32..86bc9759fc8b 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -192,11 +192,6 @@ static void __init fpu__init_task_struct_size(void) */ static void __init fpu__init_system_xstate_size_legacy(void) { - static int on_boot_cpu __initdata = 1; - - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; - /* * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). @@ -216,15 +211,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpu_user_xstate_size = fpu_kernel_xstate_size; } -/* Legacy code to initialize eager fpu mode. */ -static void __init fpu__init_system_ctx_switch(void) -{ - static bool on_boot_cpu __initdata = 1; - - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; -} - /* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: @@ -243,6 +229,4 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); fpu__init_task_struct_size(); - - fpu__init_system_ctx_switch(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4c91435940c7..5cb91ca9339d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -379,15 +379,10 @@ static void __init print_xstate_offset_size(void) */ static void __init setup_init_fpu_buf(void) { - static int on_boot_cpu __initdata = 1; - BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED) != XFEATURES_INIT_FPSTATE_HANDLED); - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; - if 
(!boot_cpu_has(X86_FEATURE_XSAVE)) return; @@ -721,14 +716,10 @@ static void fpu__init_disable_system_xstate(void) void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; - static int on_boot_cpu __initdata = 1; u64 xfeatures; int err; int i; - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; - if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_info("x86/fpu: No FPU detected\n"); return; -- Gitee From 3518d12e2e181c166b1a428dbee76405fc2b81f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:02 +0200 Subject: [PATCH 097/188] x86/fpu: Remove pointless memset in fpu_clone() mainline inclusion from mainline-v5.16-rc1 commit 01f9f62d3ae75077a54a11d2777082f1e58e2d9f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 01f9f62d3ae7 x86/fpu: Remove pointless memset in fpu_clone(). -------------------------------- Zeroing the forked task's FPU registers buffer to avoid leaking init optimized stale data into the clone is a pointless exercise for the case where the current task has TIF_NEED_FPU_LOAD set. In that case, the FPU registers state is copied from current's FPU register buffer which can contain stale init optimized data as well. The alleged information leak is non-existent because this stale init optimized data is used nowhere and cannot leak anywhere.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.722854569@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7ada7bd03a32..191269edac97 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -259,12 +259,6 @@ int fpu_clone(struct task_struct *dst) if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; - /* - * Don't let 'init optimized' areas of the XSAVE area - * leak into the child task: - */ - memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); - /* * If the FPU registers are not owned by current just memcpy() the * state. Otherwise save the FPU registers directly into the -- Gitee From 48b6111837f7a160429fad0c6ec7aa681df0d99d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:04 +0200 Subject: [PATCH 098/188] x86/process: Clone FPU in copy_thread() mainline inclusion from mainline-v5.16-rc1 commit 2d16a1876f20218f8970ea4b7f679cead1cdb510 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2d16a1876f20 x86/process: Clone FPU in copy_thread(). -------------------------------- There is no reason to clone FPU in arch_dup_task_struct(). Quite the contrary - it prevents optimizations. Move it to copy_thread(). 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.780714235@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 836c3c96a36b..18986d12277b 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -93,7 +93,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif - return fpu_clone(dst); + return 0; } /* @@ -161,6 +161,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, frame->flags = X86_EFLAGS_FIXED; #endif + fpu_clone(p); + /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { p->thread.pkru = pkru_get_init_value(); -- Gitee From bb04e235a60526dd2a9f7be633997ef72e0e4c99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:06 +0200 Subject: [PATCH 099/188] x86/fpu: Do not inherit FPU context for kernel and IO worker threads mainline inclusion from mainline-v5.16-rc1 commit 509e7a30cd0a9f38abac4114832d9f69ff0d73b4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 509e7a30cd0a x86/fpu: Do not inherit FPU context for kernel and IO worker threads. -------------------------------- There is no reason why kernel and IO worker threads need a full clone of the parent's FPU state. Both are kernel threads which are not supposed to use FPU. So copying a large state or doing XSAVE() is pointless. Just clean out the minimally required state for those tasks. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.839822981@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 191269edac97..9a6b195a8a00 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -212,6 +212,15 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave) xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; } +static inline unsigned int init_fpstate_copy_size(void) +{ + if (!use_xsave()) + return fpu_kernel_xstate_size; + + /* XSAVE(S) just needs the legacy and the xstate header part */ + return sizeof(init_fpstate.xsave); +} + static inline void fpstate_init_fxstate(struct fxregs_state *fx) { fx->cwd = 0x37f; @@ -259,6 +268,23 @@ int fpu_clone(struct task_struct *dst) if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; + /* + * Enforce reload for user space tasks and prevent kernel threads + * from trying to save the FPU registers on context switch. + */ + set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); + + /* + * No FPU state inheritance for kernel threads and IO + * worker threads. + */ + if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) { + /* Clear out the minimal state */ + memcpy(&dst_fpu->state, &init_fpstate, + init_fpstate_copy_size()); + return 0; + } + /* * If the FPU registers are not owned by current just memcpy() the * state. 
Otherwise save the FPU registers directly into the @@ -272,8 +298,6 @@ int fpu_clone(struct task_struct *dst) save_fpregs_to_fpstate(dst_fpu); fpregs_unlock(); - set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); - trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -322,15 +346,6 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) pkru_write_default(); } -static inline unsigned int init_fpstate_copy_size(void) -{ - if (!use_xsave()) - return fpu_kernel_xstate_size; - - /* XSAVE(S) just needs the legacy and the xstate header part */ - return sizeof(init_fpstate.xsave); -} - /* * Reset current->fpu memory state to the init values. */ -- Gitee From a2a5121a6da27efad1eb69ce9d01afee1632e2b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:07 +0200 Subject: [PATCH 100/188] x86/fpu: Cleanup xstate xcomp_bv initialization mainline inclusion from mainline-v5.16-rc1 commit 126fe0401883598b45b34dbbd5e0d7d8a0aefa21 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 126fe0401883 x86/fpu: Cleanup xstate xcomp_bv initialization. -------------------------------- No point in having this duplicated all over the place with needlessly different defines. Provide a proper initialization function which initializes user buffers properly and make KVM use it. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.897664678@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 4 +++- arch/x86/kernel/fpu/core.c | 35 ++++++++++++++++------------- arch/x86/kernel/fpu/init.c | 6 ++--- arch/x86/kernel/fpu/xstate.c | 8 +++---- arch/x86/kernel/fpu/xstate.h | 18 +++++++++++++++ arch/x86/kvm/x86.c | 8 +++---- 6 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 arch/x86/kernel/fpu/xstate.h diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 1503750534f7..df57f1af3a4c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -80,7 +80,9 @@ static __always_inline __pure bool use_fxsr(void) extern union fpregs_state init_fpstate; -extern void fpstate_init(union fpregs_state *state); +extern void fpstate_init_user(union fpregs_state *state); +extern void fpu_init_fpstate_user(struct fpu *fpu); + #ifdef CONFIG_MATH_EMULATION extern void fpstate_init_soft(struct swregs_state *soft); #else diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9a6b195a8a00..0789f0c3dca9 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -16,6 +16,8 @@ #include #include +#include "xstate.h" + #define CREATE_TRACE_POINTS #include @@ -203,15 +205,6 @@ void fpu_sync_fpstate(struct fpu *fpu) fpregs_unlock(); } -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; -} - static inline unsigned int init_fpstate_copy_size(void) { if (!use_xsave()) @@ -238,23 +231,33 @@ static inline void fpstate_init_fstate(struct fregs_state *fp) fp->fos = 0xffff0000u; } -void fpstate_init(union fpregs_state *state) +/* + * Used in two places: + * 1) Early boot to setup init_fpstate for 
non XSAVE systems + * 2) fpu_init_fpstate_user() which is invoked from KVM + */ +void fpstate_init_user(union fpregs_state *state) { - if (!static_cpu_has(X86_FEATURE_FPU)) { + if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpstate_init_soft(&state->soft); return; } - memset(state, 0, fpu_kernel_xstate_size); + xstate_init_xcomp_bv(&state->xsave, xfeatures_mask_uabi()); - if (static_cpu_has(X86_FEATURE_XSAVES)) - fpstate_init_xstate(&state->xsave); - if (static_cpu_has(X86_FEATURE_FXSR)) + if (cpu_feature_enabled(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else fpstate_init_fstate(&state->fsave); } -EXPORT_SYMBOL_GPL(fpstate_init); + +#if IS_ENABLED(CONFIG_KVM) +void fpu_init_fpstate_user(struct fpu *fpu) +{ + fpstate_init_user(&fpu->state); +} +EXPORT_SYMBOL_GPL(fpu_init_fpstate_user); +#endif /* Clone current's FPU state on fork */ int fpu_clone(struct task_struct *dst) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 86bc9759fc8b..37f872630a0e 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -121,10 +121,10 @@ static void __init fpu__init_system_mxcsr(void) static void __init fpu__init_system_generic(void) { /* - * Set up the legacy init FPU context. (xstate init might overwrite this - * with a more modern format, if the CPU supports it.) + * Set up the legacy init FPU context. Will be updated when the + * CPU supports XSAVE[S]. 
*/ - fpstate_init(&init_fpstate); + fpstate_init_user(&init_fpstate); fpu__init_system_mxcsr(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5cb91ca9339d..60061344267f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -15,10 +15,10 @@ #include #include #include -#include #include -#include + +#include "xstate.h" /* * Although we spell it out in here, the Processor Trace @@ -389,9 +389,7 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask_all; + xstate_init_xcomp_bv(&init_fpstate.xsave, xfeatures_mask_all); /* * Init all the features state with header.xfeatures being 0x0 diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h new file mode 100644 index 000000000000..0789a04ee705 --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_XSTATE_H +#define __X86_KERNEL_FPU_XSTATE_H + +#include +#include + +static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; +} + +#endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d46232904d1..0beb7dbc8ced 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10067,11 +10067,6 @@ static int sync_regs(struct kvm_vcpu *vcpu) static void fx_init(struct kvm_vcpu *vcpu) { - fpstate_init(&vcpu->arch.guest_fpu->state); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv = - host_xcr0 | XSTATE_COMPACTION_ENABLED; - /* * Ensure guest xcr0 is valid for loading */ @@ -10147,6 +10142,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) pr_err("kvm: 
failed to allocate vcpu's fpu\n"); goto free_user_fpu; } + + fpu_init_fpstate_user(vcpu->arch.user_fpu); + fpu_init_fpstate_user(vcpu->arch.guest_fpu); fx_init(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); -- Gitee From d453b46b82ffbb8aa2a3d1cdfe01e5e3532a25fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:09 +0200 Subject: [PATCH 101/188] x86/fpu/xstate: Provide and use for_each_xfeature() mainline inclusion from mainline-v5.16-rc1 commit ffd3e504c9e0de8b85755f3c7eabbbdd984cfeed category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ffd3e504c9e0 x86/fpu/xstate: Provide and use for_each_xfeature(). -------------------------------- These loops evaluating xfeature bits are really hard to read. Create an iterator and use for_each_set_bit_from() inside which already does the right thing. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011538.958107505@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 56 +++++++++++++++--------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 60061344267f..bb27d7da4f0d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -4,6 +4,7 @@ * * Author: Suresh Siddha */ +#include #include #include #include @@ -20,6 +21,10 @@ #include "xstate.h" +#define for_each_extended_xfeature(bit, mask) \ + (bit) = FIRST_EXTENDED_XFEATURE; \ + for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask)) + /* * Although we spell it out in here, the Processor Trace * xfeature is completely unused. 
We use other mechanisms @@ -184,10 +189,7 @@ static void __init setup_xstate_features(void) xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - + for_each_extended_xfeature(i, xfeatures_mask_all) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; @@ -291,20 +293,15 @@ static void __init setup_xstate_comp_offsets(void) xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state, xmm_space); - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (xfeature_enabled(i)) - xstate_comp_offsets[i] = xstate_offsets[i]; - } + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) { + for_each_extended_xfeature(i, xfeatures_mask_all) + xstate_comp_offsets[i] = xstate_offsets[i]; return; } next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - + for_each_extended_xfeature(i, xfeatures_mask_all) { if (xfeature_is_aligned(i)) next_offset = ALIGN(next_offset, 64); @@ -328,8 +325,8 @@ static void __init setup_supervisor_only_offsets(void) next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i) || !xfeature_is_supervisor(i)) + for_each_extended_xfeature(i, xfeatures_mask_all) { + if (!xfeature_is_supervisor(i)) continue; if (xfeature_is_aligned(i)) @@ -347,9 +344,7 @@ static void __init print_xstate_offset_size(void) { int i; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; + for_each_extended_xfeature(i, xfeatures_mask_all) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xstate_comp_offsets[i], i, xstate_sizes[i]); } @@ -554,10 +549,7 @@ static void do_extra_xstate_size_checks(void) int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; 
int i; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - + for_each_extended_xfeature(i, xfeatures_mask_all) { check_xstate_against_struct(i); /* * Supervisor state components can be managed only by @@ -586,7 +578,6 @@ static void do_extra_xstate_size_checks(void) XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } - /* * Get total size of enabled xstates in XCR0 | IA32_XSS. * @@ -969,6 +960,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, struct xregs_state *xinit = &init_fpstate.xsave; struct xstate_header header; unsigned int zerofrom; + u64 mask; int i; memset(&header, 0, sizeof(header)); @@ -1022,17 +1014,15 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, zerofrom = offsetof(struct xregs_state, extended_state_area); - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - /* - * The ptrace buffer is in non-compacted XSAVE format. - * In non-compacted format disabled features still occupy - * state space, but there is no state to copy from in the - * compacted init_fpstate. The gap tracking will zero this - * later. - */ - if (!(xfeatures_mask_uabi() & BIT_ULL(i))) - continue; + /* + * The ptrace buffer is in non-compacted XSAVE format. In + * non-compacted format disabled features still occupy state space, + * but there is no state to copy from in the compacted + * init_fpstate. The gap tracking will zero these states. + */ + mask = xfeatures_mask_uabi(); + for_each_extended_xfeature(i, mask) { /* * If there was a feature or alignment gap, zero the space * in the destination buffer. 
-- Gitee From 0bb0e9962a7faa2dd18f011ab942f65a836d2e84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:10 +0200 Subject: [PATCH 102/188] x86/fpu/xstate: Mark all init only functions __init mainline inclusion from mainline-v5.16-rc1 commit 63cf05a19a5d3fb6e66b5f7ceb76e77dfc2695f2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 63cf05a19a5d x86/fpu/xstate: Mark all init only functions __init. -------------------------------- No point to keep them around after boot. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.017919252@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index bb27d7da4f0d..6b290cf59fdf 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -462,7 +462,7 @@ static int validate_user_xstate_header(const struct xstate_header *hdr) return 0; } -static void __xstate_dump_leaves(void) +static void __init __xstate_dump_leaves(void) { int i; u32 eax, ebx, ecx, edx; @@ -502,7 +502,7 @@ static void __xstate_dump_leaves(void) * that our software representation matches what the CPU * tells us about the state's size. */ -static void check_xstate_against_struct(int nr) +static void __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state. @@ -544,7 +544,7 @@ static void check_xstate_against_struct(int nr) * covered by these checks. Only the size of the buffer for task->fpu * is checked here. 
*/ -static void do_extra_xstate_size_checks(void) +static void __init do_extra_xstate_size_checks(void) { int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; @@ -646,7 +646,7 @@ static unsigned int __init get_xsave_size(void) * Will the runtime-enumerated 'xstate_size' fit in the init * task's statically-allocated buffer? */ -static bool is_supported_xstate_size(unsigned int test_xstate_size) +static bool __init is_supported_xstate_size(unsigned int test_xstate_size) { if (test_xstate_size <= sizeof(union fpregs_state)) return true; @@ -691,7 +691,7 @@ static int __init init_xstate_size(void) * We enabled the XSAVE hardware, but something went wrong and * we can not use it. Disable it. */ -static void fpu__init_disable_system_xstate(void) +static void __init fpu__init_disable_system_xstate(void) { xfeatures_mask_all = 0; cr4_clear_bits(X86_CR4_OSXSAVE); -- Gitee From 6e7c989feb69b88ed5aceccda22661b1b00ef04d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:12 +0200 Subject: [PATCH 103/188] x86/fpu: Move KVMs FPU swapping to FPU core mainline inclusion from mainline-v5.16-rc1 commit a0ff0611c2fbde94f6c9db8351939b08f2cb6797 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit a0ff0611c2fb x86/fpu: Move KVMs FPU swapping to FPU core. -------------------------------- Swapping the host/guest FPU is directly fiddling with FPU internals which requires 5 exports. The upcoming support of dynamically enabled states would even need more. Implement a swap function in the FPU core code and export that instead. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Paolo Bonzini Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20211015011539.076072399@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 8 +++++ arch/x86/include/asm/fpu/internal.h | 15 ++------- arch/x86/kernel/fpu/core.c | 30 +++++++++++++++--- arch/x86/kernel/fpu/init.c | 1 - arch/x86/kernel/fpu/xstate.c | 1 - arch/x86/kvm/x86.c | 49 +++++++++-------------------- arch/x86/mm/extable.c | 2 +- 7 files changed, 52 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 8b9bfaad6e66..2928de7c5dc2 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -12,6 +12,8 @@ #define _ASM_X86_FPU_API_H #include +#include + /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods @@ -81,4 +83,10 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); static inline void update_pasid(void) { } +/* fpstate-related functions which are exported to KVM */ +extern void fpu_init_fpstate_user(struct fpu *fpu); + +/* KVM specific functions */ +extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index df57f1af3a4c..3ac55ba55782 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -74,14 +74,8 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } -/* - * fpstate handling functions: - */ - extern union fpregs_state init_fpstate; - extern void fpstate_init_user(union fpregs_state *state); -extern void fpu_init_fpstate_user(struct fpu *fpu); #ifdef CONFIG_MATH_EMULATION extern void fpstate_init_soft(struct swregs_state *soft); @@ -381,12 +375,7 @@ 
static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) return err; } -extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); - -static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) -{ - __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); -} +extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -467,7 +456,7 @@ static inline void fpregs_restore_userregs(void) */ mask = xfeatures_mask_restore_user() | xfeatures_mask_supervisor(); - __restore_fpregs_from_fpstate(&fpu->state, mask); + restore_fpregs_from_fpstate(&fpu->state, mask); fpregs_activate(fpu); fpu->last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0789f0c3dca9..023bfe857907 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -124,9 +124,8 @@ void save_fpregs_to_fpstate(struct fpu *fpu) asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); frstor(&fpu->state.fsave); } -EXPORT_SYMBOL(save_fpregs_to_fpstate); -void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) +void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) { /* * AMD K7/K8 and later CPUs up to Zen don't save/restore @@ -151,7 +150,31 @@ void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) frstor(&fpstate->fsave); } } -EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate); + +#if IS_ENABLED(CONFIG_KVM) +void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) +{ + fpregs_lock(); + + if (save) { + if (test_thread_flag(TIF_NEED_FPU_LOAD)) { + memcpy(&save->state, &current->thread.fpu.state, + fpu_kernel_xstate_size); + } else { + save_fpregs_to_fpstate(save); + } + } + + if (rstor) { + restore_mask &= xfeatures_mask_fpstate(); + restore_fpregs_from_fpstate(&rstor->state, restore_mask); + } + + fpregs_mark_activate(); +
fpregs_unlock(); +} +EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); +#endif void kernel_fpu_begin_mask(unsigned int kfpu_mask) { @@ -457,7 +480,6 @@ void fpregs_mark_activate(void) fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } -EXPORT_SYMBOL_GPL(fpregs_mark_activate); /* * x87 math exception handling: diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 37f872630a0e..545c91c723b8 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -136,7 +136,6 @@ static void __init fpu__init_system_generic(void) * components into a single, continuous memory block: */ unsigned int fpu_kernel_xstate_size __ro_after_init; -EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6b290cf59fdf..b745dc05e605 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -65,7 +65,6 @@ static short xsave_cpuid_features[] __initdata = { * XSAVE buffer, both supervisor and user xstates. */ u64 xfeatures_mask_all __ro_after_init; -EXPORT_SYMBOL_GPL(xfeatures_mask_all); static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0beb7dbc8ced..0ca479cdd303 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -66,7 +66,9 @@ #include #include #include -#include /* Ugh! */ +#include +#include +#include #include #include #include @@ -9448,48 +9450,27 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } -static void kvm_save_current_fpu(struct fpu *fpu) -{ - /* - * If the target FPU state is not resident in the CPU registers, just - * memcpy() from current, else save CPU state directly to the target. 
- */ - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - memcpy(&fpu->state, ¤t->thread.fpu.state, - fpu_kernel_xstate_size); - else - save_fpregs_to_fpstate(fpu); -} - /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - fpregs_lock(); - - kvm_save_current_fpu(vcpu->arch.user_fpu); - - /* PKRU is separately restored in kvm_x86_ops.run. */ - __restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state, - ~XFEATURE_MASK_PKRU); - - fpregs_mark_activate(); - fpregs_unlock(); - + /* + * Guests with protected state have guest_fpu == NULL which makes + * the swap only save the host state. Exclude PKRU from restore as + * it is restored separately in kvm_x86_ops.run(). + */ + fpu_swap_kvm_fpu(vcpu->arch.user_fpu, vcpu->arch.guest_fpu, + ~XFEATURE_MASK_PKRU); trace_kvm_fpu(1); } /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - fpregs_lock(); - - kvm_save_current_fpu(vcpu->arch.guest_fpu); - - restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state); - - fpregs_mark_activate(); - fpregs_unlock(); - + /* + * Guests with protected state have guest_fpu == NULL which makes + * swap only restore the host state. 
+ */ + fpu_swap_kvm_fpu(vcpu->arch.guest_fpu, vcpu->arch.user_fpu, ~0ULL); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 33ba927eaafc..69a3acc712f1 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -47,7 +47,7 @@ static bool ex_handler_fprestore(const struct exception_table_entry *fixup, WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); - __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); + restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); return true; } -- Gitee From ec0a66b6722f15493a8c2755aa41b26869e15950 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:13 +0200 Subject: [PATCH 104/188] x86/fpu: Replace KVMs home brewed FPU copy from user mainline inclusion from mainline-v5.16-rc1 commit ea4d6938d4c0761672ff6237964a20db3cb95cc1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ea4d6938d4c0 x86/fpu: Replace KVMs home brewed FPU copy from user. -------------------------------- Copying a user space buffer to the memory buffer is already available in the FPU core. The copy mechanism in KVM lacks sanity checks and needs to use cpuid() to lookup the offset of each component, while the FPU core has this information cached. Make the FPU core variant accessible for KVM and replace the home brewed mechanism. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20211015011539.134065207@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 2 + arch/x86/kernel/fpu/core.c | 38 +++++++++++++++++- arch/x86/kernel/fpu/xstate.c | 3 +- arch/x86/kvm/x86.c | 72 ++-------------------------------- 4 files changed, 43 insertions(+), 72 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 2928de7c5dc2..79073383a9bc 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -89,4 +89,6 @@ extern void fpu_init_fpstate_user(struct fpu *fpu); /* KVM specific functions */ extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); +extern int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *pkru); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 023bfe857907..65fc87760011 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -174,7 +174,43 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); -#endif + +int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, + u32 *vpkru) +{ + union fpregs_state *kstate = &fpu->state; + const union fpregs_state *ustate = buf; + struct pkru_state *xpkru; + int ret; + + if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { + if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) + return -EINVAL; + if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; + memcpy(&kstate->fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); + return 0; + } + + if (ustate->xsave.header.xfeatures & ~xcr0) + return -EINVAL; + + ret = copy_uabi_from_kernel_to_xstate(&kstate->xsave, ustate); + if (ret) + return ret; + + /* Retrieve PKRU if not in init state */ + if 
(kstate->xsave.header.xfeatures & XFEATURE_MASK_PKRU) { + xpkru = get_xsave_addr(&kstate->xsave, XFEATURE_PKRU); + *vpkru = xpkru->pkru; + } + + /* Ensure that XCOMP_BV is set up for XSAVES */ + xstate_init_xcomp_bv(&kstate->xsave, xfeatures_mask_uabi()); + return 0; +} +EXPORT_SYMBOL_GPL(fpu_copy_kvm_uabi_to_fpstate); +#endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b745dc05e605..276f8edb1440 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1134,8 +1134,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] - * format and copy to the target thread. This is called from - * xstateregs_set(). + * format and copy to the target thread. Used by ptrace and KVM. */ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ca479cdd303..def5897c9bce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4521,8 +4521,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -#define XSTATE_COMPACTION_ENABLED (1ULL << 63) - static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave; @@ -4566,50 +4564,6 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) } } -static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) -{ - struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave; - u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); - u64 valid; - - /* - * Copy legacy XSAVE area, to avoid complications with CPUID - * leaves 0 and 1 in the loop below. - */ - memcpy(xsave, src, XSAVE_HDR_OFFSET); - - /* Set XSTATE_BV and possibly XCOMP_BV. 
*/ - xsave->header.xfeatures = xstate_bv; - if (boot_cpu_has(X86_FEATURE_XSAVES)) - xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; - - /* - * Copy each region from the non-compacted offset to the - * possibly compacted offset. - */ - valid = xstate_bv & ~XFEATURE_MASK_FPSSE; - while (valid) { - u32 size, offset, ecx, edx; - u64 xfeature_mask = valid & -valid; - int xfeature_nr = fls64(xfeature_mask) - 1; - - cpuid_count(XSTATE_CPUID, xfeature_nr, - &size, &offset, &ecx, &edx); - - if (xfeature_nr == XFEATURE_PKRU) { - memcpy(&vcpu->arch.pkru, src + offset, - sizeof(vcpu->arch.pkru)); - } else { - void *dest = get_xsave_addr(xsave, xfeature_nr); - - if (dest) - memcpy(dest, src + offset, size); - } - - valid -= xfeature_mask; - } -} - static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { @@ -4625,32 +4579,12 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, } } -#define XSAVE_MXCSR_OFFSET 24 - static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - u64 xstate_bv = - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; - - if (boot_cpu_has(X86_FEATURE_XSAVE)) { - /* - * Here we allow setting states that are not present in - * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility - * with old userspace. 
- */ - if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask) - return -EINVAL; - load_xsave(vcpu, (u8 *)guest_xsave->region); - } else { - if (xstate_bv & ~XFEATURE_MASK_FPSSE || - mxcsr & ~mxcsr_feature_mask) - return -EINVAL; - memcpy(&vcpu->arch.guest_fpu->state.fxsave, - guest_xsave->region, sizeof(struct fxregs_state)); - } - return 0; + return fpu_copy_kvm_uabi_to_fpstate(vcpu->arch.guest_fpu, + guest_xsave->region, + supported_xcr0, &vcpu->arch.pkru); } static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, -- Gitee From aaa019db726ba107aab35a0841af564bd4cf9bc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:15 +0200 Subject: [PATCH 105/188] x86/fpu: Rework copy_xstate_to_uabi_buf() mainline inclusion from mainline-v5.16-rc1 commit ca834defd33bae9cf9542ff92b15635a84e91946 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ca834defd33b x86/fpu: Rework copy_xstate_to_uabi_buf(). -------------------------------- Prepare for replacing the KVM copy xstate to user function by extending copy_xstate_to_uabi_buf() with a pkru argument which allows the caller to hand in the pkru value, which is required for KVM because the guest PKRU is not accessible via current. Fixup all callsites accordingly. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.191902137@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 34 ++++++++++++++++++++++++++-------- arch/x86/kernel/fpu/xstate.h | 3 +++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 276f8edb1440..fab0f5727932 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -940,9 +940,10 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, } /** - * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor - * @tsk: The task from which to copy the saved xstate + * @xsave: The xsave from which to copy + * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming @@ -951,11 +952,10 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * * It supports partial copy but @to.pos always starts from zero. */ -void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, - enum xstate_copy_mode copy_mode) +void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, + u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct xregs_state *xinit = &init_fpstate.xsave; struct xstate_header header; unsigned int zerofrom; @@ -1033,10 +1033,9 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, struct pkru_state pkru = {0}; /* * PKRU is not necessarily up to date in the - * thread's XSAVE buffer. Fill this part from the - * per-thread storage. + * XSAVE buffer. Use the provided value. 
*/ - pkru.pkru = tsk->thread.pkru; + pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { copy_feature(header.xfeatures & BIT_ULL(i), &to, @@ -1056,6 +1055,25 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, membuf_zero(&to, to.left); } +/** + * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * @to: membuf descriptor + * @tsk: The task from which to copy the saved xstate + * @copy_mode: The requested copy mode + * + * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming + * format, i.e. from the kernel internal hardware dependent storage format + * to the requested @mode. UABI XSTATE is always uncompacted! + * + * It supports partial copy but @to.pos always starts from zero. + */ +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode copy_mode) +{ + __copy_xstate_to_uabi_buf(to, &tsk->thread.fpu.state.xsave, + tsk->thread.pkru, copy_mode); +} + static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, const void *kbuf, const void __user *ubuf) { diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 0789a04ee705..81f4202781ac 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -15,4 +15,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } +extern void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, + u32 pkru_val, enum xstate_copy_mode copy_mode); + #endif -- Gitee From 03925dde4a5cd72648f39d0e6d34cbb56a47c1e7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:18 +0200 Subject: [PATCH 106/188] x86/fpu: Mark fpu__init_prepare_fx_sw_frame() as __init mainline inclusion from mainline-v5.16-rc1 commit 9603445549dacd7688532a4076c377e43a3ecfce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA 
Intel-SIG: commit 9603445549da x86/fpu: Mark fpu__init_prepare_fx_sw_frame() as __init. -------------------------------- No need to keep it around. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.296435736@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/signal.h | 2 -- arch/x86/kernel/fpu/internal.h | 8 ++++++++ arch/x86/kernel/fpu/signal.c | 4 +++- arch/x86/kernel/fpu/xstate.c | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/fpu/internal.h diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 8b6631dffefd..04868a76239a 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -31,6 +31,4 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long fpu__get_fpstate_size(void); -extern void fpu__init_prepare_fx_sw_frame(void); - #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h new file mode 100644 index 000000000000..036f84c236dd --- /dev/null +++ b/arch/x86/kernel/fpu/internal.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_INTERNAL_H +#define __X86_KERNEL_FPU_INTERNAL_H + +/* Init functions */ +extern void fpu__init_prepare_fx_sw_frame(void); + +#endif diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index e257805d3d3f..2a4d1d0b32d4 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -16,6 +16,8 @@ #include #include +#include "internal.h" + static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; @@ -514,7 +516,7 @@ unsigned long fpu__get_fpstate_size(void) * This will be saved when ever the FP and extended state context is * saved on the user stack during the signal handler delivery to the user. 
*/ -void fpu__init_prepare_fx_sw_frame(void) +void __init fpu__init_prepare_fx_sw_frame(void) { int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fab0f5727932..cc69088ea787 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -19,6 +19,7 @@ #include +#include "internal.h" #include "xstate.h" #define for_each_extended_xfeature(bit, mask) \ -- Gitee From 2688a26765d8dbc1a17ffee7598cd9eb54356835 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:20 +0200 Subject: [PATCH 107/188] x86/fpu: Move context switch and exit to user inlines into sched.h mainline inclusion from mainline-v5.16-rc1 commit 63e81807c1f94e91b9d71c536112a40cd74bab85 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 63e81807c1f9 x86/fpu: Move context switch and exit to user inlines into sched.h. -------------------------------- internal.h is a kitchen sink which needs to get out of the way to prepare for the upcoming changes. Move the context switch and exit to user inlines into a separate header, which is all that code needs. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.349132461@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 60 ------------------------- arch/x86/include/asm/fpu/sched.h | 68 +++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/process.c | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- 6 files changed, 72 insertions(+), 63 deletions(-) create mode 100644 arch/x86/include/asm/fpu/sched.h diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3ac55ba55782..398c87c8e199 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -27,16 +27,11 @@ * High level FPU state handling functions: */ extern bool fpu__restore_sig(void __user *buf, int ia32_frame); -extern void fpu__drop(struct fpu *fpu); extern void fpu__clear_user_states(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); -/* Clone and exit operations */ -extern int fpu_clone(struct task_struct *dst); -extern void fpu_flush_thread(void); - /* * Boot time FPU initialization functions: */ @@ -82,7 +77,6 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif -extern void save_fpregs_to_fpstate(struct fpu *fpu); /* * Returns 0 on success or the trap number when the operation raises an @@ -464,58 +458,4 @@ static inline void fpregs_restore_userregs(void) clear_thread_flag(TIF_NEED_FPU_LOAD); } -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state. - * This is done within the context of the old process. - * - * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state - * will get loaded on return to userspace, or when the kernel needs it. 
- * - * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers - * are saved in the current thread's FPU register state. - * - * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not - * hold current()'s FPU registers. It is required to load the - * registers before returning to userland or using the content - * otherwise. - * - * The FPU context is only stored/restored for a user task and - * PF_KTHREAD is used to distinguish between kernel and user threads. - */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) -{ - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - save_fpregs_to_fpstate(old_fpu); - /* - * The save operation preserved register state, so the - * fpu_fpregs_owner_ctx is still @old_fpu. Store the - * current CPU number in @old_fpu, so the next return - * to user space can avoid the FPU register restore - * when is returns on the same CPU and still owns the - * context. - */ - old_fpu->last_cpu = cpu; - - trace_x86_fpu_regs_deactivated(old_fpu); - } -} - -/* - * Misc helper functions: - */ - -/* - * Delay loading of the complete FPU state until the return to userland. - * PKRU is handled separately. - */ -static inline void switch_fpu_finish(void) -{ - if (cpu_feature_enabled(X86_FEATURE_FPU)) - set_thread_flag(TIF_NEED_FPU_LOAD); -} - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h new file mode 100644 index 000000000000..cdb78d590c86 --- /dev/null +++ b/arch/x86/include/asm/fpu/sched.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_SCHED_H +#define _ASM_X86_FPU_SCHED_H + +#include + +#include +#include + +#include + +extern void save_fpregs_to_fpstate(struct fpu *fpu); +extern void fpu__drop(struct fpu *fpu); +extern int fpu_clone(struct task_struct *dst); +extern void fpu_flush_thread(void); + +/* + * FPU state switching for scheduling. 
+ * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state. + * This is done within the context of the old process. + * + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. + * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * + * The FPU context is only stored/restored for a user task and + * PF_KTHREAD is used to distinguish between kernel and user threads. + */ +static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU) && + !(current->flags & PF_KTHREAD)) { + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; + + trace_x86_fpu_regs_deactivated(old_fpu); + } +} + +/* + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. 
+ */ +static inline void switch_fpu_finish(void) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); +} + +#endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 65fc87760011..e6087a61a844 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 18986d12277b..d708c97fd6fe 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d008e222a302..26edb1cd07a4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ec407de7f47a..77cf9d87ad45 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include #include #include -- Gitee From a19977b3fcee56252614a1f4f8db859e08d1b299 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:21 +0200 Subject: [PATCH 108/188] x86/fpu: Clean up CPU feature tests mainline inclusion from mainline-v5.16-rc1 commit d06241f52cfe4a0580856ef2cfac90dc7f752cae category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d06241f52cfe x86/fpu: Clean up CPU feature tests. -------------------------------- Further disintegration of internal.h: Move the CPU feature tests to a core header and remove the unused one. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.401510559@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 18 ------------------ arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/fpu/internal.h | 11 +++++++++++ arch/x86/kernel/fpu/regset.c | 2 ++ 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 398c87c8e199..5da7528b3b2f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -51,24 +51,6 @@ extern void fpu__resume_cpu(void); # define WARN_ON_FPU(x) ({ (void)(x); 0; }) #endif -/* - * FPU related CPU feature flag helper routines: - */ -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has(X86_FEATURE_FXSR); -} - extern union fpregs_state init_fpstate; extern void fpstate_init_user(union fpregs_state *state); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e6087a61a844..e9b51c75e2c9 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -17,6 +17,7 @@ #include #include +#include "internal.h" #include "xstate.h" #define CREATE_TRACE_POINTS diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index 036f84c236dd..a8aac21ba364 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -2,6 +2,17 @@ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H +/* CPU feature check wrappers */ +static __always_inline __pure bool use_xsave(void) +{ + return cpu_feature_enabled(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return cpu_feature_enabled(X86_FEATURE_FXSR); +} + /* Init 
functions */ extern void fpu__init_prepare_fx_sw_frame(void); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 66ed317ebc0d..ccf0c59955f1 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature -- Gitee From a4376cffac056f669ac4bb2639188c9a7ea27d8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:23 +0200 Subject: [PATCH 109/188] x86/fpu: Make os_xrstor_booting() private mainline inclusion from mainline-v5.16-rc1 commit b579d0c3750eedc0dee433edaba88206a8e4348a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b579d0c3750e x86/fpu: Make os_xrstor_booting() private. -------------------------------- It's only required in the xstate init code. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.455836597@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 25 ------------------------- arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5da7528b3b2f..3ad2ae73efa5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -225,31 +225,6 @@ static inline void fxsave(struct fxregs_state *fx) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
- */ -static inline void os_xrstor_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_fpstate(); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - /* - * We should never fault when copying from a kernel buffer, and the FPU - * state we set at boot time should be valid. - */ - WARN_ON_FPU(err); -} - /* * Save processor xstate to xsave area. * diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cc69088ea787..f33c96803b00 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -350,6 +350,29 @@ static void __init print_xstate_offset_size(void) } } +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static __init void os_xrstor_booting(struct xregs_state *xstate) +{ + u64 mask = xfeatures_mask_fpstate(); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + /* + * We should never fault when copying from a kernel buffer, and the FPU + * state we set at boot time should be valid. + */ + WARN_ON_FPU(err); +} + /* * All supported features have either init state all zeros or are * handled in setup_init_fpu() individually. 
This is an explicit -- Gitee From d94af7210aba55fe3d7f19e6b27ca922f0b4a0c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:24 +0200 Subject: [PATCH 110/188] x86/fpu: Move os_xsave() and os_xrstor() to core mainline inclusion from mainline-v5.16-rc1 commit df95b0f1aa56dfa71a0ef657e3e62294ee6d9034 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit df95b0f1aa56 x86/fpu: Move os_xsave() and os_xrstor() to core. -------------------------------- Nothing outside the core code needs these. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.513368075@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 165 -------------------------- arch/x86/include/asm/fpu/xstate.h | 6 - arch/x86/kernel/fpu/signal.c | 1 + arch/x86/kernel/fpu/xstate.h | 174 ++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 171 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3ad2ae73efa5..b68f9940489f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -161,171 +161,6 @@ static inline void fxsave(struct fxregs_state *fx) asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); } -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -/* - * After this @err contains 0 on success or the trap number when the - * operation raises an exception. 
- */ -#define XSTATE_OP(op, st, lmask, hmask, err) \ - asm volatile("1:" op "\n\t" \ - "xor %[err], %[err]\n" \ - "2:\n\t" \ - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ - : [err] "=a" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. - * - * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT - * supports modified optimization which is not supported by XSAVE. - * - * We use XSAVE as a fallback. - * - * The 661 label is defined in the ALTERNATIVE* macros as the address of the - * original instruction which gets replaced. We need to use it here as the - * address of the instruction where we might get an exception at. - */ -#define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ - XSAVEOPT, X86_FEATURE_XSAVEOPT, \ - XSAVES, X86_FEATURE_XSAVES) \ - "\n" \ - "xor %[err], %[err]\n" \ - "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact - * XSAVE area format. - */ -#define XSTATE_XRESTORE(st, lmask, hmask) \ - asm volatile(ALTERNATIVE(XRSTOR, \ - XRSTORS, X86_FEATURE_XSAVES) \ - "\n" \ - "3:\n" \ - _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ - : \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * Save processor xstate to xsave area. - * - * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features - * and command line options. The choice is permanent until the next reboot. 
- */ -static inline void os_xsave(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON_FPU(!alternatives_patched); - - XSTATE_XSAVE(xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * Restore processor xstate from xsave area. - * - * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. - */ -static inline void os_xrstor(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - XSTATE_XRESTORE(xstate, lmask, hmask); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for backward compatibility for - * old applications which don't understand the compacted format of the - * xsave area. - * - * The caller has to zero buf::header before calling this because XSAVE* - * does not touch the reserved fields in the header. - */ -static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) -{ - /* - * Include the features which are not xsaved/rstored by the kernel - * internally, e.g. PKRU. That's user space ABI and also required - * to allow the signal handler to modify PKRU. - */ - u64 mask = xfeatures_mask_uabi(); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - stac(); - XSTATE_OP(XSAVE, buf, lmask, hmask, err); - clac(); - - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) -{ - struct xregs_state *xstate = ((__force struct xregs_state *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - stac(); - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - clac(); - - return err; -} - -/* - * Restore xstate from kernel space xsave area, return an error code instead of - * an exception. 
- */ -static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - return err; -} - extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 109dfcc75299..b8cebc0ee420 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -78,12 +78,6 @@ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - extern u64 xfeatures_mask_all; static inline u64 xfeatures_mask_supervisor(void) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 2a4d1d0b32d4..3b38c59ce3f8 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -17,6 +17,7 @@ #include #include "internal.h" +#include "xstate.h" static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 81f4202781ac..ae61baa97682 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -18,4 +18,178 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) extern void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, u32 pkru_val, enum xstate_copy_mode copy_mode); +/* XSAVE/XRSTOR wrapper functions */ + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +/* These macros all use (%edi)/(%rdi) as the single memory argument. 
*/ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +/* + * After this @err contains 0 on success or the trap number when the + * operation raises an exception. + */ +#define XSTATE_OP(op, st, lmask, hmask, err) \ + asm volatile("1:" op "\n\t" \ + "xor %[err], %[err]\n" \ + "2:\n\t" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + : [err] "=a" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact + * format and supervisor states in addition to modified optimization in + * XSAVEOPT. + * + * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT + * supports modified optimization which is not supported by XSAVE. + * + * We use XSAVE as a fallback. + * + * The 661 label is defined in the ALTERNATIVE* macros as the address of the + * original instruction which gets replaced. We need to use it here as the + * address of the instruction where we might get an exception at. + */ +#define XSTATE_XSAVE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE_2(XSAVE, \ + XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVES, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact + * XSAVE area format. 
+ */ +#define XSTATE_XRESTORE(st, lmask, hmask) \ + asm volatile(ALTERNATIVE(XRSTOR, \ + XRSTORS, X86_FEATURE_XSAVES) \ + "\n" \ + "3:\n" \ + _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ + : \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * Save processor xstate to xsave area. + * + * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features + * and command line options. The choice is permanent until the next reboot. + */ +static inline void os_xsave(struct xregs_state *xstate) +{ + u64 mask = xfeatures_mask_all; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + WARN_ON_FPU(!alternatives_patched); + + XSTATE_XSAVE(xstate, lmask, hmask, err); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Restore processor xstate from xsave area. + * + * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. + */ +static inline void os_xrstor(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + XSTATE_XRESTORE(xstate, lmask, hmask); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for backward compatibility for + * old applications which don't understand the compacted format of the + * xsave area. + * + * The caller has to zero buf::header before calling this because XSAVE* + * does not touch the reserved fields in the header. + */ +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) +{ + /* + * Include the features which are not xsaved/rstored by the kernel + * internally, e.g. PKRU. That's user space ABI and also required + * to allow the signal handler to modify PKRU. 
+ */ + u64 mask = xfeatures_mask_uabi(); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + stac(); + XSTATE_OP(XSAVE, buf, lmask, hmask, err); + clac(); + + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) +{ + struct xregs_state *xstate = ((__force struct xregs_state *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + stac(); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + clac(); + + return err; +} + +/* + * Restore xstate from kernel space xsave area, return an error code instead of + * an exception. + */ +static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + return err; +} + + #endif -- Gitee From 9687a9033873d7ecb7a6feac0d35a3281a622127 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:26 +0200 Subject: [PATCH 111/188] x86/fpu: Move legacy ASM wrappers to core mainline inclusion from mainline-v5.16-rc1 commit 34002571cb4199a446f7582704424d20a01c276e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 34002571cb41 x86/fpu: Move legacy ASM wrappers to core. -------------------------------- Nothing outside the core code requires them. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.572439164@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 101 -------------------------- arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/fpu/legacy.h | 108 ++++++++++++++++++++++++++++ arch/x86/kernel/fpu/signal.c | 1 + arch/x86/kernel/fpu/xstate.c | 1 + 5 files changed, 111 insertions(+), 101 deletions(-) create mode 100644 arch/x86/kernel/fpu/legacy.h diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b68f9940489f..7722aadc3278 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -60,107 +60,6 @@ extern void fpstate_init_soft(struct swregs_state *soft); static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif -/* - * Returns 0 on success or the trap number when the operation raises an - * exception. - */ -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - \ - might_fault(); \ - \ - asm volatile(ASM_STAC "\n" \ - "1: " #insn "\n" \ - "2: " ASM_CLAC "\n" \ - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ - : [err] "=a" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn_err(insn, output, input...) \ -({ \ - int err; \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn(insn, output, input...) 
\ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ - : output : input) - -static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - -} - -static inline void fxrstor(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int fxrstor_safe(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void frstor(struct fregs_state *fx) -{ - kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_safe(struct fregs_state *fx) -{ - return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void fxsave(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); - else - asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); -} - extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff 
--git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e9b51c75e2c9..a009c82336a4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -18,6 +18,7 @@ #include #include "internal.h" +#include "legacy.h" #include "xstate.h" #define CREATE_TRACE_POINTS diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h new file mode 100644 index 000000000000..2ff36b0f79e9 --- /dev/null +++ b/arch/x86/kernel/fpu/legacy.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_LEGACY_H +#define __X86_KERNEL_FPU_LEGACY_H + +#include + +/* + * Returns 0 on success or the trap number when the operation raises an + * exception. + */ +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + \ + might_fault(); \ + \ + asm volatile(ASM_STAC "\n" \ + "1: " #insn "\n" \ + "2: " ASM_CLAC "\n" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + : [err] "=a" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define kernel_insn_err(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define kernel_insn(insn, output, input...) 
\ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ + : output : input) + +static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); +} + +static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + +} + +static inline void fxrstor(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int fxrstor_safe(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void frstor(struct fregs_state *fx) +{ + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int frstor_safe(struct fregs_state *fx) +{ + return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + +#endif diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 3b38c59ce3f8..e0198b24e28c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ 
b/arch/x86/kernel/fpu/signal.c @@ -17,6 +17,7 @@ #include #include "internal.h" +#include "legacy.h" #include "xstate.h" static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f33c96803b00..4cb4c766aa1d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -20,6 +20,7 @@ #include #include "internal.h" +#include "legacy.h" #include "xstate.h" #define for_each_extended_xfeature(bit, mask) \ -- Gitee From e8536e08bb33c7ba73db4ba779103bc7b5b33b1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:28 +0200 Subject: [PATCH 112/188] x86/fpu: Make WARN_ON_FPU() private mainline inclusion from mainline-v5.16-rc1 commit cdcb6fa14e1499ff2b2a3f3e0938c7b3b7ef2cd6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit cdcb6fa14e14 x86/fpu: Make WARN_ON_FPU() private. -------------------------------- No point in being in global headers. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.628516182@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 9 --------- arch/x86/kernel/fpu/init.c | 2 ++ arch/x86/kernel/fpu/internal.h | 6 ++++++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7722aadc3278..f8413a509ba5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,15 +42,6 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -/* - * Debugging facility: - */ -#ifdef CONFIG_X86_DEBUG_FPU -# define WARN_ON_FPU(x) WARN_ON_ONCE(x) -#else -# define WARN_ON_FPU(x) ({ (void)(x); 0; }) -#endif - extern union fpregs_state init_fpstate; extern void fpstate_init_user(union fpregs_state *state); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 545c91c723b8..24873dfe2dba 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Initialize the registers found in all CPUs, CR0 and CR4: */ diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index a8aac21ba364..5ddc09e03c2a 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -13,6 +13,12 @@ static __always_inline __pure bool use_fxsr(void) return cpu_feature_enabled(X86_FEATURE_FXSR); } +#ifdef CONFIG_X86_DEBUG_FPU +# define WARN_ON_FPU(x) WARN_ON_ONCE(x) +#else +# define WARN_ON_FPU(x) ({ (void)(x); 0; }) +#endif + /* Init functions */ extern void fpu__init_prepare_fx_sw_frame(void); -- Gitee From 3e7b801e2d5bc7e82a5e0426d1ed968acaf342f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:30 +0200 Subject: [PATCH 113/188] x86/fpu: Move fpregs_restore_userregs() to core mainline inclusion 
from mainline-v5.16-rc1 commit 9848fb96839bfd6ad4c00748842ccfd5bd3b0346 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9848fb96839b x86/fpu: Move fpregs_restore_userregs() to core. -------------------------------- Only used internally in the FPU core code. While at it, convert to the percpu accessors which verify preemption is disabled. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.686806639@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 83 ---------------------------- arch/x86/kernel/fpu/context.h | 85 +++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/fpu/regset.c | 1 + arch/x86/kernel/fpu/signal.c | 1 + 5 files changed, 88 insertions(+), 83 deletions(-) create mode 100644 arch/x86/kernel/fpu/context.h diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index f8413a509ba5..74b7cc3d2e77 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -55,89 +55,6 @@ extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); -/* - * FPU context switch related helper methods: - */ - DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -/* - * The in-register FPU state for an FPU context on a CPU is assumed to be - * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx - * matches the FPU. - * - * If the FPU register state is valid, the kernel can skip restoring the - * FPU state from memory. - * - * Any code that clobbers the FPU registers or updates the in-memory - * FPU state for a task MUST let the rest of the kernel know that the - * FPU registers are no longer valid for this task. - * - * Either one of these invalidation functions is enough. 
Invalidate - * a resource you control: CPU if using the CPU for something else - * (with preemption disabled), FPU for the current task, or a task that - * is prevented from running by the current task. - */ -static inline void __cpu_invalidate_fpregs_state(void) -{ - __this_cpu_write(fpu_fpregs_owner_ctx, NULL); -} - -static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) -{ - fpu->last_cpu = -1; -} - -static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -/* - * These generally need preemption protection to work, - * do try to avoid using these on their own: - */ -static inline void fpregs_deactivate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, NULL); - trace_x86_fpu_regs_deactivated(fpu); -} - -static inline void fpregs_activate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, fpu); - trace_x86_fpu_regs_activated(fpu); -} - -/* Internal helper for switch_fpu_return() and signal frame setup */ -static inline void fpregs_restore_userregs(void) -{ - struct fpu *fpu = &current->thread.fpu; - int cpu = smp_processor_id(); - - if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) - return; - - if (!fpregs_state_valid(fpu, cpu)) { - u64 mask; - - /* - * This restores _all_ xstate which has not been - * established yet. - * - * If PKRU is enabled, then the PKRU value is already - * correct because it was either set in switch_to() or in - * flush_thread(). So it is excluded because it might be - * not up to date in current->thread.fpu.xsave state.
- */ - mask = xfeatures_mask_restore_user() | - xfeatures_mask_supervisor(); - restore_fpregs_from_fpstate(&fpu->state, mask); - - fpregs_activate(fpu); - fpu->last_cpu = cpu; - } - clear_thread_flag(TIF_NEED_FPU_LOAD); -} - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h new file mode 100644 index 000000000000..e652282842c8 --- /dev/null +++ b/arch/x86/kernel/fpu/context.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_CONTEXT_H +#define __X86_KERNEL_FPU_CONTEXT_H + +#include +#include + +/* Functions related to FPU context tracking */ + +/* + * The in-register FPU state for an FPU context on a CPU is assumed to be + * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx + * matches the FPU. + * + * If the FPU register state is valid, the kernel can skip restoring the + * FPU state from memory. + * + * Any code that clobbers the FPU registers or updates the in-memory + * FPU state for a task MUST let the rest of the kernel know that the + * FPU registers are no longer valid for this task. + * + * Either one of these invalidation functions is enough. Invalidate + * a resource you control: CPU if using the CPU for something else + * (with preemption disabled), FPU for the current task, or a task that + * is prevented from running by the current task. 
+ */ +static inline void __cpu_invalidate_fpregs_state(void) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); +} + +static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) +{ + fpu->last_cpu = -1; +} + +static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + +static inline void fpregs_deactivate(struct fpu *fpu) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); + trace_x86_fpu_regs_deactivated(fpu); +} + +static inline void fpregs_activate(struct fpu *fpu) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, fpu); + trace_x86_fpu_regs_activated(fpu); +} + +/* Internal helper for switch_fpu_return() and signal frame setup */ +static inline void fpregs_restore_userregs(void) +{ + struct fpu *fpu = &current->thread.fpu; + int cpu = smp_processor_id(); + + if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) + return; + + if (!fpregs_state_valid(fpu, cpu)) { + u64 mask; + + /* + * This restores _all_ xstate which has not been + * established yet. + * + * If PKRU is enabled, then the PKRU value is already + * correct because it was either set in switch_to() or in + * flush_thread(). So it is excluded because it might be + * not up to date in current->thread.fpu.xsave state.
+ */ + mask = xfeatures_mask_restore_user() | + xfeatures_mask_supervisor(); + restore_fpregs_from_fpstate(&fpu->state, mask); + + fpregs_activate(fpu); + fpu->last_cpu = cpu; + } + clear_thread_flag(TIF_NEED_FPU_LOAD); +} + +#endif diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a009c82336a4..739728889b54 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -17,6 +17,7 @@ #include #include +#include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ccf0c59955f1..a40150e350b6 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -10,6 +10,7 @@ #include #include +#include "context.h" #include "internal.h" /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index e0198b24e28c..32dbcde72fbe 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -16,6 +16,7 @@ #include #include +#include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" -- Gitee From f3448b211890556b2c01770176fda40a1eecbed8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:31 +0200 Subject: [PATCH 114/188] x86/fpu: Move mxcsr related code to core mainline inclusion from mainline-v5.16-rc1 commit d9d005f32aac7362a1998f4b7fdf8874e91546bd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d9d005f32aac x86/fpu: Move mxcsr related code to core. -------------------------------- No need to expose that to code which only needs the XCR0 accessors. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.740012411@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xcr.h | 11 ----------- arch/x86/kernel/fpu/init.c | 1 + arch/x86/kernel/fpu/legacy.h | 7 +++++++ arch/x86/kernel/fpu/regset.c | 1 + arch/x86/kernel/fpu/xstate.c | 3 ++- arch/x86/kvm/svm/sev.c | 1 + 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 1c7ab8d95da5..79f95d3787e2 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -2,17 +2,6 @@ #ifndef _ASM_X86_FPU_XCR_H #define _ASM_X86_FPU_XCR_H -/* - * MXCSR and XCR definitions: - */ - -static inline void ldmxcsr(u32 mxcsr) -{ - asm volatile("ldmxcsr %0" :: "m" (mxcsr)); -} - -extern unsigned int mxcsr_feature_mask; - #define XCR_XFEATURE_ENABLED_MASK 0x00000000 static inline u64 xgetbv(u32 index) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 24873dfe2dba..e77084a6ae7c 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -11,6 +11,7 @@ #include #include "internal.h" +#include "legacy.h" /* * Initialize the registers found in all CPUs, CR0 and CR4: diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h index 2ff36b0f79e9..17c26b164c63 100644 --- a/arch/x86/kernel/fpu/legacy.h +++ b/arch/x86/kernel/fpu/legacy.h @@ -4,6 +4,13 @@ #include +extern unsigned int mxcsr_feature_mask; + +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + /* * Returns 0 on success or the trap number when the operation raises an * exception. 
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index a40150e350b6..3d8ed45da166 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -12,6 +12,7 @@ #include "context.h" #include "internal.h" +#include "legacy.h" /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4cb4c766aa1d..162fb0037c0e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -14,8 +14,9 @@ #include #include -#include #include +#include +#include #include diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 31488ece627e..30c428834f81 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -16,6 +16,7 @@ #include #include +#include #include "x86.h" #include "svm.h" -- Gitee From ef2cb4b6b4a79a4a24b7818c45530b045dfebd4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:33 +0200 Subject: [PATCH 115/188] x86/fpu: Move fpstate functions to api.h mainline inclusion from mainline-v5.16-rc1 commit 90489f1dee8b703a3301857917c0aba0b22b5d83 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 90489f1dee8b x86/fpu: Move fpstate functions to api.h. -------------------------------- Move function declarations which need to be globally available to api.h where they belong. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.792363754@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 9 +++++++++ arch/x86/include/asm/fpu/internal.h | 9 --------- arch/x86/kernel/fpu/internal.h | 3 +++ arch/x86/math-emu/fpu_entry.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 79073383a9bc..d2a063377afa 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -83,6 +83,15 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); static inline void update_pasid(void) { } +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif + +/* fpstate */ +extern union fpregs_state init_fpstate; + /* fpstate-related functions which are exported to KVM */ extern void fpu_init_fpstate_user(struct fpu *fpu); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 74b7cc3d2e77..d8bb49134ebb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,15 +42,6 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -extern union fpregs_state init_fpstate; -extern void fpstate_init_user(union fpregs_state *state); - -#ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct swregs_state *soft); -#else -static inline void fpstate_init_soft(struct swregs_state *soft) {} -#endif - extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index 5ddc09e03c2a..bd7f813242dd 100644 --- 
a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -22,4 +22,7 @@ static __always_inline __pure bool use_fxsr(void) /* Init functions */ extern void fpu__init_prepare_fx_sw_frame(void); +/* Used in init.c */ +extern void fpstate_init_user(union fpregs_state *state); + #endif diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 8679a9d6c47f..50195e249753 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "fpu_system.h" #include "fpu_emu.h" -- Gitee From 0dbf68f5ecba94c5ebd7b2c633578ba644a7b9b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:35 +0200 Subject: [PATCH 116/188] x86/fpu: Remove internal.h dependency from fpu/signal.h mainline inclusion from mainline-v5.16-rc1 commit 0ae67cc34f765078a63137120e4567ad2f050b75 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 0ae67cc34f76 x86/fpu: Remove internal.h dependency from fpu/signal.h. -------------------------------- In order to remove internal.h make signal.h independent of it. 
Include asm/fpu/xstate.h to fix a missing update_regset_xstate_info() prototype, which is Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.844565975@linutronix.de Signed-off-by: Lin Wang --- arch/x86/ia32/ia32_signal.c | 1 - arch/x86/include/asm/fpu/api.h | 3 +++ arch/x86/include/asm/fpu/internal.h | 7 ------- arch/x86/include/asm/fpu/signal.h | 13 +++++++++++++ arch/x86/kernel/fpu/signal.c | 1 - arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 1 - arch/x86/mm/extable.c | 3 ++- 8 files changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index e3dd76cf909e..19c1f8a5f26f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index d2a063377afa..2e395fa5fc86 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -89,6 +89,9 @@ extern void fpstate_init_soft(struct swregs_state *soft); static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif +/* State tracking */ +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + /* fpstate */ extern union fpregs_state init_fpstate; diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d8bb49134ebb..8f97d3e375de 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,7 +26,6 @@ /* * High level FPU state handling functions: */ -extern bool fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__clear_user_states(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); @@ -42,10 +41,4 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -extern void 
restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); - -extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); - -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 04868a76239a..9a63a21c219d 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -5,6 +5,11 @@ #ifndef _ASM_X86_FPU_SIGNAL_H #define _ASM_X86_FPU_SIGNAL_H +#include +#include + +#include + #ifdef CONFIG_X86_64 # include # include @@ -31,4 +36,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long fpu__get_fpstate_size(void); +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); +extern void fpu__clear_user_states(struct fpu *fpu); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); + +extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); + +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); + #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 32dbcde72fbe..274cd58b3dc8 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -7,7 +7,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index bedca011459c..7e58d2103bf3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -29,9 +29,9 @@ #include #include -#include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 25d9c826d158..5a26695dcc9c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -30,7 +30,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 69a3acc712f1..7489c9fc598e 100644 --- 
a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -4,7 +4,8 @@ #include #include -#include +#include +#include #include #include #include -- Gitee From 301208f0a4869842d5ff17f0867f5eff2ceb1b40 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:36 +0200 Subject: [PATCH 117/188] x86/sev: Include fpu/xcr.h mainline inclusion from mainline-v5.16-rc1 commit ff0c37e191f2629bf2776dbd95db5d06f704ab93 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ff0c37e191f2 x86/sev: Include fpu/xcr.h. -------------------------------- Include the header which only provides the XCR accessors. That's all what is needed here. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.896573039@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/sev-es.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index c222fab112cb..116840eb0ae1 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include -- Gitee From f891ed5084c695574cc8074fa7e5bdea742594a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:38 +0200 Subject: [PATCH 118/188] x86/fpu: Mop up the internal.h leftovers mainline inclusion from mainline-v5.16-rc1 commit 6415bb80926379310afd74800415f6ebf4bb5c31 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6415bb809263 x86/fpu: Mop up the internal.h leftovers. -------------------------------- Move the global interfaces to api.h and the rest into the core. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011539.948837194@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 10 ++++++++++ arch/x86/include/asm/fpu/internal.h | 18 ------------------ arch/x86/kernel/fpu/init.c | 1 + arch/x86/kernel/fpu/xstate.h | 3 +++ 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 2e395fa5fc86..ced6a3f8bac0 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -83,6 +83,16 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); static inline void update_pasid(void) { } +/* Trap handling */ +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); + +/* Boot, hotplug and resume */ +extern void fpu__init_cpu(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + #ifdef CONFIG_MATH_EMULATION extern void fpstate_init_soft(struct swregs_state *soft); #else diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8f97d3e375de..8df83e887ff6 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -23,22 +23,4 @@ #include #include -/* - * High level FPU state handling functions: - */ -extern void fpu__clear_user_states(struct fpu *fpu); -extern int fpu__exception_code(struct fpu *fpu, int trap_nr); - -extern void fpu_sync_fpstate(struct fpu *fpu); - -/* - * Boot time FPU initialization functions: - */ -extern void fpu__init_cpu(void); -extern void fpu__init_system_xstate(void); -extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpu__init_check_bugs(void); -extern void fpu__resume_cpu(void); - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/init.c 
b/arch/x86/kernel/fpu/init.c index e77084a6ae7c..d420d29e58be 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -12,6 +12,7 @@ #include "internal.h" #include "legacy.h" +#include "xstate.h" /* * Initialize the registers found in all CPUs, CR0 and CR4: diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index ae61baa97682..bb6d7d298d2a 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -18,6 +18,9 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) extern void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, u32 pkru_val, enum xstate_copy_mode copy_mode); +extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system_xstate(void); + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 -- Gitee From 301c59fe3cc75f6d8a82881105aef0716df8da28 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:39 +0200 Subject: [PATCH 119/188] x86/fpu: Replace the includes of fpu/internal.h mainline inclusion from mainline-v5.16-rc1 commit b56d2795b29792c465cc8ef036abad5127a003fb category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit b56d2795b297 x86/fpu: Replace the includes of fpu/internal.h. -------------------------------- Now that the file is empty, fixup all references with the proper includes and delete the former kitchen sink. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011540.001197214@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/internal.h | 26 -------------------------- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/fpu/bugs.c | 2 +- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/init.c | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 1 - arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/power/cpu.c | 2 +- 12 files changed, 10 insertions(+), 37 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8df83e887ff6..e69de29bb2d1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_FPU_INTERNAL_H -#define _ASM_X86_FPU_INTERNAL_H - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2a21046846b6..a0b9d3d83bd0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 186e3014a306..e0eada235547 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index 2954fab15e51..794e70151203 100644 --- a/arch/x86/kernel/fpu/bugs.c 
+++ b/arch/x86/kernel/fpu/bugs.c @@ -2,7 +2,7 @@ /* * x86 FPU bug checks: */ -#include +#include /* * Boot time CPU/FPU FDIV bug detection code: diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 739728889b54..9bb0c1c45e27 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -6,7 +6,7 @@ * General FPU state handling cleanups * Gareth Hughes , May 2000 */ -#include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d420d29e58be..23791355ca67 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -2,7 +2,7 @@ /* * x86 FPU boot time init code: */ -#include +#include #include #include diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 3d8ed45da166..01a1d97c3cb6 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 162fb0037c0e..6039a248eacd 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8baff500914e..12cc3cc1908c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4c0fcbea60ad..96c8351ce678 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6bb07e495eca..6bd6cd341dc3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include 
#include diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index decebcd8ee1c..2743fbb6c70b 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include -- Gitee From 99910745f9e6dda0fd876e6cb9f607586ad25128 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:41 +0200 Subject: [PATCH 120/188] x86/fpu: Provide a proper function for ex_handler_fprestore() mainline inclusion from mainline-v5.16-rc1 commit 079ec41b22b952cdf3126527d735e373c9125f6d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 079ec41b22b9 x86/fpu: Provide a proper function for ex_handler_fprestore(). -------------------------------- To make upcoming changes for support of dynamically enabled features simpler, provide a proper function for the exception handler which removes exposure of FPU internals. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211015011540.053515012@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 4 +--- arch/x86/kernel/fpu/core.c | 5 +++++ arch/x86/kernel/fpu/internal.h | 2 ++ arch/x86/mm/extable.c | 5 ++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ced6a3f8bac0..a1f197ff401b 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -86,6 +86,7 @@ static inline void update_pasid(void) { } /* Trap handling */ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); +extern void fpu_reset_from_exception_fixup(void); /* Boot, hotplug and resume */ extern void fpu__init_cpu(void); @@ -102,9 +103,6 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {} /* State tracking */ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -/* fpstate */ -extern 
union fpregs_state init_fpstate; - /* fpstate-related functions which are exported to KVM */ extern void fpu_init_fpstate_user(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9bb0c1c45e27..79f2e8ddd10c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -155,6 +155,11 @@ void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) } } +void fpu_reset_from_exception_fixup(void) +{ + restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); +} + #if IS_ENABLED(CONFIG_KVM) void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) { diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index bd7f813242dd..479f2db6e160 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -2,6 +2,8 @@ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H +extern union fpregs_state init_fpstate; + /* CPU feature check wrappers */ static __always_inline __pure bool use_xsave(void) { diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 7489c9fc598e..8b6dddec86ac 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -4,8 +4,7 @@ #include #include -#include -#include +#include #include #include #include @@ -48,7 +47,7 @@ static bool ex_handler_fprestore(const struct exception_table_entry *fixup, WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); - restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); + fpu_reset_from_exception_fixup(); return true; } -- Gitee From d4df577f08ceea60dd2580991b1439155d7fbb53 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 03:16:17 +0200 Subject: [PATCH 121/188] x86/fpu: Replace KVMs home brewed FPU copy to user mainline inclusion from mainline-v5.16-rc1 commit bf5d00470787067ff27593c6a097b5eb6e01168e category: feature bugzilla: 
https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit bf5d00470787 x86/fpu: Replace KVMs home brewed FPU copy to user. -------------------------------- Similar to the copy from user function the FPU core has this already implemented with all bells and whistles. Get rid of the duplicated code and use the core functionality. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20211015011539.244101845@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 1 + arch/x86/kernel/fpu/core.c | 18 +++++++++++ arch/x86/kvm/x86.c | 56 ++-------------------------------- 3 files changed, 22 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a1f197ff401b..360cef8367a7 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -110,5 +110,6 @@ extern void fpu_init_fpstate_user(struct fpu *fpu); extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); extern int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *pkru); +extern void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, unsigned int size, u32 pkru); #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 79f2e8ddd10c..ac540a7d410e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -184,6 +184,24 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); +void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, + unsigned int size, u32 pkru) +{ + union fpregs_state *kstate = &fpu->state; + union fpregs_state *ustate = buf; + struct membuf mb = { .p = buf, .left = size }; + + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { + __copy_xstate_to_uabi_buf(mb, &kstate->xsave, pkru, + XSTATE_COPY_XSAVE); + } else { + 
memcpy(&ustate->fxsave, &kstate->fxsave, sizeof(ustate->fxsave)); + /* Make it restorable on a XSAVE enabled host */ + ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; + } +} +EXPORT_SYMBOL_GPL(fpu_copy_fpstate_to_kvm_uabi); + int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *vpkru) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index def5897c9bce..536ca4ceb504 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4521,62 +4521,12 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) -{ - struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave; - u64 xstate_bv = xsave->header.xfeatures; - u64 valid; - - /* - * Copy legacy XSAVE area, to avoid complications with CPUID - * leaves 0 and 1 in the loop below. - */ - memcpy(dest, xsave, XSAVE_HDR_OFFSET); - - /* Set XSTATE_BV */ - xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; - *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; - - /* - * Copy each region from the possibly compacted offset to the - * non-compacted offset. 
- */ - valid = xstate_bv & ~XFEATURE_MASK_FPSSE; - while (valid) { - u32 size, offset, ecx, edx; - u64 xfeature_mask = valid & -valid; - int xfeature_nr = fls64(xfeature_mask) - 1; - void *src; - - cpuid_count(XSTATE_CPUID, xfeature_nr, - &size, &offset, &ecx, &edx); - - if (xfeature_nr == XFEATURE_PKRU) { - memcpy(dest + offset, &vcpu->arch.pkru, - sizeof(vcpu->arch.pkru)); - } else { - src = get_xsave_addr(xsave, xfeature_nr); - if (src) - memcpy(dest + offset, src, size); - } - - valid -= xfeature_mask; - } -} - static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - if (boot_cpu_has(X86_FEATURE_XSAVE)) { - memset(guest_xsave, 0, sizeof(struct kvm_xsave)); - fill_xsave((u8 *) guest_xsave->region, vcpu); - } else { - memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu->state.fxsave, - sizeof(struct fxregs_state)); - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = - XFEATURE_MASK_FPSSE; - } + fpu_copy_fpstate_to_kvm_uabi(vcpu->arch.guest_fpu, guest_xsave->region, + sizeof(guest_xsave->region), + vcpu->arch.pkru); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, -- Gitee From 8a759d575b9a462844164544d121d3229a60304b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:27 +0200 Subject: [PATCH 122/188] x86/fpu: Provide struct fpstate mainline inclusion from mainline-v5.16-rc1 commit 87d0e5be0fac322f4415128def9f16a71a267a40 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 87d0e5be0fac x86/fpu: Provide struct fpstate. -------------------------------- New xfeatures will no longer be automatically stored in the regular XSAVE buffer in thread_struct::fpu. The kernel will provide the default sized buffer for storing the regular features up to AVX512 in thread_struct::fpu and if a task requests to use one of the new features then the register storage has to be extended.
The state will be accessed via a pointer in thread_struct::fpu which defaults to the builtin storage and can be switched when extended storage is required. To avoid conditionals all over the code, create a new container for the register storage which will gain other information, e.g. size, feature masks etc., later. For now it just contains the register storage, which gives it exactly the same layout as the existing fpu::state. Stick fpu::state and the new fpu::__fpstate into an anonymous union and initialize the pointer. Add build time checks to validate that both are at the same place and have the same size. This allows step by step conversion of all users. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.234458659@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 20 +++++++++++++++++++- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/fpu/core.c | 11 ++++++++++- arch/x86/kernel/fpu/init.c | 9 +++++++-- arch/x86/kernel/fpu/internal.h | 1 + 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index f5a38a5f3ae1..3bb6277efbb5 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -309,6 +309,13 @@ union fpregs_state { u8 __padding[PAGE_SIZE]; }; +struct fpstate { + /* @regs: The register state union for all supported formats */ + union fpregs_state regs; + + /* @regs is dynamically sized! Don't add anything after @regs! */ +} __aligned(64); + /* * Highest level per task FPU state data structure that * contains the FPU register state plus various FPU @@ -336,6 +343,14 @@ struct fpu { */ unsigned long avx512_timestamp; + /* + * @fpstate: + * + * Pointer to the active struct fpstate. Initialized to + * point at @__fpstate below. + */ + struct fpstate *fpstate; + /* * @state: * @@ -345,7 +360,10 @@ struct fpu { * copy.
If the task context-switches away then they get * saved here and represent the FPU state. */ - union fpregs_state state; + union { + struct fpstate __fpstate; + union fpregs_state state; + }; /* * WARNING: 'state' is dynamically-sized. Do not put * anything after it here. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8ca33a5e214d..7344b54e4d00 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -554,11 +554,11 @@ struct thread_struct { */ }; -/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +/* Whitelist the FPU register state from the task_struct for hardened usercopy. */ static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { - *offset = offsetof(struct thread_struct, fpu.state); + *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); *size = fpu_kernel_xstate_size; } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ac540a7d410e..d7643115a7ee 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -337,10 +337,17 @@ void fpstate_init_user(union fpregs_state *state) fpstate_init_fstate(&state->fsave); } +void fpstate_reset(struct fpu *fpu) +{ + /* Set the fpstate pointer to the default fpstate */ + fpu->fpstate = &fpu->__fpstate; +} + #if IS_ENABLED(CONFIG_KVM) void fpu_init_fpstate_user(struct fpu *fpu) { - fpstate_init_user(&fpu->state); + fpstate_reset(fpu); + fpstate_init_user(&fpu->fpstate->regs); } EXPORT_SYMBOL_GPL(fpu_init_fpstate_user); #endif @@ -354,6 +361,8 @@ int fpu_clone(struct task_struct *dst) /* The new task's FPU state cannot be valid in the hardware. 
*/ dst_fpu->last_cpu = -1; + fpstate_reset(dst_fpu); + if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 23791355ca67..31ecbfba9ff7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -165,7 +165,7 @@ static void __init fpu__init_task_struct_size(void) * Subtract off the static size of the register state. * It potentially has a bunch of padding. */ - task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); + task_size -= sizeof(current->thread.fpu.__fpstate.regs); /* * Add back the dynamically-calculated register state @@ -180,10 +180,14 @@ static void __init fpu__init_task_struct_size(void) * you hit a compile error here, check the structure to * see if something got added to the end. */ - CHECK_MEMBER_AT_END_OF(struct fpu, state); + CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate); CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); CHECK_MEMBER_AT_END_OF(struct task_struct, thread); + BUILD_BUG_ON(sizeof(struct fpstate) != sizeof(union fpregs_state)); + BUILD_BUG_ON(offsetof(struct thread_struct, fpu.state) != + offsetof(struct thread_struct, fpu.__fpstate)); + arch_task_struct_size = task_size; } @@ -220,6 +224,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ void __init fpu__init_system(struct cpuinfo_x86 *c) { + fpstate_reset(&current->thread.fpu); fpu__init_system_early_generic(c); /* diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index 479f2db6e160..63bd75fe95a8 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -26,5 +26,6 @@ extern void fpu__init_prepare_fx_sw_frame(void); /* Used in init.c */ extern void fpstate_init_user(union fpregs_state *state); +extern void fpstate_reset(struct fpu *fpu); #endif -- Gitee From 0a7657997e0af878646e8623322448f4d0da21d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:28 +0200 Subject: [PATCH 123/188] x86/fpu:
Convert fpstate_init() to struct fpstate mainline inclusion from mainline-v5.16-rc1 commit f83ac56acdad0815366bb541b6cc9d24f6cea2b2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit f83ac56acdad x86/fpu: Convert fpstate_init() to struct fpstate. -------------------------------- Convert fpstate_init() and related code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.292157401@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 44 +++++++++++++++++----------------- arch/x86/kernel/fpu/internal.h | 4 ++-- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 12 +++++----- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d7643115a7ee..19e14b5c519d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -29,7 +29,7 @@ * Represents the initial FPU state. 
It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ -union fpregs_state init_fpstate __ro_after_init; +struct fpstate init_fpstate __ro_after_init; /* * Track whether the kernel is using the FPU state @@ -157,7 +157,7 @@ void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) void fpu_reset_from_exception_fixup(void) { - restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); + restore_fpregs_from_fpstate(&init_fpstate.regs, xfeatures_mask_fpstate()); } #if IS_ENABLED(CONFIG_KVM) @@ -297,24 +297,24 @@ static inline unsigned int init_fpstate_copy_size(void) return fpu_kernel_xstate_size; /* XSAVE(S) just needs the legacy and the xstate header part */ - return sizeof(init_fpstate.xsave); + return sizeof(init_fpstate.regs.xsave); } -static inline void fpstate_init_fxstate(struct fxregs_state *fx) +static inline void fpstate_init_fxstate(struct fpstate *fpstate) { - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; + fpstate->regs.fxsave.cwd = 0x37f; + fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT; } /* * Legacy x87 fpstate state init: */ -static inline void fpstate_init_fstate(struct fregs_state *fp) +static inline void fpstate_init_fstate(struct fpstate *fpstate) { - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; + fpstate->regs.fsave.cwd = 0xffff037fu; + fpstate->regs.fsave.swd = 0xffff0000u; + fpstate->regs.fsave.twd = 0xffffffffu; + fpstate->regs.fsave.fos = 0xffff0000u; } /* @@ -322,19 +322,19 @@ static inline void fpstate_init_fstate(struct fregs_state *fp) * 1) Early boot to setup init_fpstate for non XSAVE systems * 2) fpu_init_fpstate_user() which is invoked from KVM */ -void fpstate_init_user(union fpregs_state *state) +void fpstate_init_user(struct fpstate *fpstate) { if (!cpu_feature_enabled(X86_FEATURE_FPU)) { - fpstate_init_soft(&state->soft); + fpstate_init_soft(&fpstate->regs.soft); return; } - xstate_init_xcomp_bv(&state->xsave, 
xfeatures_mask_uabi()); + xstate_init_xcomp_bv(&fpstate->regs.xsave, xfeatures_mask_uabi()); if (cpu_feature_enabled(X86_FEATURE_FXSR)) - fpstate_init_fxstate(&state->fxsave); + fpstate_init_fxstate(fpstate); else - fpstate_init_fstate(&state->fsave); + fpstate_init_fstate(fpstate); } void fpstate_reset(struct fpu *fpu) @@ -347,7 +347,7 @@ void fpstate_reset(struct fpu *fpu) void fpu_init_fpstate_user(struct fpu *fpu) { fpstate_reset(fpu); - fpstate_init_user(&fpu->fpstate->regs); + fpstate_init_user(fpu->fpstate); } EXPORT_SYMBOL_GPL(fpu_init_fpstate_user); #endif @@ -378,7 +378,7 @@ int fpu_clone(struct task_struct *dst) */ if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) { /* Clear out the minimal state */ - memcpy(&dst_fpu->state, &init_fpstate, + memcpy(&dst_fpu->state, &init_fpstate.regs, init_fpstate_copy_size()); return 0; } @@ -435,11 +435,11 @@ void fpu__drop(struct fpu *fpu) static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) - os_xrstor(&init_fpstate.xsave, features_mask); + os_xrstor(&init_fpstate.regs.xsave, features_mask); else if (use_fxsr()) - fxrstor(&init_fpstate.fxsave); + fxrstor(&init_fpstate.regs.fxsave); else - frstor(&init_fpstate.fsave); + frstor(&init_fpstate.regs.fsave); pkru_write_default(); } @@ -466,7 +466,7 @@ static void fpu_reset_fpstate(void) * user space as PKRU is eagerly written in switch_to() and * flush_thread(). 
*/ - memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size()); + memcpy(&fpu->state, &init_fpstate.regs, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index 63bd75fe95a8..e1d8a352f12d 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -2,7 +2,7 @@ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H -extern union fpregs_state init_fpstate; +extern struct fpstate init_fpstate; /* CPU feature check wrappers */ static __always_inline __pure bool use_xsave(void) @@ -25,7 +25,7 @@ static __always_inline __pure bool use_fxsr(void) extern void fpu__init_prepare_fx_sw_frame(void); /* Used in init.c */ -extern void fpstate_init_user(union fpregs_state *state); +extern void fpstate_init_user(struct fpstate *fpstate); extern void fpstate_reset(struct fpu *fpu); #endif diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 274cd58b3dc8..416a110f2196 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -243,7 +243,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) - os_xrstor(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate.regs.xsave, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6039a248eacd..a096ca8aa741 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -408,12 +408,12 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - xstate_init_xcomp_bv(&init_fpstate.xsave, xfeatures_mask_all); + xstate_init_xcomp_bv(&init_fpstate.regs.xsave, xfeatures_mask_all); /* * Init all the features state with header.xfeatures being 0x0 */ - os_xrstor_booting(&init_fpstate.xsave); + 
os_xrstor_booting(&init_fpstate.regs.xsave); /* * All components are now in init state. Read the state back so @@ -431,7 +431,7 @@ static void __init setup_init_fpu_buf(void) * state is all zeroes or if not to add the necessary handling * here. */ - fxsave(&init_fpstate.fxsave); + fxsave(&init_fpstate.regs.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) @@ -672,11 +672,11 @@ static unsigned int __init get_xsave_size(void) */ static bool __init is_supported_xstate_size(unsigned int test_xstate_size) { - if (test_xstate_size <= sizeof(union fpregs_state)) + if (test_xstate_size <= sizeof(init_fpstate.regs)) return true; pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n", - sizeof(union fpregs_state), test_xstate_size); + sizeof(init_fpstate.regs), test_xstate_size); return false; } @@ -981,7 +981,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); - struct xregs_state *xinit = &init_fpstate.xsave; + struct xregs_state *xinit = &init_fpstate.regs.xsave; struct xstate_header header; unsigned int zerofrom; u64 mask; -- Gitee From 383b73bd0186d56f12a6ed4a6a1b1e7d845a13d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:30 +0200 Subject: [PATCH 124/188] x86/fpu: Convert restore_fpregs_from_fpstate() to struct fpstate mainline inclusion from mainline-v5.16-rc1 commit 18b3fa1ad15fa8d777ac32f117553cce1a968460 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 18b3fa1ad15f x86/fpu: Convert restore_fpregs_from_fpstate() to struct fpstate. -------------------------------- Convert restore_fpregs_from_fpstate() and related code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.347395546@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/signal.h | 2 +- arch/x86/kernel/fpu/context.h | 2 +- arch/x86/kernel/fpu/core.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 9a63a21c219d..22b0273a8bf1 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -40,7 +40,7 @@ extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size extern void fpu__clear_user_states(struct fpu *fpu); extern bool fpu__restore_sig(void __user *buf, int ia32_frame); -extern void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); +extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask); extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index e652282842c8..f8f510519688 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -74,7 +74,7 @@ static inline void fpregs_restore_userregs(void) */ mask = xfeatures_mask_restore_user() | xfeatures_mask_supervisor(); - restore_fpregs_from_fpstate(&fpu->state, mask); + restore_fpregs_from_fpstate(fpu->fpstate, mask); fpregs_activate(fpu); fpu->last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 19e14b5c519d..03926bf00971 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -129,7 +129,7 @@ void save_fpregs_to_fpstate(struct fpu *fpu) frstor(&fpu->state.fsave); } -void restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) +void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) { /* * AMD K7/K8 and later CPUs up to Zen don't save/restore @@ -146,18 +146,18 @@ void restore_fpregs_from_fpstate(union 
fpregs_state *fpstate, u64 mask) } if (use_xsave()) { - os_xrstor(&fpstate->xsave, mask); + os_xrstor(&fpstate->regs.xsave, mask); } else { if (use_fxsr()) - fxrstor(&fpstate->fxsave); + fxrstor(&fpstate->regs.fxsave); else - frstor(&fpstate->fsave); + frstor(&fpstate->regs.fsave); } } void fpu_reset_from_exception_fixup(void) { - restore_fpregs_from_fpstate(&init_fpstate.regs, xfeatures_mask_fpstate()); + restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); } #if IS_ENABLED(CONFIG_KVM) @@ -176,7 +176,7 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) if (rstor) { restore_mask &= xfeatures_mask_fpstate(); - restore_fpregs_from_fpstate(&rstor->state, restore_mask); + restore_fpregs_from_fpstate(rstor->fpstate, restore_mask); } fpregs_mark_activate(); -- Gitee From 7871fe2924c14e7109605cd4118dbc51f977f705 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:31 +0200 Subject: [PATCH 125/188] x86/fpu: Replace KVMs xstate component clearing mainline inclusion from mainline-v5.16-rc1 commit 087df48c298c1cb829f4cd468d90f93234b1bc44 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 087df48c298c x86/fpu: Replace KVMs xstate component clearing. -------------------------------- In order to prepare for the support of dynamically enabled FPU features, move the clearing of xstate components to the FPU core code. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20211013145322.399567049@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 1 + arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/kernel/fpu/xstate.c | 12 +++++++++++- arch/x86/kernel/fpu/xstate.h | 2 ++ arch/x86/kvm/x86.c | 14 +++++--------- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 360cef8367a7..e78b03c477fd 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -105,6 +105,7 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); /* fpstate-related functions which are exported to KVM */ extern void fpu_init_fpstate_user(struct fpu *fpu); +extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); /* KVM specific functions */ extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index b8cebc0ee420..fb329bbfe89f 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -128,7 +128,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); -void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a096ca8aa741..99d967c605c0 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -908,7 +908,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } -EXPORT_SYMBOL_GPL(get_xsave_addr); 
#ifdef CONFIG_ARCH_HAS_PKEYS @@ -1257,6 +1256,17 @@ void xrstors(struct xregs_state *xstate, u64 mask) WARN_ON_ONCE(err); } +#if IS_ENABLED(CONFIG_KVM) +void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) +{ + void *addr = get_xsave_addr(&fps->regs.xsave, xfeature); + + if (addr) + memset(addr, 0, xstate_sizes[xfeature]); +} +EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); +#endif + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index bb6d7d298d2a..99f8cfec719d 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -21,6 +21,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsav extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(void); +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536ca4ceb504..8bdada03b9d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10144,7 +10144,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apf.halted = false; if (kvm_mpx_supported()) { - void *mpx_state_buffer; + struct fpstate *fpstate = vcpu->arch.guest_fpu->fpstate; /* * To avoid have the INIT path from kvm_apic_has_events() that be @@ -10152,14 +10152,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) */ if (init_event) kvm_put_guest_fpu(vcpu); - mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_BNDREGS); - if (mpx_state_buffer) - memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); - mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_BNDCSR); - if (mpx_state_buffer) - memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + + fpstate_clear_xstate_component(fpstate, 
XFEATURE_BNDREGS); + fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); + if (init_event) kvm_load_guest_fpu(vcpu); } -- Gitee From f07a6bc9dcd35455a788281cb0a8d6ce09455ddc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:33 +0200 Subject: [PATCH 126/188] x86/KVM: Convert to fpstate mainline inclusion from mainline-v5.16-rc1 commit 1c57572d754fc54e0b8ac0df5350969ce6292d12 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1c57572d754f x86/KVM: Convert to fpstate. -------------------------------- Convert KVM code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Paolo Bonzini Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20211013145322.451439983@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8bdada03b9d7..dfea149b850b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9856,7 +9856,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - fxsave = &vcpu->arch.guest_fpu->state.fxsave; + fxsave = &vcpu->arch.guest_fpu->fpstate->regs.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; fpu->fsw = fxsave->swd; @@ -9876,7 +9876,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - fxsave = &vcpu->arch.guest_fpu->state.fxsave; + fxsave = &vcpu->arch.guest_fpu->fpstate->regs.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; -- Gitee From 0e44ba77ef9fcd030f668b3fc2c55869b80450dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:34 +0200 Subject: [PATCH 127/188] x86/fpu: Convert tracing to fpstate mainline inclusion from mainline-v5.16-rc1 commit 
cceb496420fa11a6e11989abc68b8e7564dc40f9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit cceb496420fa x86/fpu: Convert tracing to fpstate. -------------------------------- Convert FPU tracing code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.503327333@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/trace/fpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 879b77792f94..4645a6334063 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -22,8 +22,8 @@ DECLARE_EVENT_CLASS(x86_fpu, __entry->fpu = fpu; __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { - __entry->xfeatures = fpu->state.xsave.header.xfeatures; - __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv; } ), TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", -- Gitee From 173624627791c6dc68df4f656bb652277f1cc129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:36 +0200 Subject: [PATCH 128/188] x86/fpu/regset: Convert to fpstate mainline inclusion from mainline-v5.16-rc1 commit caee31a36c33ed7788d0b3d93a663860157f6c55 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit caee31a36c33 x86/fpu/regset: Convert to fpstate. -------------------------------- Convert regset related code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.555239736@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 01a1d97c3cb6..ec777793d890 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -78,8 +78,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, sync_fpstate(fpu); if (!use_xsave()) { - return membuf_write(&to, &fpu->state.fxsave, - sizeof(fpu->state.fxsave)); + return membuf_write(&to, &fpu->fpstate->regs.fxsave, + sizeof(fpu->fpstate->regs.fxsave)); } copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX); @@ -114,15 +114,15 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, fpu_force_restore(fpu); /* Copy the state */ - memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate)); + memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); /* Clear xmm8..15 */ - BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16); - memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16); + BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); + memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; + fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return 0; } @@ -168,7 +168,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(&fpu->fpstate->regs.xsave, + kbuf ?: tmpbuf); out: vfree(tmpbuf); @@ -287,7 +288,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env, void convert_from_fxsr(struct 
user_i387_ia32_struct *env, struct task_struct *tsk) { - __convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave); + __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave); } void convert_to_fxsr(struct fxregs_state *fxsave, @@ -330,7 +331,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, return fpregs_soft_get(target, regset, to); if (!cpu_feature_enabled(X86_FEATURE_FXSR)) { - return membuf_write(&to, &fpu->state.fsave, + return membuf_write(&to, &fpu->fpstate->regs.fsave, sizeof(struct fregs_state)); } @@ -341,7 +342,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP); fx = &fxsave; } else { - fx = &fpu->state.fxsave; + fx = &fpu->fpstate->regs.fxsave; } __convert_from_fxsr(&env, target, fx); @@ -370,16 +371,16 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, fpu_force_restore(fpu); if (cpu_feature_enabled(X86_FEATURE_FXSR)) - convert_to_fxsr(&fpu->state.fxsave, &env); + convert_to_fxsr(&fpu->fpstate->regs.fxsave, &env); else - memcpy(&fpu->state.fsave, &env, sizeof(env)); + memcpy(&fpu->fpstate->regs.fsave, &env, sizeof(env)); /* * Update the header bit in the xsave header, indicating the * presence of FP. */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; + fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FP; return 0; } -- Gitee From 851f4eefd47bcda473dc7308ea81ac8e2660aecd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:37 +0200 Subject: [PATCH 129/188] x86/fpu/signal: Convert to fpstate mainline inclusion from mainline-v5.16-rc1 commit 7e049e8b74591038c831e765585ae9038b7880a1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 7e049e8b7459 x86/fpu/signal: Convert to fpstate. 
-------------------------------- Convert signal related code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.607370221@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 416a110f2196..c54c2a3dda44 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -72,13 +72,13 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - fxsave(&tsk->thread.fpu.state.fxsave); + fxsave(&tsk->thread.fpu.fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); @@ -303,7 +303,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, * been restored from a user buffer directly. 
*/ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) - os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); + os_xrstor(&fpu->fpstate->regs.xsave, xfeatures_mask_supervisor()); fpregs_mark_activate(); fpregs_unlock(); @@ -317,6 +317,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; + union fpregs_state *fpregs; u64 user_xfeatures = 0; bool fx_only = false; bool success; @@ -349,6 +350,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (__copy_from_user(&env, buf, sizeof(env))) return false; + fpregs = &fpu->fpstate->regs; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is * not modified on context switch and that the xstate is considered @@ -366,7 +368,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) - os_xsave(&fpu->state.xsave); + os_xsave(&fpregs->xsave); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); @@ -374,29 +376,29 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs_unlock(); if (use_xsave() && !fx_only) { - if (copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx)) + if (copy_sigframe_from_user_to_xstate(&fpregs->xsave, buf_fx)) return false; } else { - if (__copy_from_user(&fpu->state.fxsave, buf_fx, - sizeof(fpu->state.fxsave))) + if (__copy_from_user(&fpregs->fxsave, buf_fx, + sizeof(fpregs->fxsave))) return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. */ - if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) + if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask) return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). 
*/ - fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + fpregs->fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; + fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; } /* Fold the legacy FP storage */ - convert_to_fxsr(&fpu->state.fxsave, &env); + convert_to_fxsr(&fpregs->fxsave, &env); fpregs_lock(); if (use_xsave()) { @@ -411,10 +413,10 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, */ u64 mask = user_xfeatures | xfeatures_mask_supervisor(); - fpu->state.xsave.header.xfeatures &= mask; - success = !os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all); + fpregs->xsave.header.xfeatures &= mask; + success = !os_xrstor_safe(&fpregs->xsave, xfeatures_mask_all); } else { - success = !fxrstor_safe(&fpu->state.fxsave); + success = !fxrstor_safe(&fpregs->fxsave); } if (likely(success)) -- Gitee From 150ea671b412b1d9bb113590721d5dbfad246675 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:39 +0200 Subject: [PATCH 130/188] x86/fpu/core: Convert to fpstate mainline inclusion from mainline-v5.16-rc1 commit c20942ce5128ef92e2c451f943ba33462ad2fbc4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit c20942ce5128 x86/fpu/core: Convert to fpstate. -------------------------------- Convert the rest of the core code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.659456185@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 4 ++-- arch/x86/kernel/fpu/core.c | 44 ++++++++++++++++++---------------- arch/x86/kernel/fpu/init.c | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index e78b03c477fd..4fce3152af17 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -38,9 +38,9 @@ static inline void kernel_fpu_begin(void) } /* - * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate. * A context switch will (and softirq might) save CPU's FPU registers to - * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in * a random state. */ static inline void fpregs_lock(void) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 03926bf00971..14560fda15c2 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -89,7 +89,7 @@ bool irq_fpu_usable(void) EXPORT_SYMBOL(irq_fpu_usable); /* - * Save the FPU register state in fpu->state. The register state is + * Save the FPU register state in fpu->fpstate->regs. The register state is * preserved. * * Must be called with fpregs_lock() held. @@ -105,19 +105,19 @@ EXPORT_SYMBOL(irq_fpu_usable); void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - os_xsave(&fpu->state.xsave); + os_xsave(&fpu->fpstate->regs.xsave); /* * AVX512 state is tracked here because its use is * known to slow the max clock speed of the core. 
*/ - if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) + if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512) fpu->avx512_timestamp = jiffies; return; } if (likely(use_fxsr())) { - fxsave(&fpu->state.fxsave); + fxsave(&fpu->fpstate->regs.fxsave); return; } @@ -125,8 +125,8 @@ void save_fpregs_to_fpstate(struct fpu *fpu) * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to reload them from the memory state. */ - asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - frstor(&fpu->state.fsave); + asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave)); + frstor(&fpu->fpstate->regs.fsave); } void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) @@ -167,7 +167,8 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) if (save) { if (test_thread_flag(TIF_NEED_FPU_LOAD)) { - memcpy(&save->state, &current->thread.fpu.state, + memcpy(&save->fpstate->regs, + &current->thread.fpu.fpstate->regs, fpu_kernel_xstate_size); } else { save_fpregs_to_fpstate(save); @@ -187,7 +188,7 @@ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, unsigned int size, u32 pkru) { - union fpregs_state *kstate = &fpu->state; + union fpregs_state *kstate = &fpu->fpstate->regs; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; @@ -205,7 +206,7 @@ EXPORT_SYMBOL_GPL(fpu_copy_fpstate_to_kvm_uabi); int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *vpkru) { - union fpregs_state *kstate = &fpu->state; + union fpregs_state *kstate = &fpu->fpstate->regs; const union fpregs_state *ustate = buf; struct pkru_state *xpkru; int ret; @@ -378,7 +379,7 @@ int fpu_clone(struct task_struct *dst) */ if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) { /* Clear out the minimal state */ - memcpy(&dst_fpu->state, &init_fpstate.regs, + memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs, 
init_fpstate_copy_size()); return 0; } @@ -389,11 +390,12 @@ int fpu_clone(struct task_struct *dst) * child's FPU context, without any memory-to-memory copying. */ fpregs_lock(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); - - else + if (test_thread_flag(TIF_NEED_FPU_LOAD)) { + memcpy(&dst_fpu->fpstate->regs, &src_fpu->fpstate->regs, + fpu_kernel_xstate_size); + } else { save_fpregs_to_fpstate(dst_fpu); + } fpregs_unlock(); trace_x86_fpu_copy_src(src_fpu); @@ -466,7 +468,7 @@ static void fpu_reset_fpstate(void) * user space as PKRU is eagerly written in switch_to() and * flush_thread(). */ - memcpy(&fpu->state, &init_fpstate.regs, init_fpstate_copy_size()); + memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } @@ -493,7 +495,7 @@ void fpu__clear_user_states(struct fpu *fpu) */ if (xfeatures_mask_supervisor() && !fpregs_state_valid(fpu, smp_processor_id())) { - os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); + os_xrstor(&fpu->fpstate->regs.xsave, xfeatures_mask_supervisor()); } /* Reset user states in registers. */ @@ -574,11 +576,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * fully reproduce the context of the exception. 
*/ if (boot_cpu_has(X86_FEATURE_FXSR)) { - cwd = fpu->state.fxsave.cwd; - swd = fpu->state.fxsave.swd; + cwd = fpu->fpstate->regs.fxsave.cwd; + swd = fpu->fpstate->regs.fxsave.swd; } else { - cwd = (unsigned short)fpu->state.fsave.cwd; - swd = (unsigned short)fpu->state.fsave.swd; + cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd; + swd = (unsigned short)fpu->fpstate->regs.fsave.swd; } err = swd & ~cwd; @@ -592,7 +594,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) - mxcsr = fpu->state.fxsave.mxcsr; + mxcsr = fpu->fpstate->regs.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 31ecbfba9ff7..b524cd053114 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void) /* Flush out any pending x87 state: */ #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU)) - fpstate_init_soft(&current->thread.fpu.state.soft); + fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft); else #endif asm volatile ("fninit"); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 99d967c605c0..72b271d1083b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1094,7 +1094,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { - __copy_xstate_to_uabi_buf(to, &tsk->thread.fpu.state.xsave, + __copy_xstate_to_uabi_buf(to, &tsk->thread.fpu.fpstate->regs.xsave, tsk->thread.pkru, copy_mode); } -- Gitee From 41b415b9e9efb3e8bf2c316869a4f63fae2865ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:40 +0200 Subject: [PATCH 131/188] x86/math-emu: Convert to fpstate mainline inclusion from mainline-v5.16-rc1 commit 63d6bdf36ce1541e656966604c12ac4d9fc5d1f0 category: 
feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 63d6bdf36ce1 x86/math-emu: Convert to fpstate. -------------------------------- Convert math emulation code to the new register storage mechanism in preparation for dynamically sized buffers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.711347464@linutronix.de Signed-off-by: Lin Wang --- arch/x86/math-emu/fpu_aux.c | 2 +- arch/x86/math-emu/fpu_entry.c | 4 ++-- arch/x86/math-emu/fpu_system.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index 034748459482..d62662bdd460 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft) void finit(void) { - fpstate_init_soft(&current->thread.fpu.state.soft); + fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft); } /* diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 50195e249753..7fe56c594aa6 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -640,7 +640,7 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct swregs_state *s387 = &target->thread.fpu.state.soft; + struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; @@ -691,7 +691,7 @@ int fpregs_soft_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - struct swregs_state *s387 = &target->thread.fpu.state.soft; + struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft; const void *space = s387->st_space; int offset = (S387->ftop & 7) * 10, other = 80 - offset; diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 
9b41391867dc..eec3e4805c75 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d) return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE; } -#define I387 (&current->thread.fpu.state) +#define I387 (&current->thread.fpu.fpstate->regs) #define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) -- Gitee From 766b7576482a7ee756ebe4cb7cb0b9b9340c31bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:42 +0200 Subject: [PATCH 132/188] x86/fpu: Remove fpu::state mainline inclusion from mainline-v5.16-rc1 commit 2f27b5034244c4ebd70c90066defa771a99a5320 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2f27b5034244 x86/fpu: Remove fpu::state. -------------------------------- All users converted. Remove it along with the sanity checks. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.765063318@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 18 +++++++----------- arch/x86/kernel/fpu/init.c | 4 ---- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3bb6277efbb5..297e3b4920cb 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -352,20 +352,16 @@ struct fpu { struct fpstate *fpstate; /* - * @state: + * @__fpstate: * - * In-memory copy of all FPU registers that we save/restore - * over context switches. If the task is using the FPU then - * the registers in the FPU are more recent than this state - * copy. If the task context-switches away then they get - * saved here and represent the FPU state. + * Initial in-memory storage for FPU registers which are saved in + * context switch and when the kernel uses the FPU. 
The registers + * are restored from this storage on return to user space if they + * are not longer containing the tasks FPU register state. */ - union { - struct fpstate __fpstate; - union fpregs_state state; - }; + struct fpstate __fpstate; /* - * WARNING: 'state' is dynamically-sized. Do not put + * WARNING: '__fpstate' is dynamically-sized. Do not put * anything after it here. */ }; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index b524cd053114..cffbaf491886 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -184,10 +184,6 @@ static void __init fpu__init_task_struct_size(void) CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); CHECK_MEMBER_AT_END_OF(struct task_struct, thread); - BUILD_BUG_ON(sizeof(struct fpstate) != sizeof(union fpregs_state)); - BUILD_BUG_ON(offsetof(struct thread_struct, fpu.state) != - offsetof(struct thread_struct, fpu.__fpstate)); - arch_task_struct_size = task_size; } -- Gitee From 51d4027fe65433cc4207a340c0e5a4f1b6e19e4f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:43 +0200 Subject: [PATCH 133/188] x86/fpu: Do not leak fpstate pointer on fork mainline inclusion from mainline-v5.16-rc1 commit f0cbc8b3cdf7d1c724155cd9cecffe329bb96119 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit f0cbc8b3cdf7 x86/fpu: Do not leak fpstate pointer on fork. -------------------------------- If fork fails early then the copied task struct would carry the fpstate pointer of the parent task. Not a problem right now, but later when dynamically allocated buffers are available, keeping the pointer might result in freeing the parent's buffer. Set it to NULL which prevents that. If fork reaches clone_thread(), the pointer will be correctly set to the new task context. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.817101108@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d708c97fd6fe..f3b2539786a3 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -93,6 +93,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif + /* Drop the copied pointer to current's fpstate */ + dst->thread.fpu.fpstate = NULL; return 0; } -- Gitee From 1f6d3f9c975fa45ec3274d70edd690d89677dc6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:45 +0200 Subject: [PATCH 134/188] x86/process: Move arch_thread_struct_whitelist() out of line mainline inclusion from mainline-v5.16-rc1 commit 2dd8eedc80b184bb16aad697ae60367c5bf07299 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2dd8eedc80b1 x86/process: Move arch_thread_struct_whitelist() out of line. -------------------------------- In preparation for dynamically enabled FPU features move the function out of line as the goal is to expose less and not more information. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.869001791@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/processor.h | 9 +++------ arch/x86/kernel/fpu/core.c | 10 ++++++++++ arch/x86/kernel/fpu/internal.h | 2 ++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7344b54e4d00..008237b10e1f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -477,9 +477,6 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ -extern unsigned int fpu_kernel_xstate_size; -extern unsigned int fpu_user_xstate_size; - struct perf_event; struct thread_struct { @@ -554,12 +551,12 @@ struct thread_struct { */ }; -/* Whitelist the FPU register state from the task_struct for hardened usercopy. */ +extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); + static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { - *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); - *size = fpu_kernel_xstate_size; + fpu_thread_struct_whitelist(offset, size); } static inline void diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 14560fda15c2..c6df97517ec8 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -404,6 +404,16 @@ int fpu_clone(struct task_struct *dst) return 0; } +/* + * Whitelist the FPU register state embedded into task_struct for hardened + * usercopy. + */ +void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); + *size = fpu_kernel_xstate_size; +} + /* * Drops current FPU state: deactivates the fpregs and * the fpstate. 
NOTE: it still leaves previous contents diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index e1d8a352f12d..5c4f71ff6ae9 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -2,6 +2,8 @@ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H +extern unsigned int fpu_kernel_xstate_size; +extern unsigned int fpu_user_xstate_size; extern struct fpstate init_fpstate; /* CPU feature check wrappers */ -- Gitee From b07458ecea412eaf39d8b267a9c2e8d3160fd483 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:46 +0200 Subject: [PATCH 135/188] x86/fpu: Add size and mask information to fpstate mainline inclusion from mainline-v5.16-rc1 commit 248452ce21aeb08da2d2af23d88f890886bd379f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 248452ce21ae x86/fpu: Add size and mask information to fpstate. -------------------------------- Add state size and feature mask information to the fpstate container. This will be used for runtime checks with the upcoming support for dynamically enabled features and dynamically sized buffers. That avoids conditionals all over the place as the required information is accessible for both default and extended buffers. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.921388806@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 12 ++++++++++++ arch/x86/kernel/fpu/core.c | 6 ++++++ arch/x86/kernel/fpu/init.c | 9 +++++++++ arch/x86/kernel/fpu/xstate.c | 3 +++ 4 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 297e3b4920cb..3a12e97e475d 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -310,6 +310,18 @@ union fpregs_state { }; struct fpstate { + /* @kernel_size: The size of the kernel register image */ + unsigned int size; + + /* @user_size: The size in non-compacted UABI format */ + unsigned int user_size; + + /* @xfeatures: xfeatures for which the storage is sized */ + u64 xfeatures; + + /* @user_xfeatures: xfeatures valid in UABI buffers */ + u64 user_xfeatures; + /* @regs: The register state union for all supported formats */ union fpregs_state regs; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c6df97517ec8..a8cc20e90751 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -342,6 +342,12 @@ void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; + + /* Initialize sizes and feature masks */ + fpu->fpstate->size = fpu_kernel_xstate_size; + fpu->fpstate->user_size = fpu_user_xstate_size; + fpu->fpstate->xfeatures = xfeatures_mask_all; + fpu->fpstate->user_xfeatures = xfeatures_mask_uabi(); } #if IS_ENABLED(CONFIG_KVM) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index cffbaf491886..65d763faace9 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -212,6 +212,14 @@ static void __init fpu__init_system_xstate_size_legacy(void) } fpu_user_xstate_size = fpu_kernel_xstate_size; + fpstate_reset(&current->thread.fpu); +} + +static void __init
fpu__init_init_fpstate(void) +{ + /* Bring init_fpstate size and features up to date */ + init_fpstate.size = fpu_kernel_xstate_size; + init_fpstate.xfeatures = xfeatures_mask_all; } /* @@ -233,4 +241,5 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); fpu__init_task_struct_size(); + fpu__init_init_fpstate(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 72b271d1083b..010e735b9829 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -720,6 +720,7 @@ static void __init fpu__init_disable_system_xstate(void) xfeatures_mask_all = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); + fpstate_reset(&current->thread.fpu); } /* @@ -792,6 +793,8 @@ void __init fpu__init_system_xstate(void) if (err) goto out_disable; + fpstate_reset(&current->thread.fpu); + /* * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: -- Gitee From 118193fe96e9e4594a7de0c18af82ead3f8a6ec2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:48 +0200 Subject: [PATCH 136/188] x86/fpu: Use fpstate::size mainline inclusion from mainline-v5.16-rc1 commit be31dfdfd75b172af3ddcfa7511cdc3bb7adb25e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit be31dfdfd75b x86/fpu: Use fpstate::size. -------------------------------- Make use of fpstate::size in various places which require the buffer size information for sanity checks or memcpy() sizing.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145322.973518954@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 13 ++++++------- arch/x86/kernel/fpu/signal.c | 7 +++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a8cc20e90751..cb48c80ce5e3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -166,13 +166,12 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) fpregs_lock(); if (save) { - if (test_thread_flag(TIF_NEED_FPU_LOAD)) { - memcpy(&save->fpstate->regs, - &current->thread.fpu.fpstate->regs, - fpu_kernel_xstate_size); - } else { + struct fpstate *fpcur = current->thread.fpu.fpstate; + + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&save->fpstate->regs, &fpcur->regs, fpcur->size); + else save_fpregs_to_fpstate(save); - } } if (rstor) { @@ -398,7 +397,7 @@ int fpu_clone(struct task_struct *dst) fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) { memcpy(&dst_fpu->fpstate->regs, &src_fpu->fpstate->regs, - fpu_kernel_xstate_size); + dst_fpu->fpstate->size); } else { save_fpregs_to_fpstate(dst_fpu); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c54c2a3dda44..aa9329189864 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -313,15 +313,13 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { - int state_size = fpu_kernel_xstate_size; struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; + bool success, fx_only = false; union fpregs_state *fpregs; + unsigned int state_size; u64 user_xfeatures = 0; - bool fx_only = false; - bool success; - if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -334,6 +332,7 @@ static bool
__fpu_restore_sig(void __user *buf, void __user *buf_fx, user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; + state_size = fpu->fpstate->size; } if (likely(!ia32_fxstate)) { -- Gitee From 7d622de6e1500743a0d8b4ea038ca7f8da5ad873 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:49 +0200 Subject: [PATCH 137/188] x86/fpu/xstate: Use fpstate for os_xsave() mainline inclusion from mainline-v5.16-rc1 commit 073e627a4537e682c43a1e8df659ce24cbced40c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 073e627a4537 x86/fpu/xstate: Use fpstate for os_xsave(). -------------------------------- With variable feature sets XSAVE[S] requires to know the feature set for which the buffer is valid. Retrieve it from fpstate. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145323.025695590@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/signal.c | 4 ++-- arch/x86/kernel/fpu/xstate.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index cb48c80ce5e3..f4db70b64e2e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -105,7 +105,7 @@ EXPORT_SYMBOL(irq_fpu_usable); void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - os_xsave(&fpu->fpstate->regs.xsave); + os_xsave(fpu->fpstate); /* * AVX512 state is tracked here because its use is diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index aa9329189864..5aca418490f0 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -349,7 +349,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, if (__copy_from_user(&env, buf, sizeof(env))) return false; - fpregs = &fpu->fpstate->regs; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our 
xstate is * not modified on context switch and that the xstate is considered @@ -367,13 +366,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) - os_xsave(&fpregs->xsave); + os_xsave(fpu->fpstate); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); fpregs_unlock(); + fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { if (copy_sigframe_from_user_to_xstate(&fpregs->xsave, buf_fx)) return false; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 99f8cfec719d..24a1479caea2 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -101,16 +101,16 @@ extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features * and command line options. The choice is permanent until the next reboot. */ -static inline void os_xsave(struct xregs_state *xstate) +static inline void os_xsave(struct fpstate *fpstate) { - u64 mask = xfeatures_mask_all; + u64 mask = fpstate->xfeatures; u32 lmask = mask; u32 hmask = mask >> 32; int err; WARN_ON_FPU(!alternatives_patched); - XSTATE_XSAVE(xstate, lmask, hmask, err); + XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); -- Gitee From f4aa7d91e37a62112e927d42e2db1b16a40e1da5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:51 +0200 Subject: [PATCH 138/188] x86/fpu/xstate: Use fpstate for xsave_to_user_sigframe() mainline inclusion from mainline-v5.16-rc1 commit 0b2d39aa03574eb401cdfaac2f483a6f68173355 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 0b2d39aa0357 x86/fpu/xstate: Use fpstate for xsave_to_user_sigframe(). 
-------------------------------- With dynamically enabled features the sigframe code must know the features which are enabled for the task. Get them from fpstate. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145323.077781448@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 24a1479caea2..3e9eaf9f7cf3 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -149,7 +149,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) * internally, e.g. PKRU. That's user space ABI and also required * to allow the signal handler to modify PKRU. */ - u64 mask = xfeatures_mask_uabi(); + u64 mask = current->thread.fpu.fpstate->user_xfeatures; u32 lmask = mask; u32 hmask = mask >> 32; int err; -- Gitee From 18364456c7982bfbb760e24b8f18ffacb744c63a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:52 +0200 Subject: [PATCH 139/188] x86/fpu: Use fpstate in fpu_copy_kvm_uabi_to_fpstate() mainline inclusion from mainline-v5.16-rc1 commit ad6ede407aae01d9617e172b27e179ce1046cbfc category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit ad6ede407aae x86/fpu: Use fpstate in fpu_copy_kvm_uabi_to_fpstate(). -------------------------------- Straight forward conversion. No functional change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145323.129699950@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index f4db70b64e2e..052e5efbf9f4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -205,7 +205,7 @@ EXPORT_SYMBOL_GPL(fpu_copy_fpstate_to_kvm_uabi); int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *vpkru) { - union fpregs_state *kstate = &fpu->fpstate->regs; + struct fpstate *kstate = fpu->fpstate; const union fpregs_state *ustate = buf; struct pkru_state *xpkru; int ret; @@ -215,25 +215,25 @@ int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, return -EINVAL; if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask) return -EINVAL; - memcpy(&kstate->fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); + memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); return 0; } if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(&kstate->xsave, ustate); + ret = copy_uabi_from_kernel_to_xstate(&kstate->regs.xsave, ustate); if (ret) return ret; /* Retrieve PKRU if not in init state */ - if (kstate->xsave.header.xfeatures & XFEATURE_MASK_PKRU) { - xpkru = get_xsave_addr(&kstate->xsave, XFEATURE_PKRU); + if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) { + xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU); *vpkru = xpkru->pkru; } /* Ensure that XCOMP_BV is set up for XSAVES */ - xstate_init_xcomp_bv(&kstate->xsave, xfeatures_mask_uabi()); + xstate_init_xcomp_bv(&kstate->regs.xsave, xfeatures_mask_uabi()); return 0; } EXPORT_SYMBOL_GPL(fpu_copy_kvm_uabi_to_fpstate); -- Gitee From e7b68f2339413faaff34b46a2f062c6f0066bfaf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:54 +0200 
Subject: [PATCH 140/188] x86/fpu: Use fpstate in __copy_xstate_to_uabi_buf() mainline inclusion from mainline-v5.16-rc1 commit 3ac8d75778fc8c1c22daad9bc674166b862f6f6e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 3ac8d75778fc x86/fpu: Use fpstate in __copy_xstate_to_uabi_buf(). -------------------------------- With dynamically enabled features the copy function must know the features and the size which is valid for the task. Retrieve them from fpstate. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145323.181495492@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 8 ++++---- arch/x86/kernel/fpu/xstate.c | 11 ++++++----- arch/x86/kernel/fpu/xstate.h | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 052e5efbf9f4..04fef4795211 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -187,15 +187,15 @@ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, unsigned int size, u32 pkru) { - union fpregs_state *kstate = &fpu->fpstate->regs; + struct fpstate *kstate = fpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { - __copy_xstate_to_uabi_buf(mb, &kstate->xsave, pkru, - XSTATE_COPY_XSAVE); + __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); } else { - memcpy(&ustate->fxsave, &kstate->fxsave, sizeof(ustate->fxsave)); + memcpy(&ustate->fxsave, &kstate->regs.fxsave, + sizeof(ustate->fxsave)); /* Make it restorable on a XSAVE enabled host */ ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 010e735b9829..9bec473533a0 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -969,7 
+969,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, /** * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor - * @xsave: The xsave from which to copy + * @fpstate: The fpstate buffer from which to copy * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * @@ -979,11 +979,12 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * * It supports partial copy but @to.pos always starts from zero. */ -void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, +void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; + struct xregs_state *xsave = &fpstate->regs.xsave; struct xstate_header header; unsigned int zerofrom; u64 mask; @@ -1003,7 +1004,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= xfeatures_mask_uabi(); + header.xfeatures &= fpstate->user_xfeatures; break; } @@ -1046,7 +1047,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, * but there is no state to copy from in the compacted * init_fpstate. The gap tracking will zero these states. 
*/ - mask = xfeatures_mask_uabi(); + mask = fpstate->user_xfeatures; for_each_extended_xfeature(i, mask) { /* @@ -1097,7 +1098,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { - __copy_xstate_to_uabi_buf(to, &tsk->thread.fpu.fpstate->regs.xsave, + __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, tsk->thread.pkru, copy_mode); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3e9eaf9f7cf3..b74c5953558c 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -15,7 +15,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } -extern void __copy_xstate_to_uabi_buf(struct membuf to, struct xregs_state *xsave, +extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u32 pkru_val, enum xstate_copy_mode copy_mode); extern void fpu__init_cpu_xstate(void); -- Gitee From 02e59277fe82a57e5baefe62b59512b299459ca1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Oct 2021 16:55:55 +0200 Subject: [PATCH 141/188] x86/fpu/xstate: Use fpstate for copy_uabi_to_xstate() mainline inclusion from mainline-v5.16-rc1 commit 49e4eb4125d506937e52e10c34c8cafd93ab0ed6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 49e4eb4125d5 x86/fpu/xstate: Use fpstate for copy_uabi_to_xstate(). -------------------------------- Prepare for dynamically enabled states per task. The function needs to retrieve the features and sizes which are valid in a fpstate context. Retrieve them from fpstate. Move the function declarations to the core header as they are not required anywhere else. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211013145323.233529986@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 12 ------------ arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/regset.c | 5 ++--- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 18 ++++++++++-------- arch/x86/kernel/fpu/xstate.h | 12 ++++++++++++ 6 files changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index fb329bbfe89f..61fcb15d880a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -129,20 +129,8 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); int xfeature_size(int xfeature_nr); -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); -enum xstate_copy_mode { - XSTATE_COPY_FP, - XSTATE_COPY_FX, - XSTATE_COPY_XSAVE, -}; - -struct membuf; -void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, - enum xstate_copy_mode mode); - #endif diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 04fef4795211..b497ecae9270 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -222,7 +222,7 @@ int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(&kstate->regs.xsave, ustate); + ret = copy_uabi_from_kernel_to_xstate(kstate, ustate); if (ret) return ret; diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ec777793d890..f8c485ab73f5 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -8,11 +8,11 @@ #include 
#include #include -#include #include "context.h" #include "internal.h" #include "legacy.h" +#include "xstate.h" /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -168,8 +168,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(&fpu->fpstate->regs.xsave, - kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf); out: vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 5aca418490f0..935818b0406e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -375,7 +375,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { - if (copy_sigframe_from_user_to_xstate(&fpregs->xsave, buf_fx)) + if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9bec473533a0..1007b5a79e95 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -463,10 +463,11 @@ int xfeature_size(int xfeature_nr) } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -static int validate_user_xstate_header(const struct xstate_header *hdr) +static int validate_user_xstate_header(const struct xstate_header *hdr, + struct fpstate *fpstate) { /* No unknown or supervisor features may be set */ - if (hdr->xfeatures & ~xfeatures_mask_uabi()) + if (hdr->xfeatures & ~fpstate->user_xfeatures) return -EINVAL; /* Userspace must use the uncompacted format */ @@ -1115,9 +1116,10 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, } -static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, +static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const 
void __user *ubuf) { + struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; struct xstate_header hdr; u64 mask; @@ -1127,7 +1129,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf)) return -EFAULT; - if (validate_user_xstate_header(&hdr)) + if (validate_user_xstate_header(&hdr, fpstate)) return -EINVAL; /* Validate MXCSR when any of the related features is in use */ @@ -1182,9 +1184,9 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] * format and copy to the target thread. Used by ptrace and KVM. */ -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) { - return copy_uabi_to_xstate(xsave, kbuf, NULL); + return copy_uabi_to_xstate(fpstate, kbuf, NULL); } /* @@ -1192,10 +1194,10 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. 
*/ -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, +int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf) { - return copy_uabi_to_xstate(xsave, NULL, ubuf); + return copy_uabi_to_xstate(fpstate, NULL, ubuf); } static bool validate_independent_components(u64 mask) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index b74c5953558c..379dbfa4f526 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -15,8 +15,20 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } +enum xstate_copy_mode { + XSTATE_COPY_FP, + XSTATE_COPY_FX, + XSTATE_COPY_XSAVE, +}; + +struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u32 pkru_val, enum xstate_copy_mode copy_mode); +extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); +extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); +extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf); + extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(void); -- Gitee From c4112fb6b4deaf821667b596307b2700d5b33c6f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 00:51:51 +0200 Subject: [PATCH 142/188] x86/fpu/signal: Use fpstate for size and features mainline inclusion from mainline-v5.16-rc1 commit 5509cc78080d29b23706dbf076d51691b69f3c79 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 5509cc78080d x86/fpu/signal: Use fpstate for size and features. -------------------------------- For dynamically enabled features it's required to get the features which are enabled for that context when restoring from sigframe. The same applies for all signal frame size calculations. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/87ilxz5iew.ffs@tglx Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 44 ++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 935818b0406e..f9af1747be6e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -41,7 +41,7 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > fpu_user_xstate_size || + fx_sw->xstate_size > current->thread.fpu.fpstate->user_size || fx_sw->xstate_size > fx_sw->extended_size) goto setfx; @@ -98,7 +98,8 @@ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) return true; } -static inline bool save_xstate_epilog(void __user *buf, int ia32_frame) +static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, + unsigned int usize) { struct xregs_state __user *x = buf; struct _fpx_sw_bytes *sw_bytes; @@ -113,7 +114,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame) return !err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *)(buf + fpu_user_xstate_size)); + (__u32 __user *)(buf + usize)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -171,6 +172,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct task_struct *tsk = current; + struct fpstate *fpstate = tsk->thread.fpu.fpstate; int ia32_fxstate = (buf != buf_fx); int ret; @@ -215,7 +217,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); if (ret) { - if (!__clear_user(buf_fx, fpu_user_xstate_size)) + if 
(!__clear_user(buf_fx, fpstate->user_size)) goto retry; return false; } @@ -224,17 +226,18 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; - if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate)) + if (use_fxsr() && + !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate->user_size)) return false; return true; } -static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only) +static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, + u64 xrestore, bool fx_only) { if (use_xsave()) { - u64 init_bv = xfeatures_mask_uabi() & ~xrestore; + u64 init_bv = ufeatures & ~xrestore; int ret; if (likely(!fx_only)) @@ -265,7 +268,8 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, retry: fpregs_lock(); pagefault_disable(); - ret = __restore_fpregs_from_user(buf, xrestore, fx_only); + ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, + xrestore, fx_only); pagefault_enable(); if (unlikely(ret)) { @@ -332,7 +336,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->size; + state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { @@ -425,10 +429,11 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return success; } -static inline int xstate_sigframe_size(void) +static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) { - return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : - fpu_user_xstate_size; + unsigned int size = fpstate->user_size; + + return use_xsave() ? 
size + FP_XSTATE_MAGIC2_SIZE : size; } /* @@ -436,17 +441,19 @@ static inline int xstate_sigframe_size(void) */ bool fpu__restore_sig(void __user *buf, int ia32_frame) { - unsigned int size = xstate_sigframe_size(); struct fpu *fpu = &current->thread.fpu; void __user *buf_fx = buf; bool ia32_fxstate = false; bool success = false; + unsigned int size; if (unlikely(!buf)) { fpu__clear_user_states(fpu); return true; } + size = xstate_sigframe_size(fpu->fpstate); + ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -481,7 +488,7 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { - unsigned long frame_size = xstate_sigframe_size(); + unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { @@ -494,9 +501,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, return sp; } -unsigned long fpu__get_fpstate_size(void) +unsigned long __init fpu__get_fpstate_size(void) { - unsigned long ret = xstate_sigframe_size(); + unsigned long ret = fpu_user_xstate_size; + + if (use_xsave()) + ret += FP_XSTATE_MAGIC2_SIZE; /* * This space is needed on (most) 32-bit kernels, or when a 32-bit -- Gitee From 0877e7fda08c15058de8f947bba45e52bf07be9e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:29 +0200 Subject: [PATCH 143/188] x86/fpu: Provide struct fpu_config mainline inclusion from mainline-v5.16-rc1 commit 578971f4e228f386ad4d7ce16e979f2ed922de54 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 578971f4e228 x86/fpu: Provide struct fpu_config. -------------------------------- Provide a struct to store information about the maximum supported and the default feature set and buffer sizes for both user and kernel space.
This allows quick retrieval of this information for the upcoming support for dynamically enabled features. [ bp: Add vertical spacing between the struct members. ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.126107370@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 42 ++++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 4 +++ 2 files changed, 46 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3a12e97e475d..a32be07f1418 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -378,4 +378,46 @@ struct fpu { */ }; +/* + * FPU state configuration data. Initialized at boot time. Read only after init. + */ +struct fpu_state_config { + /* + * @max_size: + * + * The maximum size of the register state buffer. Includes all + * supported features except independent managed features. + */ + unsigned int max_size; + + /* + * @default_size: + * + * The default size of the register state buffer. Includes all + * supported features except independent managed features and + * features which have to be requested by user space before usage. + */ + unsigned int default_size; + + /* + * @max_features: + * + * The maximum supported features bitmap. Does not include + * independent managed features. + */ + u64 max_features; + + /* + * @default_features: + * + * The default supported features bitmap. Does not include + * independent managed features and features which have to + * be requested by user space before usage. 
+ */ + u64 default_features; +}; + +/* FPU state configuration information */ +extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; + #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b497ecae9270..3512bb241d95 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -25,6 +25,10 @@ #define CREATE_TRACE_POINTS #include +/* The FPU state configuration data for kernel and user space */ +struct fpu_state_config fpu_kernel_cfg __ro_after_init; +struct fpu_state_config fpu_user_cfg __ro_after_init; + /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: -- Gitee From 20f941a61562d2361a81198b260de362b9d17ad9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:31 +0200 Subject: [PATCH 144/188] x86/fpu: Cleanup fpu__init_system_xstate_size_legacy() mainline inclusion from mainline-v5.16-rc1 commit 617473acdfe45aa9aa2be23cd5b02da7cd2717f8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 617473acdfe4 x86/fpu: Cleanup fpu__init_system_xstate_size_legacy(). -------------------------------- Clean the function up before making changes. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.184014242@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/init.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 65d763faace9..c9293ade321d 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -199,17 +199,12 @@ static void __init fpu__init_system_xstate_size_legacy(void) * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). 
*/ - - if (!boot_cpu_has(X86_FEATURE_FPU)) { + if (!cpu_feature_enabled(X86_FEATURE_FPU)) fpu_kernel_xstate_size = sizeof(struct swregs_state); - } else { - if (boot_cpu_has(X86_FEATURE_FXSR)) - fpu_kernel_xstate_size = - sizeof(struct fxregs_state); - else - fpu_kernel_xstate_size = - sizeof(struct fregs_state); - } + else if (cpu_feature_enabled(X86_FEATURE_FXSR)) + fpu_kernel_xstate_size = sizeof(struct fxregs_state); + else + fpu_kernel_xstate_size = sizeof(struct fregs_state); fpu_user_xstate_size = fpu_kernel_xstate_size; fpstate_reset(&current->thread.fpu); -- Gitee From c7f0c398840365adc8be13f89714dec61b616bd7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:32 +0200 Subject: [PATCH 145/188] x86/fpu/xstate: Cleanup size calculations mainline inclusion from mainline-v5.16-rc1 commit cd9ae761744912a96d7fd968b9c0173594e3f6be category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit cd9ae7617449 x86/fpu/xstate: Cleanup size calculations. -------------------------------- The size calculations are partially unreadable gunk. Clean them up. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.241223689@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 82 ++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1007b5a79e95..98ee897e61dd 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -527,7 +527,7 @@ static void __init __xstate_dump_leaves(void) * that our software representation matches what the CPU * tells us about the state's size. */ -static void __init check_xstate_against_struct(int nr) +static bool __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state.
@@ -557,7 +557,9 @@ static void __init check_xstate_against_struct(int nr) ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); + return false; } + return true; } /* @@ -569,38 +571,44 @@ static void __init check_xstate_against_struct(int nr) * covered by these checks. Only the size of the buffer for task->fpu * is checked here. */ -static void __init do_extra_xstate_size_checks(void) +static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { - int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; for_each_extended_xfeature(i, xfeatures_mask_all) { - check_xstate_against_struct(i); + if (!check_xstate_against_struct(i)) + return false; /* * Supervisor state components can be managed only by * XSAVES. */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) - XSTATE_WARN_ON(xfeature_is_supervisor(i)); + if (!compacted && xfeature_is_supervisor(i)) { + XSTATE_WARN_ON(1); + return false; + } /* Align from the end of the previous feature */ if (xfeature_is_aligned(i)) - paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64); + size = ALIGN(size, 64); /* - * The offset of a given state in the non-compacted - * format is given to us in a CPUID leaf. We check - * them for being ordered (increasing offsets) in - * setup_xstate_features(). XSAVES uses compacted format. + * In compacted format the enabled features are packed, + * i.e. disabled features do not occupy space. + * + * In non-compacted format the offsets are fixed and + * disabled states still occupy space in the memory buffer. */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) - paranoid_xstate_size = xfeature_uncompacted_offset(i); + if (!compacted) + size = xfeature_uncompacted_offset(i); /* - * The compacted-format offset always depends on where - * the previous state ended. 
+ * Add the feature size even for non-compacted format + * to make the end result correct */ - paranoid_xstate_size += xfeature_size(i); + size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); + XSTATE_WARN_ON(size != kernel_size); + return size == kernel_size; } /* @@ -653,7 +661,7 @@ static unsigned int __init get_xsaves_size_no_independent(void) return size; } -static unsigned int __init get_xsave_size(void) +static unsigned int __init get_xsave_size_user(void) { unsigned int eax, ebx, ecx, edx; /* @@ -684,31 +692,33 @@ static bool __init is_supported_xstate_size(unsigned int test_xstate_size) static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size; - unsigned int xsave_size; + unsigned int user_size, kernel_size; - xsave_size = get_xsave_size(); + /* Uncompacted user space size */ + user_size = get_xsave_size_user(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - possible_xstate_size = get_xsaves_size_no_independent(); + /* + * XSAVES kernel size includes supervisor states and + * uses compacted format. + * + * XSAVE does not support supervisor states so + * kernel and user size is identical. + */ + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + kernel_size = get_xsaves_size_no_independent(); else - possible_xstate_size = xsave_size; + kernel_size = user_size; - /* Ensure we have the space to store all enabled: */ - if (!is_supported_xstate_size(possible_xstate_size)) + /* Ensure we have the space to store all enabled features. */ + if (!is_supported_xstate_size(kernel_size)) return -EINVAL; - /* - * The size is OK, we are definitely going to use xsave, - * make it known to the world that we need more space. 
- */ - fpu_kernel_xstate_size = possible_xstate_size; - do_extra_xstate_size_checks(); + if (!paranoid_xstate_size_valid(kernel_size)) + return -EINVAL; + + fpu_kernel_xstate_size = kernel_size; + fpu_user_xstate_size = user_size; - /* - * User space is always in standard format. - */ - fpu_user_xstate_size = xsave_size; return 0; } -- Gitee From 0dcaf3821decd0247fc01fa1dfaee947807745f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:34 +0200 Subject: [PATCH 146/188] x86/fpu: Move xstate size to fpu_*_cfg mainline inclusion from mainline-v5.16-rc1 commit 2bd264bce238cedbf00bde1f28ad51ba45b9114e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2bd264bce238 x86/fpu: Move xstate size to fpu_*_cfg. -------------------------------- Use the new kernel and user space config storage to store and retrieve the XSTATE buffer sizes. The default and the maximum size are the same for now, but will change when support for dynamically enabled features is added. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.296830097@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 8 ++++---- arch/x86/kernel/fpu/init.c | 31 ++++++++++++++----------------- arch/x86/kernel/fpu/internal.h | 2 -- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/signal.c | 6 +++--- arch/x86/kernel/fpu/xstate.c | 32 ++++++++++++++++++-------------- arch/x86/kernel/fpu/xstate.h | 2 +- 7 files changed, 41 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3512bb241d95..69abf3a2299d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -298,7 +298,7 @@ void fpu_sync_fpstate(struct fpu *fpu) static inline unsigned int init_fpstate_copy_size(void) { if (!use_xsave()) - return fpu_kernel_xstate_size; + return fpu_kernel_cfg.default_size; /* XSAVE(S) just needs the legacy and the xstate header part */ return sizeof(init_fpstate.regs.xsave); @@ -347,8 +347,8 @@ void fpstate_reset(struct fpu *fpu) fpu->fpstate = &fpu->__fpstate; /* Initialize sizes and feature masks */ - fpu->fpstate->size = fpu_kernel_xstate_size; - fpu->fpstate->user_size = fpu_user_xstate_size; + fpu->fpstate->size = fpu_kernel_cfg.default_size; + fpu->fpstate->user_size = fpu_user_cfg.default_size; fpu->fpstate->xfeatures = xfeatures_mask_all; fpu->fpstate->user_xfeatures = xfeatures_mask_uabi(); } @@ -420,7 +420,7 @@ int fpu_clone(struct task_struct *dst) void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); - *size = fpu_kernel_xstate_size; + *size = fpu_kernel_cfg.default_size; } /* diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index c9293ade321d..58043ed08662 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -133,14 +133,6 @@ static void __init fpu__init_system_generic(void) 
fpu__init_system_mxcsr(); } -/* - * Size of the FPU context state. All tasks in the system use the - * same context size, regardless of what portion they use. - * This is inherent to the XSAVE architecture which puts all state - * components into a single, continuous memory block: - */ -unsigned int fpu_kernel_xstate_size __ro_after_init; - /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -171,7 +163,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += fpu_kernel_xstate_size; + task_size += fpu_kernel_cfg.default_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,25 +187,30 @@ static void __init fpu__init_task_struct_size(void) */ static void __init fpu__init_system_xstate_size_legacy(void) { + unsigned int size; + /* - * Note that xstate sizes might be overwritten later during - * fpu__init_system_xstate(). + * Note that the size configuration might be overwritten later + * during fpu__init_system_xstate(). 
*/ if (!cpu_feature_enabled(X86_FEATURE_FPU)) - fpu_kernel_xstate_size = sizeof(struct swregs_state); + size = sizeof(struct swregs_state); else if (cpu_feature_enabled(X86_FEATURE_FXSR)) - fpu_kernel_xstate_size = sizeof(struct fxregs_state); + size = sizeof(struct fxregs_state); else - fpu_kernel_xstate_size = sizeof(struct fregs_state); + size = sizeof(struct fregs_state); - fpu_user_xstate_size = fpu_kernel_xstate_size; + fpu_kernel_cfg.max_size = size; + fpu_kernel_cfg.default_size = size; + fpu_user_cfg.max_size = size; + fpu_user_cfg.default_size = size; fpstate_reset(&current->thread.fpu); } static void __init fpu__init_init_fpstate(void) { /* Bring init_fpstate size and features up to date */ - init_fpstate.size = fpu_kernel_xstate_size; + init_fpstate.size = fpu_kernel_cfg.max_size; init_fpstate.xfeatures = xfeatures_mask_all; } @@ -234,7 +231,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); - fpu__init_system_xstate(); + fpu__init_system_xstate(fpu_kernel_cfg.max_size); fpu__init_task_struct_size(); fpu__init_init_fpstate(); } diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index 5c4f71ff6ae9..e1d8a352f12d 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -2,8 +2,6 @@ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H -extern unsigned int fpu_kernel_xstate_size; -extern unsigned int fpu_user_xstate_size; extern struct fpstate init_fpstate; /* CPU feature check wrappers */ diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index f8c485ab73f5..437d7c930c0b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -153,7 +153,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * A whole standard-format XSAVE buffer is needed: */ - if (pos != 0 || count != fpu_user_xstate_size) + if (pos != 0 || count != fpu_user_cfg.max_size)
return -EFAULT; if (!kbuf) { diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index f9af1747be6e..fab440369663 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -503,7 +503,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long __init fpu__get_fpstate_size(void) { - unsigned long ret = fpu_user_xstate_size; + unsigned long ret = fpu_user_cfg.max_size; if (use_xsave()) ret += FP_XSTATE_MAGIC2_SIZE; @@ -531,12 +531,12 @@ unsigned long __init fpu__get_fpstate_size(void) */ void __init fpu__init_prepare_fx_sw_frame(void) { - int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; + int size = fpu_user_cfg.default_size + FP_XSTATE_MAGIC2_SIZE; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask_uabi(); - fx_sw_reserved.xstate_size = fpu_user_xstate_size; + fx_sw_reserved.xstate_size = fpu_user_cfg.default_size; if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 98ee897e61dd..b3f1ca404ae2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -77,13 +77,6 @@ static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init = static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; -/* - * The XSAVE area of kernel can be in standard or compacted format; - * it is always in standard format for user mode. This is the user - * mode standard format size used for signal and ptrace frames. - */ -unsigned int fpu_user_xstate_size __ro_after_init; - /* * Return whether the system supports a given xfeature. 
* @@ -716,8 +709,11 @@ static int __init init_xstate_size(void) if (!paranoid_xstate_size_valid(kernel_size)) return -EINVAL; - fpu_kernel_xstate_size = kernel_size; - fpu_user_xstate_size = user_size; + /* Keep it the same for now */ + fpu_kernel_cfg.max_size = kernel_size; + fpu_kernel_cfg.default_size = kernel_size; + fpu_user_cfg.max_size = user_size; + fpu_user_cfg.default_size = user_size; return 0; } @@ -726,11 +722,18 @@ static int __init init_xstate_size(void) * We enabled the XSAVE hardware, but something went wrong and * we can not use it. Disable it. */ -static void __init fpu__init_disable_system_xstate(void) +static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { xfeatures_mask_all = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + /* Restore the legacy size.*/ + fpu_kernel_cfg.max_size = legacy_size; + fpu_kernel_cfg.default_size = legacy_size; + fpu_user_cfg.max_size = legacy_size; + fpu_user_cfg.default_size = legacy_size; + fpstate_reset(&current->thread.fpu); } @@ -738,7 +741,7 @@ static void __init fpu__init_disable_system_xstate(void) * Enable and initialize the xsave feature. * Called once per system bootup.
*/ -void __init fpu__init_system_xstate(void) +void __init fpu__init_system_xstate(unsigned int legacy_size) { unsigned int eax, ebx, ecx, edx; u64 xfeatures; @@ -810,7 +813,8 @@ void __init fpu__init_system_xstate(void) * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ - update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_uabi()); + update_regset_xstate_info(fpu_user_cfg.max_size, + xfeatures_mask_uabi()); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); @@ -830,13 +834,13 @@ void __init fpu__init_system_xstate(void) print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask_all, - fpu_kernel_xstate_size, + fpu_kernel_cfg.max_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); return; out_disable: /* something went wrong, try to boot without any XSAVE support */ - fpu__init_disable_system_xstate(); + fpu__init_disable_system_xstate(legacy_size); } /* diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 379dbfa4f526..3d45eb04471b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -31,7 +31,7 @@ extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system_xstate(void); +extern void fpu__init_system_xstate(unsigned int legacy_size); extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); -- Gitee From 25bb492ff1f23c682261628512c137a3d3fbe17f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:35 +0200 Subject: [PATCH 147/188] x86/fpu: Move xstate feature masks to fpu_*_cfg mainline inclusion from mainline-v5.16-rc1 commit 1c253ff2287fe31307a67938c4487936db967ff5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1c253ff2287f x86/fpu: Move xstate feature masks to 
fpu_*_cfg. -------------------------------- Move the feature mask storage to the kernel and user config structs. Default and maximum feature set are the same for now. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.352041752@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 10 +++--- arch/x86/kernel/fpu/core.c | 4 +-- arch/x86/kernel/fpu/init.c | 2 +- arch/x86/kernel/fpu/signal.c | 3 +- arch/x86/kernel/fpu/xstate.c | 57 ++++++++++++++++--------------- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 61fcb15d880a..fe7c9af9ea42 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -78,11 +78,9 @@ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) -extern u64 xfeatures_mask_all; - static inline u64 xfeatures_mask_supervisor(void) { - return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; + return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } /* @@ -91,7 +89,7 @@ static inline u64 xfeatures_mask_supervisor(void) */ static inline u64 xfeatures_mask_uabi(void) { - return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; + return fpu_kernel_cfg.max_features & XFEATURE_MASK_USER_SUPPORTED; } /* @@ -102,7 +100,7 @@ static inline u64 xfeatures_mask_uabi(void) */ static inline u64 xfeatures_mask_restore_user(void) { - return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; + return fpu_kernel_cfg.max_features & XFEATURE_MASK_USER_RESTORE; } /* @@ -111,7 +109,7 @@ static inline u64 xfeatures_mask_restore_user(void) */ static inline u64 xfeatures_mask_fpstate(void) { - return xfeatures_mask_all & \ + return fpu_kernel_cfg.max_features & \ (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 69abf3a2299d..501e21c341f1 100644 --- 
a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -349,8 +349,8 @@ void fpstate_reset(struct fpu *fpu) /* Initialize sizes and feature masks */ fpu->fpstate->size = fpu_kernel_cfg.default_size; fpu->fpstate->user_size = fpu_user_cfg.default_size; - fpu->fpstate->xfeatures = xfeatures_mask_all; - fpu->fpstate->user_xfeatures = xfeatures_mask_uabi(); + fpu->fpstate->xfeatures = fpu_kernel_cfg.default_features; + fpu->fpstate->user_xfeatures = fpu_user_cfg.default_features; } #if IS_ENABLED(CONFIG_KVM) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 58043ed08662..7074154131e6 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -211,7 +211,7 @@ static void __init fpu__init_init_fpstate(void) { /* Bring init_fpstate size and features up to date */ init_fpstate.size = fpu_kernel_cfg.max_size; - init_fpstate.xfeatures = xfeatures_mask_all; + init_fpstate.xfeatures = fpu_kernel_cfg.max_features; } /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index fab440369663..c14f477f5651 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -417,7 +417,8 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpregs->xsave.header.xfeatures &= mask; - success = !os_xrstor_safe(&fpregs->xsave, xfeatures_mask_all); + success = !os_xrstor_safe(&fpregs->xsave, + fpu_kernel_cfg.max_features); } else { success = !fxrstor_safe(&fpregs->fxsave); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b3f1ca404ae2..31cd839d84ca 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -62,12 +62,6 @@ static short xsave_cpuid_features[] __initdata = { X86_FEATURE_ENQCMD, }; -/* - * This represents the full set of bits that should ever be set in a kernel - * XSAVE buffer, both supervisor and user xstates. 
- */ -u64 xfeatures_mask_all __ro_after_init; - static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = @@ -84,7 +78,7 @@ static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { - u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all; + u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features; if (unlikely(feature_name)) { long xfeature_idx, max_idx; @@ -134,7 +128,7 @@ static bool xfeature_is_supervisor(int xfeature_nr) */ void fpu__init_cpu_xstate(void) { - if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all) + if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features) return; cr4_set_bits(X86_CR4_OSXSAVE); @@ -144,7 +138,7 @@ void fpu__init_cpu_xstate(void) * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); + xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. 
@@ -157,7 +151,7 @@ void fpu__init_cpu_xstate(void) static bool xfeature_enabled(enum xfeature xfeature) { - return xfeatures_mask_all & BIT_ULL(xfeature); + return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } /* @@ -183,7 +177,7 @@ static void __init setup_xstate_features(void) xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); - for_each_extended_xfeature(i, xfeatures_mask_all) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; @@ -288,14 +282,14 @@ static void __init setup_xstate_comp_offsets(void) xmm_space); if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) { - for_each_extended_xfeature(i, xfeatures_mask_all) + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) xstate_comp_offsets[i] = xstate_offsets[i]; return; } next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for_each_extended_xfeature(i, xfeatures_mask_all) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (xfeature_is_aligned(i)) next_offset = ALIGN(next_offset, 64); @@ -319,7 +313,7 @@ static void __init setup_supervisor_only_offsets(void) next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for_each_extended_xfeature(i, xfeatures_mask_all) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (!xfeature_is_supervisor(i)) continue; @@ -338,7 +332,7 @@ static void __init print_xstate_offset_size(void) { int i; - for_each_extended_xfeature(i, xfeatures_mask_all) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xstate_comp_offsets[i], i, xstate_sizes[i]); } @@ -401,7 +395,7 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - xstate_init_xcomp_bv(&init_fpstate.regs.xsave, xfeatures_mask_all); + xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features); /* * Init all the features state with header.xfeatures being 
0x0 @@ -570,7 +564,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; - for_each_extended_xfeature(i, xfeatures_mask_all) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (!check_xstate_against_struct(i)) return false; /* @@ -724,7 +718,7 @@ static int __init init_xstate_size(void) */ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { - xfeatures_mask_all = 0; + fpu_kernel_cfg.max_features = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); @@ -768,13 +762,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * Find user xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xfeatures_mask_all = eax + ((u64)edx << 32); + fpu_kernel_cfg.max_features = eax + ((u64)edx << 32); /* * Find supervisor xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - xfeatures_mask_all |= ecx + ((u64)edx << 32); + fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32); if ((xfeatures_mask_uabi() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* @@ -783,7 +777,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * booting without it. This is too early to BUG(). 
*/ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", - xfeatures_mask_all); + fpu_kernel_cfg.max_features); goto out_disable; } @@ -792,14 +786,21 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) */ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { if (!boot_cpu_has(xsave_cpuid_features[i])) - xfeatures_mask_all &= ~BIT_ULL(i); + fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } - xfeatures_mask_all &= XFEATURE_MASK_USER_SUPPORTED | + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED; + fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; + fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; + + /* Identical for now */ + fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; + fpu_user_cfg.default_features = fpu_user_cfg.max_features; + /* Store it for paranoia check at the end */ - xfeatures = xfeatures_mask_all; + xfeatures = fpu_kernel_cfg.max_features; /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -825,15 +826,15 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * Paranoia check whether something in the setup modified the * xfeatures mask. */ - if (xfeatures != xfeatures_mask_all) { + if (xfeatures != fpu_kernel_cfg.max_features) { pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", - xfeatures, xfeatures_mask_all); + xfeatures, fpu_kernel_cfg.max_features); goto out_disable; } print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", - xfeatures_mask_all, + fpu_kernel_cfg.max_features, fpu_kernel_cfg.max_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); return; @@ -908,7 +909,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) * We should not ever be requesting features that we * have not enabled. 
*/ - WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)), + WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to -- Gitee From b86e7602be156cd0cf61d9c09f5f7f765aeff3bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:37 +0200 Subject: [PATCH 148/188] x86/fpu: Mop up xfeatures_mask_uabi() mainline inclusion from mainline-v5.16-rc1 commit daddee24731938781b7876d20335ea3754d23484 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit daddee247319 x86/fpu: Mop up xfeatures_mask_uabi(). -------------------------------- Use the new fpu_user_cfg to retrieve the information instead of xfeatures_mask_uabi() which will be no longer correct when dynamically enabled features become available. Using fpu_user_cfg is appropriate when setting XCOMP_BV in the init_fpstate since it has space allocated for "max_features". But, normal fpstates might only have space for default xfeatures. Since XRSTOR* derives the format of the XSAVE buffer from XCOMP_BV, this can lead to XRSTOR reading out of bounds. So when copying actively used fpstate, simply read the XCOMP_BV features bits directly out of the fpstate instead. 
This correction courtesy of Dave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.408879849@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 9 --------- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 6 +++--- 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index fe7c9af9ea42..3c890b97f195 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -83,15 +83,6 @@ static inline u64 xfeatures_mask_supervisor(void) return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } -/* - * The xfeatures which are enabled in XCR0 and expected to be in ptrace - * buffers and signal frames. - */ -static inline u64 xfeatures_mask_uabi(void) -{ - return fpu_kernel_cfg.max_features & XFEATURE_MASK_USER_SUPPORTED; -} - /* * The xfeatures which are restored by the kernel when returning to user * mode. 
This is not necessarily the same as xfeatures_mask_uabi() as the diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 501e21c341f1..5acc077cb9f1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -237,7 +237,7 @@ int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, } /* Ensure that XCOMP_BV is set up for XSAVES */ - xstate_init_xcomp_bv(&kstate->regs.xsave, xfeatures_mask_uabi()); + xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures); return 0; } EXPORT_SYMBOL_GPL(fpu_copy_kvm_uabi_to_fpstate); @@ -333,7 +333,7 @@ void fpstate_init_user(struct fpstate *fpstate) return; } - xstate_init_xcomp_bv(&fpstate->regs.xsave, xfeatures_mask_uabi()); + xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures); if (cpu_feature_enabled(X86_FEATURE_FXSR)) fpstate_init_fxstate(fpstate); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c14f477f5651..3e42e6e8b56c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -536,7 +536,7 @@ void __init fpu__init_prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask_uabi(); + fx_sw_reserved.xfeatures = fpu_user_cfg.default_features; fx_sw_reserved.xstate_size = fpu_user_cfg.default_size; if (IS_ENABLED(CONFIG_IA32_EMULATION) || diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 31cd839d84ca..b66526d983c7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -770,7 +770,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32); - if ((xfeatures_mask_uabi() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected 
happened * with the enumeration. Disable XSAVE and try to continue @@ -815,7 +815,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * supervisor xstates: */ update_regset_xstate_info(fpu_user_cfg.max_size, - xfeatures_mask_uabi()); + fpu_user_cfg.max_features); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); @@ -853,7 +853,7 @@ void fpu__resume_cpu(void) * Restore XCR0 on xsave capable CPUs: */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); + xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * Restore IA32_XSS. The same CPUID bit enumerates support -- Gitee From e655a8b0086d84a663a2d637ef5b22ae756c1b2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:38 +0200 Subject: [PATCH 149/188] x86/fpu: Rework restore_regs_from_fpstate() mainline inclusion from mainline-v5.16-rc1 commit category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit eda32f4f93b4 x86/fpu: Rework restore_regs_from_fpstate(). -------------------------------- xfeatures_mask_fpstate() is no longer valid when dynamically enabled features come into play. Rework restore_regs_from_fpstate() so it takes a constant mask which will then be applied against the maximum feature set so that the restore operation brings all features which are not in the xsave buffer xfeature bitmap into init state. This ensures that if the previous task used a dynamically enabled feature that the task which restores has all unused components properly initialized. Cleanup the last user of xfeatures_mask_fpstate() as well and remove it. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.461348278@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 27 +++++++-------------------- arch/x86/kernel/fpu/context.h | 6 +----- arch/x86/kernel/fpu/core.c | 17 ++++++++++++++--- arch/x86/kernel/fpu/xstate.c | 2 +- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3c890b97f195..61ae396bc6e7 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -78,30 +78,17 @@ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) -static inline u64 xfeatures_mask_supervisor(void) -{ - return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; -} - /* - * The xfeatures which are restored by the kernel when returning to user - * mode. This is not necessarily the same as xfeatures_mask_uabi() as the - * kernel does not manage all XCR0 enabled features via xsave/xrstor as - * some of them have to be switched eagerly on context switch and exec(). + * The feature mask required to restore FPU state: + * - All user states which are not eagerly switched in switch_to()/exec() + * - The suporvisor states */ -static inline u64 xfeatures_mask_restore_user(void) -{ - return fpu_kernel_cfg.max_features & XFEATURE_MASK_USER_RESTORE; -} +#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ + XFEATURE_MASK_SUPERVISOR_SUPPORTED) -/* - * Like xfeatures_mask_restore_user() but additionally restors the - * supported supervisor states. 
- */ -static inline u64 xfeatures_mask_fpstate(void) +static inline u64 xfeatures_mask_supervisor(void) { - return fpu_kernel_cfg.max_features & \ - (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); + return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } static inline u64 xfeatures_mask_independent(void) diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index f8f510519688..a06ebf315d83 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -61,8 +61,6 @@ static inline void fpregs_restore_userregs(void) return; if (!fpregs_state_valid(fpu, cpu)) { - u64 mask; - /* * This restores _all_ xstate which has not been * established yet. @@ -72,9 +70,7 @@ static inline void fpregs_restore_userregs(void) * flush_thread(). So it is excluded because it might be * not up to date in current->thread.fpu.xsave state. */ - mask = xfeatures_mask_restore_user() | - xfeatures_mask_supervisor(); - restore_fpregs_from_fpstate(fpu->fpstate, mask); + restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); fpregs_activate(fpu); fpu->last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 5acc077cb9f1..0fb9defaba47 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -150,6 +150,17 @@ void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) } if (use_xsave()) { + /* + * Restoring state always needs to modify all features + * which are in @mask even if the current task cannot use + * extended features. + * + * So fpstate->xfeatures cannot be used here, because then + * a feature for which the task has no permission but was + * used by the previous task would not go into init state. 
+ */ + mask = fpu_kernel_cfg.max_features & mask; + os_xrstor(&fpstate->regs.xsave, mask); } else { if (use_fxsr()) @@ -161,7 +172,7 @@ void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) void fpu_reset_from_exception_fixup(void) { - restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); + restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE); } #if IS_ENABLED(CONFIG_KVM) @@ -179,7 +190,7 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) } if (rstor) { - restore_mask &= xfeatures_mask_fpstate(); + restore_mask &= XFEATURE_MASK_FPSTATE; restore_fpregs_from_fpstate(rstor->fpstate, restore_mask); } @@ -518,7 +529,7 @@ void fpu__clear_user_states(struct fpu *fpu) } /* Reset user states in registers. */ - restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user()); + restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); /* * Now all FPU registers have their desired values. Inform the FPU diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b66526d983c7..05fce3d6f52e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -344,7 +344,7 @@ static void __init print_xstate_offset_size(void) */ static __init void os_xrstor_booting(struct xregs_state *xstate) { - u64 mask = xfeatures_mask_fpstate(); + u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE; u32 lmask = mask; u32 hmask = mask >> 32; int err; -- Gitee From 9e979953653955a1c93b16ef973ed80cfda094ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2021 01:09:40 +0200 Subject: [PATCH 150/188] x86/fpu/xstate: Move remaining xfeature helpers to core mainline inclusion from mainline-v5.16-rc1 commit d72c87018d00782c3ac0a844c372158087debc0a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d72c87018d00 x86/fpu/xstate: Move remaining xfeature helpers to core. 
-------------------------------- Now that everything is mopped up, move all the helpers and prototypes into the core header. They are not required by the outside. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211014230739.514095101@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 13 ------------- arch/x86/kernel/fpu/xstate.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 61ae396bc6e7..43ae89d4bcd2 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -86,19 +86,6 @@ #define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ XFEATURE_MASK_SUPERVISOR_SUPPORTED) -static inline u64 xfeatures_mask_supervisor(void) -{ - return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; -} - -static inline u64 xfeatures_mask_independent(void) -{ - if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; - - return XFEATURE_MASK_INDEPENDENT; -} - extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3d45eb04471b..a1aa0bad2c9c 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -35,6 +35,19 @@ extern void fpu__init_system_xstate(unsigned int legacy_size); extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +static inline u64 xfeatures_mask_supervisor(void) +{ + return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; +} + +static inline u64 xfeatures_mask_independent(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) + return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; + + return XFEATURE_MASK_INDEPENDENT; +} + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 -- Gitee From 
41799a235570499fb2c76434b9472ec5233e9147 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Oct 2021 20:55:49 +0200 Subject: [PATCH 151/188] x86/fpu: Prepare for sanitizing KVM FPU code mainline inclusion from mainline-v5.16-rc1 commit 75c52dad5e327605f1025f399dafdf4aaf5dae9c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 75c52dad5e32 x86/fpu: Prepare for sanitizing KVM FPU code. -------------------------------- For the upcoming AMX support it's necessary to do a proper integration with KVM. To avoid more nasty hackery in KVM which violate encapsulation extend struct fpu and fpstate so the fpstate switching can be consolidated and simplified. Currently KVM allocates two FPU structs which are used for saving the user state of the vCPU thread and restoring the guest state when entering vcpu_run() and doing the reverse operation before leaving vcpu_run(). With the new fpstate mechanism this can be reduced to one extra buffer by swapping the fpstate pointer in current::thread::fpu. This makes the upcoming support for AMX and XFD simpler because then fpstate information (features, sizes, xfd) are always consistent and it does not require any nasty workarounds. Add fpu::__task_fpstate to save the regular fpstate pointer while the task is inside vcpu_run(). Add some state fields to fpstate to indicate the nature of the state. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211022185312.896403942@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index a32be07f1418..c72cb2269adc 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -322,8 +322,32 @@ struct fpstate { /* @user_xfeatures: xfeatures valid in UABI buffers */ u64 user_xfeatures; + /* @is_valloc: Indicator for dynamically allocated state */ + unsigned int is_valloc : 1; + + /* @is_guest: Indicator for guest state (KVM) */ + unsigned int is_guest : 1; + + /* + * @is_confidential: Indicator for KVM confidential mode. + * The FPU registers are restored by the + * vmentry firmware from encrypted guest + * memory. On vmexit the FPU registers are + * saved by firmware to encrypted guest memory + * and the registers are scrubbed before + * returning to the host. So there is no + * content which is worth saving and restoring. + * The fpstate has to be there so that + * preemption and softirq FPU usage works + * without special casing. + */ + unsigned int is_confidential : 1; + + /* @in_use: State is in use */ + unsigned int in_use : 1; + /* @regs: The register state union for all supported formats */ - union fpregs_state regs; + union fpregs_state regs; /* @regs is dynamically sized! Don't add anything after @regs! */ } __aligned(64); @@ -363,6 +387,14 @@ struct fpu { */ struct fpstate *fpstate; + /* + * @__task_fpstate: + * + * Pointer to an inactive struct fpstate. Initialized to NULL. Is + * used only for KVM support to swap out the regular task fpstate. 
+ */ + struct fpstate *__task_fpstate; + /* * @__fpstate: * @@ -378,6 +410,16 @@ struct fpu { */ }; +/* + * Guest pseudo FPU container + */ +struct fpu_guest { + /* + * @fpstate: Pointer to the allocated guest fpstate + */ + struct fpstate *fpstate; +}; + /* * FPU state configuration data. Initialized at boot time. Read only after init. */ -- Gitee From c79f05aa52d93d033fd961a8fa6abf9a658f0031 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Oct 2021 20:55:51 +0200 Subject: [PATCH 152/188] x86/fpu: Provide infrastructure for KVM FPU cleanup mainline inclusion from mainline-v5.16-rc1 commit 69f6ed1d14c6bcf712f4bb22a231c15eeab401e7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 69f6ed1d14c6 x86/fpu: Provide infrastructure for KVM FPU cleanup. -------------------------------- For the upcoming AMX support it's necessary to do a proper integration with KVM. Currently KVM allocates two FPU structs which are used for saving the user state of the vCPU thread and restoring the guest state when entering vcpu_run() and doing the reverse operation before leaving vcpu_run(). With the new fpstate mechanism this can be reduced to one extra buffer by swapping the fpstate pointer in current::thread::fpu. This makes the upcoming support for AMX and XFD simpler because then fpstate information (features, sizes, xfd) are always consistent and it does not require any nasty workarounds. Provide: - An allocator which initializes the state properly - A replacement for the existing FPU swap mechanism Aside of the reduced memory footprint, this also makes state switching more efficient when TIF_NEED_FPU_LOAD is set. It does not require a memcpy as the state is already correct in the to be swapped out fpstate. The existing interfaces will be removed once KVM is converted over.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211022185312.954684740@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 13 ++++++ arch/x86/kernel/fpu/core.c | 85 +++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 4fce3152af17..4bfe39aa4c74 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -108,9 +108,22 @@ extern void fpu_init_fpstate_user(struct fpu *fpu); extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); /* KVM specific functions */ +extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); +extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); +extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); extern int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *pkru); extern void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, unsigned int size, u32 pkru); +static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +{ + gfpu->fpstate->is_confidential = true; +} + +static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) +{ + return gfpu->fpstate->is_confidential; +} + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0fb9defaba47..748d7b2fcacb 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -176,6 +176,75 @@ void fpu_reset_from_exception_fixup(void) } #if IS_ENABLED(CONFIG_KVM) +static void __fpstate_reset(struct fpstate *fpstate); + +bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) +{ + struct fpstate *fpstate; + unsigned int size; + + size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); + fpstate = vzalloc(size); + if 
(!fpstate) + return false; + + __fpstate_reset(fpstate); + fpstate_init_user(fpstate); + fpstate->is_valloc = true; + fpstate->is_guest = true; + + gfpu->fpstate = fpstate; + return true; +} +EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); + +void fpu_free_guest_fpstate(struct fpu_guest *gfpu) +{ + struct fpstate *fps = gfpu->fpstate; + + if (!fps) + return; + + if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use)) + return; + + gfpu->fpstate = NULL; + vfree(fps); +} +EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); + +int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) +{ + struct fpstate *guest_fps = guest_fpu->fpstate; + struct fpu *fpu = &current->thread.fpu; + struct fpstate *cur_fps = fpu->fpstate; + + fpregs_lock(); + if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD)) + save_fpregs_to_fpstate(fpu); + + /* Swap fpstate */ + if (enter_guest) { + fpu->__task_fpstate = cur_fps; + fpu->fpstate = guest_fps; + guest_fps->in_use = true; + } else { + guest_fps->in_use = false; + fpu->fpstate = fpu->__task_fpstate; + fpu->__task_fpstate = NULL; + } + + cur_fps = fpu->fpstate; + + if (!cur_fps->is_confidential) + restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE); + + fpregs_mark_activate(); + fpregs_unlock(); + return 0; +} +EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); + void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) { fpregs_lock(); @@ -352,16 +421,20 @@ void fpstate_init_user(struct fpstate *fpstate) fpstate_init_fstate(fpstate); } +static void __fpstate_reset(struct fpstate *fpstate) +{ + /* Initialize sizes and feature masks */ + fpstate->size = fpu_kernel_cfg.default_size; + fpstate->user_size = fpu_user_cfg.default_size; + fpstate->xfeatures = fpu_kernel_cfg.default_features; + fpstate->user_xfeatures = fpu_user_cfg.default_features; +} + void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; - - /* Initialize sizes and
feature masks */ - fpu->fpstate->size = fpu_kernel_cfg.default_size; - fpu->fpstate->user_size = fpu_user_cfg.default_size; - fpu->fpstate->xfeatures = fpu_kernel_cfg.default_features; - fpu->fpstate->user_xfeatures = fpu_user_cfg.default_features; + __fpstate_reset(fpu->fpstate); } #if IS_ENABLED(CONFIG_KVM) -- Gitee From 55807a741c343195f686ab6f9dab8f8c6d8d56cc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Oct 2021 20:55:53 +0200 Subject: [PATCH 153/188] x86/kvm: Convert FPU handling to a single swap buffer mainline inclusion from mainline-v5.16-rc1 commit d69c1382e1b73a0496a70872a035ca2b22d074e5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d69c1382e1b7 x86/kvm: Convert FPU handling to a single swap buffer. -------------------------------- For the upcoming AMX support it's necessary to do a proper integration with KVM. Currently KVM allocates two FPU structs which are used for saving the user state of the vCPU thread and restoring the guest state when entering vcpu_run() and doing the reverse operation before leaving vcpu_run(). With the new fpstate mechanism this can be reduced to one extra buffer by swapping the fpstate pointer in current::thread::fpu. This makes the upcoming support for AMX and XFD simpler because then fpstate information (features, sizes, xfd) are always consistent and it does not require any nasty workarounds. Convert the KVM FPU code over to this new scheme. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211022185313.019454292@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 4 +- arch/x86/include/asm/kvm_host.h | 5 +-- arch/x86/kernel/fpu/core.c | 16 ++++---- arch/x86/kvm/x86.c | 69 +++++++++------------------------ 4 files changed, 31 insertions(+), 63 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 4bfe39aa4c74..8a453a0818f0 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -113,8 +113,8 @@ extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); -extern int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, u32 *pkru); -extern void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, unsigned int size, u32 pkru); +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); static inline void fpstate_set_confidential(struct fpu_guest *gfpu) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3d9da4629325..6ecc24e335ff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -604,11 +604,10 @@ struct kvm_vcpu_arch { * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The - * "guest_fpu" state here contains the guest FPU context, with the + * "guest_fpstate" state here contains the guest FPU context, with the * host PRKU bits. 
*/ - struct fpu *user_fpu; - struct fpu *guest_fpu; + struct fpu_guest guest_fpu; u64 xcr0; u64 guest_supported_xcr0; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 748d7b2fcacb..01fbf7c3e799 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -268,10 +268,10 @@ void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); -void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, - unsigned int size, u32 pkru) +void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u32 pkru) { - struct fpstate *kstate = fpu->fpstate; + struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; @@ -284,12 +284,12 @@ void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf, ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } } -EXPORT_SYMBOL_GPL(fpu_copy_fpstate_to_kvm_uabi); +EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); -int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, - u32 *vpkru) +int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, + u64 xcr0, u32 *vpkru) { - struct fpstate *kstate = fpu->fpstate; + struct fpstate *kstate = gfpu->fpstate; const union fpregs_state *ustate = buf; struct pkru_state *xpkru; int ret; @@ -320,7 +320,7 @@ int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0, xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures); return 0; } -EXPORT_SYMBOL_GPL(fpu_copy_kvm_uabi_to_fpstate); +EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dfea149b850b..033fcb0e11ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -297,8 +297,6 @@ u64 __read_mostly host_xcr0; u64 __read_mostly supported_xcr0; EXPORT_SYMBOL_GPL(supported_xcr0); 
-static struct kmem_cache *x86_fpu_cache; - static struct kmem_cache *x86_emulator_cache; /* @@ -4524,17 +4522,18 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - fpu_copy_fpstate_to_kvm_uabi(vcpu->arch.guest_fpu, guest_xsave->region, - sizeof(guest_xsave->region), - vcpu->arch.pkru); + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, + guest_xsave->region, + sizeof(guest_xsave->region), + vcpu->arch.pkru); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - return fpu_copy_kvm_uabi_to_fpstate(vcpu->arch.guest_fpu, - guest_xsave->region, - supported_xcr0, &vcpu->arch.pkru); + return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu, + guest_xsave->region, + supported_xcr0, &vcpu->arch.pkru); } static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, @@ -7968,18 +7967,11 @@ int kvm_arch_init(void *opaque) } r = -ENOMEM; - x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu), - __alignof__(struct fpu), SLAB_ACCOUNT, - NULL); - if (!x86_fpu_cache) { - printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n"); - goto out; - } x86_emulator_cache = kvm_alloc_emulator_cache(); if (!x86_emulator_cache) { pr_err("kvm: failed to allocate cache for x86 emulator\n"); - goto out_free_x86_fpu_cache; + goto out; } user_return_msrs = alloc_percpu(struct kvm_user_return_msrs); @@ -8022,8 +8014,6 @@ int kvm_arch_init(void *opaque) free_percpu(user_return_msrs); out_free_x86_emulator_cache: kmem_cache_destroy(x86_emulator_cache); -out_free_x86_fpu_cache: - kmem_cache_destroy(x86_fpu_cache); out: return r; } @@ -8051,7 +8041,6 @@ void kvm_arch_exit(void) kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); - kmem_cache_destroy(x86_fpu_cache); } int kvm_vcpu_halt(struct kvm_vcpu *vcpu) @@ -9338,23 +9327,17 @@ static int 
complete_emulated_mmio(struct kvm_vcpu *vcpu) static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { /* - * Guests with protected state have guest_fpu == NULL which makes - * the swap only save the host state. Exclude PKRU from restore as - * it is restored separately in kvm_x86_ops.run(). + * Exclude PKRU from restore as restored separately in + * kvm_x86_ops.run(). */ - fpu_swap_kvm_fpu(vcpu->arch.user_fpu, vcpu->arch.guest_fpu, - ~XFEATURE_MASK_PKRU); + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); } /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - /* - * Guests with protected state have guest_fpu == NULL which makes - * swap only restore the host state. - */ - fpu_swap_kvm_fpu(vcpu->arch.guest_fpu, vcpu->arch.user_fpu, ~0ULL); + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -9856,7 +9839,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - fxsave = &vcpu->arch.guest_fpu->fpstate->regs.fxsave; + fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; fpu->fsw = fxsave->swd; @@ -9876,7 +9859,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - fxsave = &vcpu->arch.guest_fpu->fpstate->regs.fxsave; + fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -9994,22 +9977,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (!alloc_emulate_ctxt(vcpu)) goto free_wbinvd_dirty_mask; - vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.user_fpu) { - pr_err("kvm: failed to allocate userspace's fpu\n"); - goto free_emulate_ctxt; - } - - vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.guest_fpu) { + if 
(!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) { pr_err("kvm: failed to allocate vcpu's fpu\n"); - goto free_user_fpu; + goto free_emulate_ctxt; } - fpu_init_fpstate_user(vcpu->arch.user_fpu); - fpu_init_fpstate_user(vcpu->arch.guest_fpu); fx_init(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); @@ -10039,9 +10011,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; free_guest_fpu: - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); + fpu_free_guest_fpstate(&vcpu->arch.guest_fpu); free_emulate_ctxt: kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_wbinvd_dirty_mask: @@ -10092,8 +10062,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); - kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + fpu_free_guest_fpstate(&vcpu->arch.guest_fpu); kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); @@ -10144,7 +10113,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apf.halted = false; if (kvm_mpx_supported()) { - struct fpstate *fpstate = vcpu->arch.guest_fpu->fpstate; + struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; /* * To avoid have the INIT path from kvm_apic_has_events() that be -- Gitee From f163f0656956d25ff67c600d0bbddc6dfa667bc9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Oct 2021 20:55:54 +0200 Subject: [PATCH 154/188] x86/fpu: Remove old KVM FPU interface mainline inclusion from mainline-v5.16-rc1 commit 582b01b6ab2714a0a4d554cea7f0d4efeaa2154d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 582b01b6ab27 x86/fpu: Remove old KVM FPU interface. -------------------------------- No more users. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211022185313.074853631@linutronix.de Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 2 -- arch/x86/kernel/fpu/core.c | 32 -------------------------------- 2 files changed, 34 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 8a453a0818f0..058486b8ba6e 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -104,14 +104,12 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {} DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); /* fpstate-related functions which are exported to KVM */ -extern void fpu_init_fpstate_user(struct fpu *fpu); extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); -extern void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask); extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 01fbf7c3e799..9c475e2efd4d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -245,29 +245,6 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); -void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask) -{ - fpregs_lock(); - - if (save) { - struct fpstate *fpcur = current->thread.fpu.fpstate; - - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - memcpy(&save->fpstate->regs, &fpcur->regs, fpcur->size); - else - save_fpregs_to_fpstate(save); - } - - if (rstor) { - 
restore_mask &= XFEATURE_MASK_FPSTATE; - restore_fpregs_from_fpstate(rstor->fpstate, restore_mask); - } - - fpregs_mark_activate(); - fpregs_unlock(); -} -EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu); - void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru) { @@ -437,15 +414,6 @@ void fpstate_reset(struct fpu *fpu) __fpstate_reset(fpu->fpstate); } -#if IS_ENABLED(CONFIG_KVM) -void fpu_init_fpstate_user(struct fpu *fpu) -{ - fpstate_reset(fpu); - fpstate_init_user(fpu->fpstate); -} -EXPORT_SYMBOL_GPL(fpu_init_fpstate_user); -#endif - /* Clone current's FPU state on fork */ int fpu_clone(struct task_struct *dst) { -- Gitee From 1ceab94a883ae1129471555f9956c2f12c1afbef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:05 -0700 Subject: [PATCH 155/188] signal: Add an optional check for altstack size mainline inclusion from mainline-v5.16-rc1 commit 1bdda24c4af64cd2d65dec5192ab624c5fee7ca0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 1bdda24c4af6 signal: Add an optional check for altstack size. -------------------------------- New x86 FPU features will be very large, requiring ~10k of stack in signal handlers. These new features require a new approach called "dynamic features". The kernel currently tries to ensure that altstacks are reasonably sized. Right now, on x86, sys_sigaltstack() requires a size of >=2k. However, that 2k is a constant. Simply raising that 2k requirement to >10k for the new features would break existing apps which have a compiled-in size of 2k. Instead of universally enforcing a larger stack, prohibit a process from using dynamic features without properly-sized altstacks. This must be enforced in two places: * A dynamic feature can not be enabled without a large-enough altstack for each process thread.
* Once a dynamic feature is enabled, any request to install a too-small altstack will be rejected The dynamic feature enabling code must examine each thread in a process to ensure that the altstacks are large enough. Add a new lock (sigaltstack_lock()) to ensure that threads can not race and change their altstack after being examined. Add the infrastructure in form of a config option and provide empty stubs for architectures which do not need dynamic altstack size checks. This implementation will be fleshed out for x86 in a future patch called x86/arch_prctl: Add controls for dynamic XSTATE components [dhansen: commit message. ] Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-2-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/Kconfig | 3 +++ include/linux/signal.h | 6 ++++++ kernel/signal.c | 35 +++++++++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 7a8e3d45b2a1..d2c7203dfaef 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1119,6 +1119,9 @@ config ARCH_SPLIT_ARG64 If a 32-bit architecture requires 64-bit arguments to be split into pairs of 32-bit arguments, select this option. 
+config DYNAMIC_SIGFRAME + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/linux/signal.h b/include/linux/signal.h index b256f9c65661..3038a0610407 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -464,6 +464,12 @@ int __save_altstack(stack_t __user *, unsigned long); sas_ss_reset(t); \ } while (0); +#ifdef CONFIG_DYNAMIC_SIGFRAME +bool sigaltstack_size_valid(size_t ss_size); +#else +static inline bool sigaltstack_size_valid(size_t size) { return true; } +#endif /* !CONFIG_DYNAMIC_SIGFRAME */ + #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); diff --git a/kernel/signal.c b/kernel/signal.c index 54f86e0b97f3..ce95a76fe60a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4010,11 +4010,29 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) return 0; } +#ifdef CONFIG_DYNAMIC_SIGFRAME +static inline void sigaltstack_lock(void) + __acquires(&current->sighand->siglock) +{ + spin_lock_irq(&current->sighand->siglock); +} + +static inline void sigaltstack_unlock(void) + __releases(&current->sighand->siglock) +{ + spin_unlock_irq(&current->sighand->siglock); +} +#else +static inline void sigaltstack_lock(void) { } +static inline void sigaltstack_unlock(void) { } +#endif + static int do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, size_t min_ss_size) { struct task_struct *t = current; + int ret = 0; if (oss) { memset(oss, 0, sizeof(stack_t)); @@ -4038,19 +4056,24 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; + sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; ss_sp = NULL; } else { if (unlikely(ss_size < min_ss_size)) - return -ENOMEM; + ret = -ENOMEM; + if (!sigaltstack_size_valid(ss_size)) + ret = -ENOMEM; } - - t->sas_ss_sp = (unsigned long) ss_sp; - t->sas_ss_size = ss_size; - t->sas_ss_flags = ss_flags; + if (!ret) { + t->sas_ss_sp = (unsigned long) 
ss_sp; + t->sas_ss_size = ss_size; + t->sas_ss_flags = ss_flags; + } + sigaltstack_unlock(); } - return 0; + return ret; } SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) -- Gitee From 088a75b378d7307be55073c52ffbb77d222da200 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:06 -0700 Subject: [PATCH 156/188] x86/signal: Implement sigaltstack size validation mainline inclusion from mainline-v5.16-rc1 commit 3aac3ebea08f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 3aac3ebea08f x86/signal: Implement sigaltstack size validation. -------------------------------- For historical reasons MINSIGSTKSZ is a constant which became already too small with AVX512 support. Add a mechanism to enforce strict checking of the sigaltstack size against the real size of the FPU frame. The strict check can be enabled via a config option and can also be controlled via the kernel command line option 'strict_sas_size' independent of the config switch. Enabling it might break existing applications which allocate a too small sigaltstack but 'work' because they never get a signal delivered. Though it can be handy to filter out binaries which are not yet aware of AT_MINSIGSTKSZ. Also the upcoming support for dynamically enabled FPU features requires a strict sanity check to ensure that: - Enabling of a dynamic feature, which changes the sigframe size fits into an enabled sigaltstack - Installing a too small sigaltstack after a dynamic feature has been added is not possible. Implement the base check which is controlled by config and command line options. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-3-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- .../admin-guide/kernel-parameters.txt | 9 +++++ arch/x86/Kconfig | 17 ++++++++++ arch/x86/kernel/signal.c | 34 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 247acf7fc837..2547a1e0b8f9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5384,6 +5384,15 @@ stifb= [HW] Format: bpp:[:[:...]] + strict_sas_size= + [X86] + Format: + Enable or disable strict sigaltstack size checks + against the required signal frame size which + depends on the supported FPU features. This can + be used to filter out binaries which have + not yet been made aware of AT_MINSIGSTKSZ. + sunrpc.min_resvport= sunrpc.max_resvport= [NFS,SUNRPC] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f39cfb5a6535..e0795d057745 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,6 +111,7 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS + select DYNAMIC_SIGFRAME select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS @@ -2452,6 +2453,22 @@ config MODIFY_LDT_SYSCALL Saying 'N' here may make sense for embedded or server kernels. +config STRICT_SIGALTSTACK_SIZE + bool "Enforce strict size checking for sigaltstack" + depends on DYNAMIC_SIGFRAME + help + For historical reasons MINSIGSTKSZ is a constant which became + already too small with AVX512 support. Add a mechanism to + enforce strict checking of the sigaltstack size against the + real size of the FPU frame. This option enables the check + by default. It can also be controlled via the kernel command + line option 'strict_sas_size' independent of this config + switch. 
Enabling it might break existing applications which + allocate a too small sigaltstack but 'work' because they + never get a signal delivered. + + Say 'N' unless you want to really enforce this check. + source "kernel/livepatch/Kconfig" endmenu diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 5a26695dcc9c..a64536b8bffb 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -40,6 +40,7 @@ #include #include #include +#include #endif /* CONFIG_X86_64 */ #include @@ -907,6 +908,39 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) force_sig(SIGSEGV); } +#ifdef CONFIG_DYNAMIC_SIGFRAME +#ifdef CONFIG_STRICT_SIGALTSTACK_SIZE +static bool strict_sigaltstack_size __ro_after_init = true; +#else +static bool strict_sigaltstack_size __ro_after_init = false; +#endif + +static int __init strict_sas_size(char *arg) +{ + return kstrtobool(arg, &strict_sigaltstack_size); +} +__setup("strict_sas_size", strict_sas_size); + +/* + * MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512 + * exceeds that size already. As such programs might never use the + * sigaltstack they just continued to work. While always checking against + * the real size would be correct, this might be considered a regression. + * + * Therefore avoid the sanity check, unless enforced by kernel config or + * command line option. + */ +bool sigaltstack_size_valid(size_t ss_size) +{ + lockdep_assert_held(&current->sighand->siglock); + + if (strict_sigaltstack_size) + return ss_size > get_sigframe_size(); + + return true; +} +#endif /* CONFIG_DYNAMIC_SIGFRAME */ + #ifdef CONFIG_X86_X32_ABI COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) { -- Gitee From d2cf537bd8555512df9d20b0435e5d9bc32d3d19 Mon Sep 17 00:00:00 2001 From: "Chang S. 
Bae" Date: Thu, 21 Oct 2021 15:55:07 -0700 Subject: [PATCH 157/188] x86/fpu/xstate: Provide xstate_calculate_size() mainline inclusion from mainline-v5.16-rc1 commit 84e4dccc8fce20b497388d756e12de5c9006eb48 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 84e4dccc8fce x86/fpu/xstate: Provide xstate_calculate_size(). -------------------------------- Split out the size calculation from the paranoia check so it can be used for recalculating buffer sizes when dynamically enabled features are supported. Signed-off-by: Chang S. Bae [ tglx: Adopted to changed base code ] Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-4-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 05fce3d6f52e..149e09a3923d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -549,6 +549,33 @@ static bool __init check_xstate_against_struct(int nr) return true; } +static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) +{ + unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + int i; + + for_each_extended_xfeature(i, xfeatures) { + /* Align from the end of the previous feature */ + if (xfeature_is_aligned(i)) + size = ALIGN(size, 64); + /* + * In compacted format the enabled features are packed, + * i.e. disabled features do not occupy space. + * + * In non-compacted format the offsets are fixed and + * disabled states still occupy space in the memory buffer. 
+ */ + if (!compacted) + size = xfeature_uncompacted_offset(i); + /* + * Add the feature size even for non-compacted format + * to make the end result correct + */ + size += xfeature_size(i); + } + return size; +} + /* * This essentially double-checks what the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating @@ -575,25 +602,8 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) XSTATE_WARN_ON(1); return false; } - - /* Align from the end of the previous feature */ - if (xfeature_is_aligned(i)) - size = ALIGN(size, 64); - /* - * In compacted format the enabled features are packed, - * i.e. disabled features do not occupy space. - * - * In non-compacted format the offsets are fixed and - * disabled states still occupy space in the memory buffer. - */ - if (!compacted) - size = xfeature_uncompacted_offset(i); - /* - * Add the feature size even for non-compacted format - * to make the end result correct - */ - size += xfeature_size(i); } + size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted); XSTATE_WARN_ON(size != kernel_size); return size == kernel_size; } -- Gitee From 40388d8655e04ac6814fa92d5c47a3e79a8c07fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:08 -0700 Subject: [PATCH 158/188] x86/fpu: Add members to struct fpu to cache permission information mainline inclusion from mainline-v5.16-rc1 commit 6f6a7c09c4065a5b140194dfcfe4cf7104fec4d2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6f6a7c09c406 x86/fpu: Add members to struct fpu to cache permission information. -------------------------------- Dynamically enabled features can be requested by any thread of a running process at any time. The request does neither enable the feature nor allocate larger buffers. 
It just stores the permission to use the feature by adding the features to the permission bitmap and by calculating the required sizes for kernel and user space. The reallocation of the kernel buffer happens when the feature is used for the first time which is caught by an exception. The permission bitmap is then checked and if the feature is permitted, then it becomes fully enabled. If not, the task dies similarly to a task which uses an undefined instruction. The size information is precomputed to allow proper sigaltstack size checks once the feature is permitted, but not yet in use because otherwise this would open race windows where too small stacks could be installed causing a later fail on signal delivery. Initialize them to the default feature set and sizes. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-5-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 46 ++++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 5 ++++ 2 files changed, 51 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c72cb2269adc..c3ec56279767 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -352,6 +352,45 @@ struct fpstate { /* @regs is dynamically sized! Don't add anything after @regs! */ } __aligned(64); +struct fpu_state_perm { + /* + * @__state_perm: + * + * This bitmap indicates the permission for state components, which + * are available to a thread group. The permission prctl() sets the + * enabled state bits in thread_group_leader()->thread.fpu. + * + * All run time operations use the per thread information in the + * currently active fpu.fpstate which contains the xfeature masks + * and sizes for kernel and user space. 
+ * + * This master permission field is only to be used when + * task.fpu.fpstate based checks fail to validate whether the task + * is allowed to expand it's xfeatures set which requires to + * allocate a larger sized fpstate buffer. + * + * Do not access this field directly. Use the provided helper + * function. Unlocked access is possible for quick checks. + */ + u64 __state_perm; + + /* + * @__state_size: + * + * The size required for @__state_perm. Only valid to access + * with sighand locked. + */ + unsigned int __state_size; + + /* + * @__user_state_size: + * + * The size required for @__state_perm user part. Only valid to + * access with sighand locked. + */ + unsigned int __user_state_size; +}; + /* * Highest level per task FPU state data structure that * contains the FPU register state plus various FPU @@ -395,6 +434,13 @@ struct fpu { */ struct fpstate *__task_fpstate; + /* + * @perm: + * + * Permission related information + */ + struct fpu_state_perm perm; + /* * @__fpstate: * diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9c475e2efd4d..b05f6a3b2057 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -412,6 +412,11 @@ void fpstate_reset(struct fpu *fpu) /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; __fpstate_reset(fpu->fpstate); + + /* Initialize the permission related info in fpu */ + fpu->perm.__state_perm = fpu_kernel_cfg.default_features; + fpu->perm.__state_size = fpu_kernel_cfg.default_size; + fpu->perm.__user_state_size = fpu_user_cfg.default_size; } /* Clone current's FPU state on fork */ -- Gitee From 300f972dbf30d5075ab4e535a8ed7d69d2506880 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:09 -0700 Subject: [PATCH 159/188] x86/fpu: Add fpu_state_config::legacy_features mainline inclusion from mainline-v5.16-rc1 commit c33f0a81a2cf3920465309ce683534751bb86485 category: feature bugzilla: 
https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit c33f0a81a2cf x86/fpu: Add fpu_state_config::legacy_features. -------------------------------- The upcoming prctl() which is required to request the permission for a dynamically enabled feature will also provide an option to retrieve the supported features. If the CPU does not support XSAVE, the supported features would be 0 even when the CPU supports FP and SSE. Provide separate storage for the legacy feature set to avoid that and fill in the bits in the legacy init function. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-6-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 7 +++++++ arch/x86/kernel/fpu/init.c | 9 ++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c3ec56279767..595122fcaf51 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -503,6 +503,13 @@ struct fpu_state_config { * be requested by user space before usage. */ u64 default_features; + /* + * @legacy_features: + * + * Features which can be reported back to user space + * even without XSAVE support, i.e. legacy features FP + SSE + */ + u64 legacy_features; }; /* FPU state configuration information */ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7074154131e6..621f4b6cac4a 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -193,12 +193,15 @@ static void __init fpu__init_system_xstate_size_legacy(void) * Note that the size configuration might be overwritten later * during fpu__init_system_xstate(). 
*/ - if (!cpu_feature_enabled(X86_FEATURE_FPU)) + if (!cpu_feature_enabled(X86_FEATURE_FPU)) { size = sizeof(struct swregs_state); - else if (cpu_feature_enabled(X86_FEATURE_FXSR)) + } else if (cpu_feature_enabled(X86_FEATURE_FXSR)) { size = sizeof(struct fxregs_state); - else + fpu_user_cfg.legacy_features = XFEATURE_MASK_FPSSE; + } else { size = sizeof(struct fregs_state); + fpu_user_cfg.legacy_features = XFEATURE_MASK_FP; + } fpu_kernel_cfg.max_size = size; fpu_kernel_cfg.default_size = size; -- Gitee From e4e4c815a36277733134808aee5b9a8a1fcab3fc Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:10 -0700 Subject: [PATCH 160/188] x86/arch_prctl: Add controls for dynamic XSTATE components mainline inclusion from mainline-v5.16-rc1 commit db8268df0983adc2bb1fb48c9e5f7bfbb5f617f3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit db8268df0983 x86/arch_prctl: Add controls for dynamic XSTATE components. -------------------------------- Dynamically enabled XSTATE features are by default disabled for all processes. A process has to request permission to use such a feature. To support this implement a architecture specific prctl() with the options: - ARCH_GET_XCOMP_SUPP Copies the supported feature bitmap into the user space provided u64 storage. The pointer is handed in via arg2 - ARCH_GET_XCOMP_PERM Copies the process wide permitted feature bitmap into the user space provided u64 storage. The pointer is handed in via arg2 - ARCH_REQ_XCOMP_PERM Request permission for a feature set. A feature set can be mapped to a facility, e.g. AMX, and can require one or more XSTATE components to be enabled. The feature argument is the number of the highest XSTATE component which is required for a facility to work. 
The request argument is not a user supplied bitmap because that makes filtering harder (think seccomp) and even impossible because to support 32bit tasks the argument would have to be a pointer. The permission mechanism works this way: Task asks for permission for a facility and kernel checks whether that's supported. If supported it does: 1) Check whether permission has already been granted 2) Compute the size of the required kernel and user space buffer (sigframe) size. 3) Validate that no task has a sigaltstack installed which is smaller than the resulting sigframe size 4) Add the requested feature bit(s) to the permission bitmap of current->group_leader->fpu and store the sizes in the group leaders fpu struct as well. If that is successful then the feature is still not enabled for any of the tasks. The first usage of a related instruction will result in a #NM trap. The trap handler validates the permission bit of the tasks group leader and if permitted it installs a larger kernel buffer and transfers the permission and size info to the new fpstate container which makes all the FPU functions which require per task information aware of the extended feature set. [ tglx: Adopted to new base code, added missing serialization, massaged namings, comments and changelog ] Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-7-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 4 + arch/x86/include/asm/proto.h | 2 +- arch/x86/include/uapi/asm/prctl.h | 4 + arch/x86/kernel/fpu/xstate.c | 156 ++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/xstate.h | 6 ++ arch/x86/kernel/process.c | 9 +- 6 files changed, 178 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 058486b8ba6e..ba11c13acefb 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -124,4 +124,8 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) return gfpu->fpstate->is_confidential; } +/* prctl */ +struct task_struct; +extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 8c5d1910a848..feed36d44d04 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -40,6 +40,6 @@ void x86_report_nx(void); extern int reboot_force; long do_arch_prctl_common(struct task_struct *task, int option, - unsigned long cpuid_enabled); + unsigned long arg2); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 5a6aac9fa41f..754a07856817 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -10,6 +10,10 @@ #define ARCH_GET_CPUID 0x1011 #define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 + #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 149e09a3923d..5c6dace080a8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -8,6 
+8,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,8 @@ #include #include +#include +#include #include "internal.h" #include "legacy.h" @@ -1298,6 +1301,159 @@ void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); #endif +#ifdef CONFIG_X86_64 +static int validate_sigaltstack(unsigned int usize) +{ + struct task_struct *thread, *leader = current->group_leader; + unsigned long framesize = get_sigframe_size(); + + lockdep_assert_held(&current->sighand->siglock); + + /* get_sigframe_size() is based on fpu_user_cfg.max_size */ + framesize -= fpu_user_cfg.max_size; + framesize += usize; + for_each_thread(leader, thread) { + if (thread->sas_ss_size && thread->sas_ss_size < framesize) + return -ENOSPC; + } + return 0; +} + +static int __xstate_request_perm(u64 permitted, u64 requested) +{ + /* + * This deliberately does not exclude !XSAVES as we still might + * decide to optionally context switch XCR0 or talk the silicon + * vendors into extending XFD for the pre AMX states. 
+ */ + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + struct fpu *fpu = &current->group_leader->thread.fpu; + unsigned int ksize, usize; + u64 mask; + int ret; + + /* Check whether fully enabled */ + if ((permitted & requested) == requested) + return 0; + + /* Calculate the resulting kernel state size */ + mask = permitted | requested; + ksize = xstate_calculate_size(mask, compacted); + + /* Calculate the resulting user state size */ + mask &= XFEATURE_MASK_USER_SUPPORTED; + usize = xstate_calculate_size(mask, false); + + ret = validate_sigaltstack(usize); + if (ret) + return ret; + + /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ + WRITE_ONCE(fpu->perm.__state_perm, requested); + /* Protected by sighand lock */ + fpu->perm.__state_size = ksize; + fpu->perm.__user_state_size = usize; + return ret; +} + +/* + * Permissions array to map facilities with more than one component + */ +static const u64 xstate_prctl_req[XFEATURE_MAX] = { + /* [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE, */ +}; + +static int xstate_request_perm(unsigned long idx) +{ + u64 permitted, requested; + int ret; + + if (idx >= XFEATURE_MAX) + return -EINVAL; + + /* + * Look up the facility mask which can require more than + * one xstate component. 
+ */ + idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req)); + requested = xstate_prctl_req[idx]; + if (!requested) + return -EOPNOTSUPP; + + if ((fpu_user_cfg.max_features & requested) != requested) + return -EOPNOTSUPP; + + /* Lockless quick check */ + permitted = xstate_get_host_group_perm(); + if ((permitted & requested) == requested) + return 0; + + /* Protect against concurrent modifications */ + spin_lock_irq(&current->sighand->siglock); + permitted = xstate_get_host_group_perm(); + ret = __xstate_request_perm(permitted, requested); + spin_unlock_irq(&current->sighand->siglock); + return ret; +} +#else /* CONFIG_X86_64 */ +static inline int xstate_request_perm(unsigned long idx) +{ + return -EPERM; +} +#endif /* !CONFIG_X86_64 */ + +/** + * fpu_xstate_prctl - xstate permission operations + * @tsk: Redundant pointer to current + * @option: A subfunction of arch_prctl() + * @arg2: option argument + * Return: 0 if successful; otherwise, an error code + * + * Option arguments: + * + * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info + * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info + * ARCH_REQ_XCOMP_PERM: Facility number requested + * + * For facilities which require more than one XSTATE component, the request + * must be the highest state component number related to that facility, + * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and + * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18). + */ +long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2) +{ + u64 __user *uptr = (u64 __user *)arg2; + u64 permitted, supported; + unsigned long idx = arg2; + + if (tsk != current) + return -EPERM; + + switch (option) { + case ARCH_GET_XCOMP_SUPP: + supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features; + return put_user(supported, uptr); + + case ARCH_GET_XCOMP_PERM: + /* + * Lockless snapshot as it can also change right after the + * dropping the lock. 
+ */ + permitted = xstate_get_host_group_perm(); + permitted &= XFEATURE_MASK_USER_SUPPORTED; + return put_user(permitted, uptr); + + case ARCH_REQ_XCOMP_PERM: + if (!IS_ENABLED(CONFIG_X86_64)) + return -EOPNOTSUPP; + + return xstate_request_perm(idx); + + default: + return -EINVAL; + } +} + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index a1aa0bad2c9c..4ce1dc030f38 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -15,6 +15,12 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } +static inline u64 xstate_get_host_group_perm(void) +{ + /* Pairs with WRITE_ONCE() in xstate_request_perm() */ + return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm); +} + enum xstate_copy_mode { XSTATE_COPY_FP, XSTATE_COPY_FX, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f3b2539786a3..da4f44e00fe4 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -964,13 +965,17 @@ unsigned long get_wchan(struct task_struct *p) } long do_arch_prctl_common(struct task_struct *task, int option, - unsigned long cpuid_enabled) + unsigned long arg2) { switch (option) { case ARCH_GET_CPUID: return get_cpuid_mode(); case ARCH_SET_CPUID: - return set_cpuid_mode(task, cpuid_enabled); + return set_cpuid_mode(task, arg2); + case ARCH_GET_XCOMP_SUPP: + case ARCH_GET_XCOMP_PERM: + case ARCH_REQ_XCOMP_PERM: + return fpu_xstate_prctl(task, option, arg2); } return -EINVAL; -- Gitee From ac0b40782389d230f9bef510ca2514fc3be23311 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:11 -0700 Subject: [PATCH 161/188] x86/fpu: Add basic helpers for dynamically enabled features mainline inclusion from 
mainline-v5.16-rc1 commit 23686ef25d4ae81bc12fe3994d1905191fcf71f8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 23686ef25d4a x86/fpu: Add basic helpers for dynamically enabled features. -------------------------------- To allow building up the infrastructure required to support dynamically enabled FPU features, add: - XFEATURES_MASK_DYNAMIC This constant will hold xfeatures which can be dynamically enabled. - fpu_state_size_dynamic() A static branch for 64-bit and a simple 'return false' for 32-bit. This helper allows to add dynamic-feature-specific changes to common code which is shared between 32-bit and 64-bit without #ifdeffery. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-8-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 21 +++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 43ae89d4bcd2..cf285464eabe 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -43,6 +43,9 @@ #define XFEATURE_MASK_USER_RESTORE \ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) +/* Features which are dynamically enabled for a process on request */ +#define XFEATURE_MASK_USER_DYNAMIC 0ULL + /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) @@ -96,4 +99,22 @@ int xfeature_size(int xfeature_nr); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +#endif + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); + +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return 
static_branch_unlikely(&__fpu_state_size_dynamic); +} +#else +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return false; +} +#endif + #endif diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b05f6a3b2057..4018083c5b36 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -25,6 +25,10 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_X86_64 +DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +#endif + /* The FPU state configuration data for kernel and user space */ struct fpu_state_config fpu_kernel_cfg __ro_after_init; struct fpu_state_config fpu_user_cfg __ro_after_init; -- Gitee From 46d5060353d52f2282e05396a8946f062010eb35 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:12 -0700 Subject: [PATCH 162/188] x86/signal: Use fpu::__state_user_size for sigalt stack validation mainline inclusion from mainline-v5.16-rc1 commit 4b7ca609a33dd8696bcbd2f1ad949e26a591592f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 4b7ca609a33d x86/signal: Use fpu::__state_user_size for sigalt stack validation. -------------------------------- Use the current->group_leader->fpu to check for pending permissions to use extended features and validate against the resulting user space size which is stored in the group leaders fpu struct as well. This prevents a task from installing a too small sized sigaltstack after permissions to use dynamically enabled features have been granted, but the task has not (yet) used a related instruction. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-9-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/signal.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index a64536b8bffb..bc8a42d0adb8 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -719,12 +720,15 @@ SYSCALL_DEFINE0(rt_sigreturn) /* max_frame_size tells userspace the worst case signal stack size. */ static unsigned long __ro_after_init max_frame_size; +static unsigned int __ro_after_init fpu_default_state_size; void __init init_sigframe_size(void) { + fpu_default_state_size = fpu__get_fpstate_size(); + max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING; - max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING; + max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING; /* Userspace expects an aligned size. */ max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); @@ -927,15 +931,38 @@ __setup("strict_sas_size", strict_sas_size); * sigaltstack they just continued to work. While always checking against * the real size would be correct, this might be considered a regression. * - * Therefore avoid the sanity check, unless enforced by kernel config or - * command line option. + * Therefore avoid the sanity check, unless enforced by kernel + * configuration or command line option. + * + * When dynamic FPU features are supported, the check is also enforced when + * the task has permissions to use dynamic features. Tasks which have no + * permission are checked against the size of the non-dynamic feature set + * if strict checking is enabled. This avoids forcing all tasks on the + * system to allocate large sigaltstacks even if they are never going + * to use a dynamic feature. 
As this is serialized via sighand::siglock + * any permission request for a dynamic feature either happened already + * or will see the newly install sigaltstack size in the permission checks. */ bool sigaltstack_size_valid(size_t ss_size) { + unsigned long fsize = max_frame_size - fpu_default_state_size; + u64 mask; + lockdep_assert_held(&current->sighand->siglock); + if (!fpu_state_size_dynamic() && !strict_sigaltstack_size) + return true; + + fsize += current->group_leader->thread.fpu.perm.__user_state_size; + if (likely(ss_size > fsize)) + return true; + if (strict_sigaltstack_size) - return ss_size > get_sigframe_size(); + return ss_size > fsize; + + mask = current->group_leader->thread.fpu.perm.__state_perm; + if (mask & XFEATURE_MASK_USER_DYNAMIC) + return ss_size > fsize; return true; } -- Gitee From 91dd9873317a12104f711e59afda048265581b79 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:13 -0700 Subject: [PATCH 163/188] x86/fpu/signal: Prepare for variable sigframe length mainline inclusion from mainline-v5.16-rc1 commit 53599b4d54b9b8dda1d537a558946869d2acbddc category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 53599b4d54b9 x86/fpu/signal: Prepare for variable sigframe length. -------------------------------- The software reserved portion of the fxsave frame in the signal frame is copied from structures which have been set up at boot time. With dynamically enabled features the content of these structures is no longer correct because the xfeatures and size can be different per task. Calculate the software reserved portion at runtime and fill in the xfeatures and size values from the tasks active fpstate. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-10-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/internal.h | 3 -- arch/x86/kernel/fpu/signal.c | 62 ++++++++++++++-------------------- arch/x86/kernel/fpu/xstate.c | 1 - 3 files changed, 26 insertions(+), 40 deletions(-) diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h index e1d8a352f12d..dbdb31f55fc7 100644 --- a/arch/x86/kernel/fpu/internal.h +++ b/arch/x86/kernel/fpu/internal.h @@ -21,9 +21,6 @@ static __always_inline __pure bool use_fxsr(void) # define WARN_ON_FPU(x) ({ (void)(x); 0; }) #endif -/* Init functions */ -extern void fpu__init_prepare_fx_sw_frame(void); - /* Used in init.c */ extern void fpstate_init_user(struct fpstate *fpstate); extern void fpstate_reset(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 3e42e6e8b56c..3b7f7d07c0b5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -20,9 +20,6 @@ #include "legacy.h" #include "xstate.h" -static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; -static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; - /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. @@ -98,23 +95,42 @@ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) return true; } +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed to by the fpstate pointer in the sigcontext. + * This is saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. 
+ */ +static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame, + struct fpstate *fpstate) +{ + sw_bytes->magic1 = FP_XSTATE_MAGIC1; + sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE; + sw_bytes->xfeatures = fpstate->user_xfeatures; + sw_bytes->xstate_size = fpstate->user_size; + + if (ia32_frame) + sw_bytes->extended_size += sizeof(struct fregs_state); +} + static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, - unsigned int usize) + struct fpstate *fpstate) { struct xregs_state __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; + struct _fpx_sw_bytes sw_bytes; u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + save_sw_bytes(&sw_bytes, ia32_frame, fpstate); + err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes)); if (!use_xsave()) return !err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *)(buf + usize)); + (__u32 __user *)(buf + fpstate->user_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -173,7 +189,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct task_struct *tsk = current; struct fpstate *fpstate = tsk->thread.fpu.fpstate; - int ia32_fxstate = (buf != buf_fx); + bool ia32_fxstate = (buf != buf_fx); int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || @@ -226,8 +242,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; - if (use_fxsr() && - !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate->user_size)) + if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate)) return false; return true; @@ -523,28 +538,3 @@ unsigned long __init fpu__get_fpstate_size(void) return ret; } -/* - * Prepare the SW 
reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -void __init fpu__init_prepare_fx_sw_frame(void) -{ - int size = fpu_user_cfg.default_size + FP_XSTATE_MAGIC2_SIZE; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = fpu_user_cfg.default_features; - fx_sw_reserved.xstate_size = fpu_user_cfg.default_size; - - if (IS_ENABLED(CONFIG_IA32_EMULATION) || - IS_ENABLED(CONFIG_X86_32)) { - int fsave_header_size = sizeof(struct fregs_state); - - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size = size + fsave_header_size; - } -} - diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5c6dace080a8..b3e2e0a19a69 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -830,7 +830,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) update_regset_xstate_info(fpu_user_cfg.max_size, fpu_user_cfg.max_features); - fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp_offsets(); setup_supervisor_only_offsets(); -- Gitee From e8da61bf4b35610269e64bb74e8037642bdaf0d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:14 -0700 Subject: [PATCH 164/188] x86/fpu: Prepare fpu_clone() for dynamically enabled features mainline inclusion from mainline-v5.16-rc1 commit 9e798e9aa14c45fb94e47b30bf6347b369ce9df7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 9e798e9aa14c x86/fpu: Prepare fpu_clone() for dynamically enabled features. -------------------------------- The default portion of the parent's FPU state is saved in a child task. 
With dynamic features enabled, the non-default portion is not saved in a child's fpstate because these register states are defined to be caller-saved. The new task's fpstate is therefore the default buffer. Fork inherits the permission of the parent. Also, do not use memcpy() when TIF_NEED_FPU_LOAD is set because it is invalid when the parent has dynamic features. Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-11-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/kernel/fpu/core.c | 35 +++++++++++++++++++++++--------- arch/x86/kernel/process.c | 2 +- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index cdb78d590c86..99a8820e8cc4 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -11,7 +11,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); extern void fpu__drop(struct fpu *fpu); -extern int fpu_clone(struct task_struct *dst); +extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags); extern void fpu_flush_thread(void); /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4018083c5b36..1ff6b83094a1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -423,8 +423,20 @@ void fpstate_reset(struct fpu *fpu) fpu->perm.__user_state_size = fpu_user_cfg.default_size; } +static inline void fpu_inherit_perms(struct fpu *dst_fpu) +{ + if (fpu_state_size_dynamic()) { + struct fpu *src_fpu = &current->group_leader->thread.fpu; + + spin_lock_irq(&current->sighand->siglock); + /* Fork also inherits the permissions of the parent */ + dst_fpu->perm = src_fpu->perm; + spin_unlock_irq(&current->sighand->siglock); + } +} + /* Clone current's FPU state on fork */ -int fpu_clone(struct task_struct *dst) +int fpu_clone(struct task_struct *dst, unsigned long clone_flags) { 
struct fpu *src_fpu = &current->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; @@ -455,17 +467,20 @@ int fpu_clone(struct task_struct *dst) } /* - * If the FPU registers are not owned by current just memcpy() the - * state. Otherwise save the FPU registers directly into the - * child's FPU context, without any memory-to-memory copying. + * Save the default portion of the current FPU state into the + * clone. Assume all dynamic features to be defined as caller- + * saved, which enables skipping both the expansion of fpstate + * and the copying of any dynamic state. + * + * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because + * copying is not valid when current uses non-default states. */ fpregs_lock(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) { - memcpy(&dst_fpu->fpstate->regs, &src_fpu->fpstate->regs, - dst_fpu->fpstate->size); - } else { - save_fpregs_to_fpstate(dst_fpu); - } + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + fpregs_restore_userregs(); + save_fpregs_to_fpstate(dst_fpu); + if (!(clone_flags & CLONE_THREAD)) + fpu_inherit_perms(dst_fpu); fpregs_unlock(); trace_x86_fpu_copy_src(src_fpu); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index da4f44e00fe4..c517dd76cf84 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -164,7 +164,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, frame->flags = X86_EFLAGS_FIXED; #endif - fpu_clone(p); + fpu_clone(p, clone_flags); /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { -- Gitee From 78df6be41cacff77d53a234c8260e50e5aadabc7 Mon Sep 17 00:00:00 2001 From: "Chang S. 
Bae" Date: Thu, 21 Oct 2021 15:55:15 -0700 Subject: [PATCH 165/188] x86/fpu: Reset permission and fpstate on exec() mainline inclusion from mainline-v5.16-rc1 commit e61d6310a0f80cb986fd2076d432760b3619fb6d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit e61d6310a0f8 x86/fpu: Reset permission and fpstate on exec(). -------------------------------- On exec(), extended register states saved in the buffer is cleared. With dynamic features, each task carries variables besides the register states. The struct fpu has permission information and struct fpstate contains buffer size and feature masks. They are all dynamically updated with dynamic features. Reset the current task's entire FPU data before an exec() so that the new task starts with default permission and fpstate. Rename the register state reset function because the old naming confuses as it does not reset struct fpstate. Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-12-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1ff6b83094a1..3349068cef7d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -544,7 +544,7 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) /* * Reset current->fpu memory state to the init values. 
*/ -static void fpu_reset_fpstate(void) +static void fpu_reset_fpregs(void) { struct fpu *fpu = &current->thread.fpu; @@ -579,7 +579,7 @@ void fpu__clear_user_states(struct fpu *fpu) fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { - fpu_reset_fpstate(); + fpu_reset_fpregs(); fpregs_unlock(); return; } @@ -609,7 +609,8 @@ void fpu__clear_user_states(struct fpu *fpu) void fpu_flush_thread(void) { - fpu_reset_fpstate(); + fpstate_reset(&current->thread.fpu); + fpu_reset_fpregs(); } /* * Load FPU context before returning to userspace. -- Gitee From 9736b1205da97d8f0fe1c54e454b5972648f9ff2 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:16 -0700 Subject: [PATCH 166/188] x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit mainline inclusion from mainline-v5.16-rc1 commit c351101678ce54492b6e09810ec02efc0df036a9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit c351101678ce x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit. -------------------------------- Intel's eXtended Feature Disable (XFD) feature is an extension of the XSAVE architecture. XFD allows the kernel to enable a feature state in XCR0 and to receive a #NM trap when a task uses instructions accessing that state. This is going to be used to postpone the allocation of a larger XSTATE buffer for a task to the point where it is actually using a related instruction after the permission to use that facility has been granted. XFD is not used by the kernel, but only applied to userspace. This is a matter of policy as the kernel knows how a fpstate is reallocated and the XFD state. The compacted XSAVE format is adjustable for dynamic features. Make XFD depend on XSAVES. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-13-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/cpuid-deps.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6e149ab5db2d..b73b7e7c7ae3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -276,6 +276,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index defda61f372d..3d8b81215b14 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -75,6 +75,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX_LC, X86_FEATURE_SGX }, { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, + { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, {} }; -- Gitee From 9b2245e9ece3ed3c65b30a3855419cbfe382145e Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:17 -0700 Subject: [PATCH 167/188] x86/msr-index: Add MSRs for XFD mainline inclusion from mainline-v5.16-rc1 commit dae1bd58389615d401a84aedc38fa075ef8f7de6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit dae1bd583896 x86/msr-index: Add MSRs for XFD. -------------------------------- XFD introduces two MSRs: - IA32_XFD to enable/disable a feature controlled by XFD - IA32_XFD_ERR to expose to the #NM trap handler which feature was tried to be used for the first time. 
Both use the same xstate-component bitmap format, used by XCR0. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-14-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/msr-index.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2f0ca77d24bc..b93228bb1ca4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -644,6 +644,8 @@ #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_APICBASE 0x0000001b -- Gitee From aef806ba3490ee0db078c18d0f604eb6cd279164 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:18 -0700 Subject: [PATCH 168/188] x86/fpu: Add XFD state to fpstate mainline inclusion from mainline-v5.16-rc1 commit 8bf26758ca9659866b844dd51037314b4c0fa6bd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 8bf26758ca96 x86/fpu: Add XFD state to fpstate. -------------------------------- Add storage for XFD register state to struct fpstate. This will be used to store the XFD MSR state. This will be used for switching the XFD MSR when FPU content is restored. Add a per-CPU variable to cache the current MSR value so the MSR has only to be written when the values are different. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-15-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 3 +++ arch/x86/kernel/fpu/core.c | 2 ++ arch/x86/kernel/fpu/xstate.h | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 595122fcaf51..b1897638d68d 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -322,6 +322,9 @@ struct fpstate { /* @user_xfeatures: xfeatures valid in UABI buffers */ u64 user_xfeatures; + /* @xfd: xfeatures disabled to trap userspace use. */ + u64 xfd; + /* @is_valloc: Indicator for dynamically allocated state */ unsigned int is_valloc : 1; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3349068cef7d..3b72cddf990d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -27,6 +27,7 @@ #ifdef CONFIG_X86_64 DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +DEFINE_PER_CPU(u64, xfd_state); #endif /* The FPU state configuration data for kernel and user space */ @@ -409,6 +410,7 @@ static void __fpstate_reset(struct fpstate *fpstate) fpstate->user_size = fpu_user_cfg.default_size; fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; + fpstate->xfd = init_fpstate.xfd; } void fpstate_reset(struct fpu *fpu) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 4ce1dc030f38..32a4dee4de3b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -5,6 +5,10 @@ #include #include +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(u64, xfd_state); +#endif + static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) { /* -- Gitee From c7ec82af7c4dbd377f7b24a0788c69772a0e94fd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:55:19 -0700 Subject: [PATCH 169/188] x86/fpu: Add sanity checks for XFD 
mainline inclusion from mainline-v5.16-rc1 commit 5529acf47ec31ece0815f69d43f5e6a1e485a0f3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 5529acf47ec3 x86/fpu: Add sanity checks for XFD. -------------------------------- Add debug functionality to ensure that the XFD MSR is up to date for XSAVE* and XRSTOR* operations. [ tglx: Improve comment. ] Signed-off-by: Thomas Gleixner Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211021225527.10184-16-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 9 +++--- arch/x86/kernel/fpu/signal.c | 6 ++-- arch/x86/kernel/fpu/xstate.c | 58 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/xstate.h | 34 ++++++++++++++++++--- 4 files changed, 95 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3b72cddf990d..b5f5b08b84d7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -166,7 +166,7 @@ void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) */ mask = fpu_kernel_cfg.max_features & mask; - os_xrstor(&fpstate->regs.xsave, mask); + os_xrstor(fpstate, mask); } else { if (use_fxsr()) fxrstor(&fpstate->regs.fxsave); @@ -534,7 +534,7 @@ void fpu__drop(struct fpu *fpu) static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) - os_xrstor(&init_fpstate.regs.xsave, features_mask); + os_xrstor(&init_fpstate, features_mask); else if (use_fxsr()) fxrstor(&init_fpstate.regs.fxsave); else @@ -591,9 +591,8 @@ void fpu__clear_user_states(struct fpu *fpu) * corresponding registers. */ if (xfeatures_mask_supervisor() && - !fpregs_state_valid(fpu, smp_processor_id())) { - os_xrstor(&fpu->fpstate->regs.xsave, xfeatures_mask_supervisor()); - } + !fpregs_state_valid(fpu, smp_processor_id())) + os_xrstor_supervisor(fpu->fpstate); /* Reset user states in registers. 
*/ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 3b7f7d07c0b5..16fdecd02341 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -261,7 +261,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) - os_xrstor(&init_fpstate.regs.xsave, init_bv); + os_xrstor(&init_fpstate, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); @@ -322,7 +322,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, * been restored from a user buffer directly. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) - os_xrstor(&fpu->fpstate->regs.xsave, xfeatures_mask_supervisor()); + os_xrstor_supervisor(fpu->fpstate); fpregs_mark_activate(); fpregs_unlock(); @@ -432,7 +432,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpregs->xsave.header.xfeatures &= mask; - success = !os_xrstor_safe(&fpregs->xsave, + success = !os_xrstor_safe(fpu->fpstate, fpu_kernel_cfg.max_features); } else { success = !fxrstor_safe(&fpregs->fxsave); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b3e2e0a19a69..096517ef1aa6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1301,6 +1301,64 @@ EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); #endif #ifdef CONFIG_X86_64 + +#ifdef CONFIG_X86_DEBUG_FPU +/* + * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask + * can safely operate on the @fpstate buffer. + */ +static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor) +{ + u64 xfd = __this_cpu_read(xfd_state); + + if (fpstate->xfd == xfd) + return true; + + /* + * The XFD MSR does not match fpstate->xfd. 
That's invalid when + * the passed in fpstate is current's fpstate. + */ + if (fpstate->xfd == current->thread.fpu.fpstate->xfd) + return false; + + /* + * XRSTOR(S) from init_fpstate are always correct as it will just + * bring all components into init state and not read from the + * buffer. XSAVE(S) raises #PF after init. + */ + if (fpstate == &init_fpstate) + return rstor; + + /* + * XSAVE(S): clone(), fpu_swap_kvm_fpu() + * XRSTORS(S): fpu_swap_kvm_fpu() + */ + + /* + * No XSAVE/XRSTOR instructions (except XSAVE itself) touch + * the buffer area for XFD-disabled state components. + */ + mask &= ~xfd; + + /* + * Remove features which are valid in fpstate. They + * have space allocated in fpstate. + */ + mask &= ~fpstate->xfeatures; + + /* + * Any remaining state components in 'mask' might be written + * by XSAVE/XRSTOR. Fail validation it found. + */ + return !mask; +} + +void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) +{ + WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor)); +} +#endif /* CONFIG_X86_DEBUG_FPU */ + static int validate_sigaltstack(unsigned int usize) { struct task_struct *thread, *leader = current->group_leader; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 32a4dee4de3b..29024244965b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -130,6 +130,12 @@ static inline u64 xfeatures_mask_independent(void) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU) +extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor); +#else +static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { } +#endif + /* * Save processor xstate to xsave area. 
* @@ -144,6 +150,7 @@ static inline void os_xsave(struct fpstate *fpstate) int err; WARN_ON_FPU(!alternatives_patched); + xfd_validate_state(fpstate, mask, false); XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err); @@ -156,12 +163,23 @@ static inline void os_xsave(struct fpstate *fpstate) * * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. */ -static inline void os_xrstor(struct xregs_state *xstate, u64 mask) +static inline void os_xrstor(struct fpstate *fpstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + xfd_validate_state(fpstate, mask, true); + XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); +} + +/* Restore of supervisor state. Does not require XFD */ +static inline void os_xrstor_supervisor(struct fpstate *fpstate) { + u64 mask = xfeatures_mask_supervisor(); u32 lmask = mask; u32 hmask = mask >> 32; - XSTATE_XRESTORE(xstate, lmask, hmask); + XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); } /* @@ -184,11 +202,14 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) * internally, e.g. PKRU. That's user space ABI and also required * to allow the signal handler to modify PKRU. */ - u64 mask = current->thread.fpu.fpstate->user_xfeatures; + struct fpstate *fpstate = current->thread.fpu.fpstate; + u64 mask = fpstate->user_xfeatures; u32 lmask = mask; u32 hmask = mask >> 32; int err; + xfd_validate_state(fpstate, mask, false); + stac(); XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); @@ -206,6 +227,8 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 u32 hmask = mask >> 32; int err; + xfd_validate_state(current->thread.fpu.fpstate, mask, true); + stac(); XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); clac(); @@ -217,12 +240,15 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 * Restore xstate from kernel space xsave area, return an error code instead of * an exception. 
*/ -static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) +static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask) { + struct xregs_state *xstate = &fpstate->regs.xsave; u32 lmask = mask; u32 hmask = mask >> 32; int err; + /* Must enforce XFD update here */ + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else -- Gitee From 3cad1819a62635afeea42f5db67816f4342366bf Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:20 -0700 Subject: [PATCH 170/188] x86/fpu: Update XFD state where required mainline inclusion from mainline-v5.16-rc1 commit 672365477ae8afca5a1cca98c1deb733235e4525 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 672365477ae8 x86/fpu: Update XFD state where required. -------------------------------- The IA32_XFD_MSR allows to arm #NM traps for XSTATE components which are enabled in XCR0. The register has to be restored before the tasks XSTATE is restored. The life time rules are the same as for FPU state. XFD is updated on return to userspace only when the FPU state of the task is not up to date in the registers. It's updated before the XRSTORS so that eventually enabled dynamic features are restored as well and not brought into init state. Also in signal handling for restoring FPU state from user space the correctness of the XFD state has to be ensured. Add it to CPU initialization and resume as well. Signed-off-by: Chang S. 
Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-17-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/context.h | 2 ++ arch/x86/kernel/fpu/core.c | 28 +++++++++++++++++++++++++++- arch/x86/kernel/fpu/signal.c | 2 ++ arch/x86/kernel/fpu/xstate.c | 12 ++++++++++++ arch/x86/kernel/fpu/xstate.h | 19 ++++++++++++++++++- 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index a06ebf315d83..958accf2ccf0 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -69,6 +69,8 @@ static inline void fpregs_restore_userregs(void) * correct because it was either set in switch_to() or in * flush_thread(). So it is excluded because it might be * not up to date in current->thread.fpu.xsave state. + * + * XFD state is handled in restore_fpregs_from_fpstate(). */ restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b5f5b08b84d7..12ca174891dc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -155,6 +155,23 @@ void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) } if (use_xsave()) { + /* + * Dynamically enabled features are enabled in XCR0, but + * usage requires also that the corresponding bits in XFD + * are cleared. If the bits are set then using a related + * instruction will raise #NM. This allows to do the + * allocation of the larger FPU buffer lazy from #NM or if + * the task has no permission to kill it which would happen + * via #UD if the feature is disabled in XCR0. + * + * XFD state is following the same life time rules as + * XSTATE and to restore state correctly XFD has to be + * updated before XRSTORS otherwise the component would + * stay in or go into init state even if the bits are set + * in fpstate::regs::xsave::xfeatures. 
+ */ + xfd_update_state(fpstate); + /* * Restoring state always needs to modify all features * which are in @mask even if the current task cannot use @@ -241,8 +258,17 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) cur_fps = fpu->fpstate; - if (!cur_fps->is_confidential) + if (!cur_fps->is_confidential) { + /* Includes XFD update */ restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE); + } else { + /* + * XSTATE is restored by firmware from encrypted + * memory. Make sure XFD state is correct while + * running with guest fpstate + */ + xfd_update_state(cur_fps); + } fpregs_mark_activate(); fpregs_unlock(); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 16fdecd02341..cc977da6e128 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -282,6 +282,8 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, retry: fpregs_lock(); + /* Ensure that XFD is up to date */ + xfd_update_state(fpu->fpstate); pagefault_disable(); ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, xrestore, fx_only); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 096517ef1aa6..3a7691325713 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -136,6 +136,15 @@ void fpu__init_cpu_xstate(void) cr4_set_bits(X86_CR4_OSXSAVE); + /* + * Must happen after CR4 setup and before xsetbv() to allow KVM + * lazy passthrough. Write independent of the dynamic state static + * key as that does not work on the boot CPU. This also ensures + * that any stale state is wiped out from XFD. + */ + if (cpu_feature_enabled(X86_FEATURE_XFD)) + wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features * managed by XSAVE{C, OPT, S} and XRSTOR{S}. 
Only XSAVE user @@ -875,6 +884,9 @@ void fpu__resume_cpu(void) wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } + + if (fpu_state_size_dynamic()) + wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd); } /* diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 29024244965b..e18210dff88c 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -136,6 +136,22 @@ extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor); static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { } #endif +#ifdef CONFIG_X86_64 +static inline void xfd_update_state(struct fpstate *fpstate) +{ + if (fpu_state_size_dynamic()) { + u64 xfd = fpstate->xfd; + + if (__this_cpu_read(xfd_state) != xfd) { + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); + } + } +} +#else +static inline void xfd_update_state(struct fpstate *fpstate) { } +#endif + /* * Save processor xstate to xsave area. * @@ -247,7 +263,8 @@ static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask) u32 hmask = mask >> 32; int err; - /* Must enforce XFD update here */ + /* Ensure that XFD is up to date */ + xfd_update_state(fpstate); if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); -- Gitee From b2cd63e951c14036c3e9417d5c11150021cf9508 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:21 -0700 Subject: [PATCH 171/188] x86/fpu/xstate: Add XFD #NM handler mainline inclusion from mainline-v5.16-rc1 commit 783e87b404956f8958657aed8a6a72aa98d5b7e1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 783e87b40495 x86/fpu/xstate: Add XFD #NM handler. -------------------------------- If the XFD MSR has feature bits set then #NM will be raised when user space attempts to use an instruction related to one of these features. 
When the task has no permissions to use that feature, raise SIGILL, which is the same behavior as #UD. If the task has permissions, calculate the new buffer size for the extended feature set and allocate a larger fpstate. In the unlikely case that vzalloc() fails, SIGSEGV is raised. The allocation function will be added in the next step. Provide a stub which fails for now. [ tglx: Updated serialization ] Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-18-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 2 ++ arch/x86/kernel/fpu/xstate.c | 47 +++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 38 +++++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index cf285464eabe..b7b145cad019 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -99,6 +99,8 @@ int xfeature_size(int xfeature_nr); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); +int xfd_enable_feature(u64 xfd_err); + #ifdef CONFIG_X86_64 DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); #endif diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3a7691325713..2974de731bbf 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1464,6 +1464,53 @@ static int xstate_request_perm(unsigned long idx) spin_unlock_irq(&current->sighand->siglock); return ret; } + +/* Place holder for now */ +static int fpstate_realloc(u64 xfeatures, unsigned int ksize, + unsigned int usize) +{ + return -ENOMEM; +} + +int xfd_enable_feature(u64 xfd_err) +{ + u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; + unsigned int ksize, usize; + struct fpu *fpu; + + if (!xfd_event) { + pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); + return 0; + } + + /* Protect against
concurrent modifications */ + spin_lock_irq(&current->sighand->siglock); + + /* If not permitted let it die */ + if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) { + spin_unlock_irq(&current->sighand->siglock); + return -EPERM; + } + + fpu = &current->group_leader->thread.fpu; + ksize = fpu->perm.__state_size; + usize = fpu->perm.__user_state_size; + /* + * The feature is permitted. State size is sufficient. Dropping + * the lock is safe here even if more features are added from + * another task, the retrieved buffer sizes are valid for the + * currently requested feature(s). + */ + spin_unlock_irq(&current->sighand->siglock); + + /* + * Try to allocate a new fpstate. If that fails there is no way + * out. + */ + if (fpstate_realloc(xfd_event, ksize, usize)) + return -EFAULT; + return 0; +} #else /* CONFIG_X86_64 */ static inline int xstate_request_perm(unsigned long idx) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 96c8351ce678..d9132c996601 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1151,10 +1151,48 @@ DEFINE_IDTENTRY(exc_spurious_interrupt_bug) */ } +static bool handle_xfd_event(struct pt_regs *regs) +{ + u64 xfd_err; + int err; + + if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD)) + return false; + + rdmsrl(MSR_IA32_XFD_ERR, xfd_err); + if (!xfd_err) + return false; + + wrmsrl(MSR_IA32_XFD_ERR, 0); + + /* Die if that happens in kernel space */ + if (WARN_ON(!user_mode(regs))) + return false; + + local_irq_enable(); + + err = xfd_enable_feature(xfd_err); + + switch (err) { + case -EPERM: + force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs)); + break; + case -EFAULT: + force_sig(SIGSEGV); + break; + } + + local_irq_disable(); + return true; +} + DEFINE_IDTENTRY(exc_device_not_available) { unsigned long cr0 = read_cr0(); + if (handle_xfd_event(regs)) + return; + #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; --
Gitee From cb0b181ef4954a609419745ca505a22fa5fa926b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:22 -0700 Subject: [PATCH 172/188] x86/fpu/xstate: Add fpstate_realloc()/free() mainline inclusion from mainline-v5.16-rc1 commit 500afbf645a040a39e1af0dba2fdf6ebf224bd47 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 500afbf645a0 x86/fpu/xstate: Add fpstate_realloc()/free(). -------------------------------- The fpstate embedded in struct fpu is the default state for storing the FPU registers. It's sized so that the default supported features can be stored. For dynamically enabled features the register buffer is too small. The #NM handler detects first use of a feature which is disabled in the XFD MSR. After handling permission checks it recalculates the size for kernel space and user space state and invokes fpstate_realloc() which tries to reallocate fpstate and install it. Provide the allocator function which checks whether the current buffer size is sufficient and if not allocates one. If allocation is successful the new fpstate is initialized with the new features and sizes and the now enabled features is removed from the task's XFD mask. realloc_fpstate() uses vzalloc(). If use of this mechanism grows to re-allocate buffers larger than 64KB, a more sophisticated allocation scheme that includes purpose-built reclaim capability might be justified. Signed-off-by: Chang S. 
Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-19-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 7 +++ arch/x86/kernel/fpu/xstate.c | 97 +++++++++++++++++++++++++++++++--- arch/x86/kernel/process.c | 10 ++++ 3 files changed, 106 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ba11c13acefb..d6e8933faafe 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -103,6 +103,13 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {} /* State tracking */ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); +/* Process cleanup */ +#ifdef CONFIG_X86_64 +extern void fpstate_free(struct fpu *fpu); +#else +static inline void fpstate_free(struct fpu *fpu) { } +#endif + /* fpstate-related functions which are exported to KVM */ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2974de731bbf..8d8c3f9a6171 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,7 @@ #include #include +#include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" @@ -1371,6 +1373,91 @@ void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) } #endif /* CONFIG_X86_DEBUG_FPU */ +void fpstate_free(struct fpu *fpu) +{ + if (fpu->fpstate || fpu->fpstate != &fpu->__fpstate) + vfree(fpu->fpstate); +} + +/** + * fpu_install_fpstate - Update the active fpstate in the FPU + * + * @fpu: A struct fpu * pointer + * @newfps: A struct fpstate * pointer + * + * Returns: A null pointer if the last active fpstate is the embedded + * one or the new fpstate is already installed; + * otherwise, a pointer to the old fpstate which has to + * be freed by 
the caller. + */ +static struct fpstate *fpu_install_fpstate(struct fpu *fpu, + struct fpstate *newfps) +{ + struct fpstate *oldfps = fpu->fpstate; + + if (fpu->fpstate == newfps) + return NULL; + + fpu->fpstate = newfps; + return oldfps != &fpu->__fpstate ? oldfps : NULL; +} + +/** + * fpstate_realloc - Reallocate struct fpstate for the requested new features + * + * @xfeatures: A bitmap of xstate features which extend the enabled features + * of that task + * @ksize: The required size for the kernel buffer + * @usize: The required size for user space buffers + * + * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer + * terminates quickly, vfree()-induced IPIs may be a concern, but tasks + * with large states are likely to live longer. + * + * Returns: 0 on success, -ENOMEM on allocation error. + */ +static int fpstate_realloc(u64 xfeatures, unsigned int ksize, + unsigned int usize) +{ + struct fpu *fpu = &current->thread.fpu; + struct fpstate *curfps, *newfps = NULL; + unsigned int fpsize; + + curfps = fpu->fpstate; + fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64); + + newfps = vzalloc(fpsize); + if (!newfps) + return -ENOMEM; + newfps->size = ksize; + newfps->user_size = usize; + newfps->is_valloc = true; + + fpregs_lock(); + /* + * Ensure that the current state is in the registers before + * swapping fpstate as that might invalidate it due to layout + * changes.
+ */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + fpregs_restore_userregs(); + + newfps->xfeatures = curfps->xfeatures | xfeatures; + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; + + curfps = fpu_install_fpstate(fpu, newfps); + + /* Do the final updates within the locked region */ + xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); + xfd_update_state(newfps); + + fpregs_unlock(); + + vfree(curfps); + return 0; +} + static int validate_sigaltstack(unsigned int usize) { struct task_struct *thread, *leader = current->group_leader; @@ -1393,7 +1480,8 @@ static int __xstate_request_perm(u64 permitted, u64 requested) /* * This deliberately does not exclude !XSAVES as we still might * decide to optionally context switch XCR0 or talk the silicon - * vendors into extending XFD for the pre AMX states. + * vendors into extending XFD for the pre AMX states, especially + * AVX512. */ bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); struct fpu *fpu = &current->group_leader->thread.fpu; @@ -1465,13 +1553,6 @@ static int xstate_request_perm(unsigned long idx) return ret; } -/* Place holder for now */ -static int fpstate_realloc(u64 xfeatures, unsigned int ksize, - unsigned int usize) -{ - return -ENOMEM; -} - int xfd_enable_feature(u64 xfd_err) { u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c517dd76cf84..8271444b81a2 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -96,9 +97,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) #endif /* Drop the copied pointer to current's fpstate */ dst->thread.fpu.fpstate = NULL; + return 0; } +#ifdef CONFIG_X86_64 +void arch_release_task_struct(struct task_struct *tsk) +{ + if (fpu_state_size_dynamic()) + fpstate_free(&tsk->thread.fpu); +} +#endif + /* * Free
thread data structures etc.. */ -- Gitee From 37891574f390b6ec83a38f2ef4e01e4c9f62aebf Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:23 -0700 Subject: [PATCH 173/188] x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers mainline inclusion from mainline-v5.16-rc1 commit 70c3f1671b0cbc386b387f1de33b7837e276a195 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 70c3f1671b0c x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers. -------------------------------- The kernel checks at boot time which features are available by walking a XSAVE feature table which contains the CPUID feature bit numbers which need to be checked whether a feature is available on a CPU or not. So far the feature numbers have been linear, but AMX will create a gap which the current code cannot handle. Make the table entries explicitly indexed and adjust the loop code accordingly to prepare for that. No functional change. Signed-off-by: Chang S. 
Bae Signed-off-by: Thomas Gleixner Reviewed-by: Len Brown Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-20-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 8d8c3f9a6171..bd503a5f0fb5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -53,18 +53,18 @@ static const char *xfeature_names[] = "unknown xstate feature" , }; -static short xsave_cpuid_features[] __initdata = { - X86_FEATURE_FPU, - X86_FEATURE_XMM, - X86_FEATURE_AVX, - X86_FEATURE_MPX, - X86_FEATURE_MPX, - X86_FEATURE_AVX512F, - X86_FEATURE_AVX512F, - X86_FEATURE_AVX512F, - X86_FEATURE_INTEL_PT, - X86_FEATURE_PKU, - X86_FEATURE_ENQCMD, +static unsigned short xsave_cpuid_features[] __initdata = { + [XFEATURE_FP] = X86_FEATURE_FPU, + [XFEATURE_SSE] = X86_FEATURE_XMM, + [XFEATURE_YMM] = X86_FEATURE_AVX, + [XFEATURE_BNDREGS] = X86_FEATURE_MPX, + [XFEATURE_BNDCSR] = X86_FEATURE_MPX, + [XFEATURE_OPMASK] = X86_FEATURE_AVX512F, + [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, + [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, + [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, + [XFEATURE_PKRU] = X86_FEATURE_PKU, + [XFEATURE_PASID] = X86_FEATURE_ENQCMD, }; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = @@ -809,7 +809,10 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * Clear XSAVE features that are disabled in the normal CPUID. */ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { - if (!boot_cpu_has(xsave_cpuid_features[i])) + unsigned short cid = xsave_cpuid_features[i]; + + /* Careful: X86_FEATURE_FPU is 0! */ + if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid)) fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } -- Gitee From af5963c9d4e7bcd5858eeb1e90c643219c903c5f Mon Sep 17 00:00:00 2001 From: "Chang S. 
Bae" Date: Thu, 21 Oct 2021 15:55:24 -0700 Subject: [PATCH 174/188] x86/fpu/amx: Define AMX state components and have it used for boot-time checks mainline inclusion from mainline-v5.16-rc1 commit eec2113eabd92b7bfbaf1033fa82dc8eb4951203 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit eec2113eabd92 x86/fpu/amx: Define AMX state components and have it used for boot-time checks. -------------------------------- The XSTATE initialization uses check_xstate_against_struct() to sanity check the size of XSTATE-enabled features. AMX is a XSAVE-enabled feature, and its size is not hard-coded but discoverable at run-time via CPUID. The AMX state is composed of state components 17 and 18, which are all user state components. The first component is the XTILECFG state of a 64-byte tile-related control register. The state component 18, called XTILEDATA, contains the actual tile data, and the state size varies on implementations. The architectural maximum, as defined in the CPUID(0x1d, 1): EAX[15:0], is a byte less than 64KB. The first implementation supports 8KB. Check the XTILEDATA state size dynamically. The feature introduces the new tile register, TMM. Define one register struct only and read the number of registers from CPUID. Cross-check the overall size with CPUID again. Signed-off-by: Chang S. 
Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-21-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/fpu/types.h | 32 ++++++++++++ arch/x86/include/asm/fpu/xstate.h | 2 + arch/x86/kernel/fpu/xstate.c | 80 +++++++++++++++++++++++++++++- 4 files changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b73b7e7c7ae3..db5d264157c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -298,6 +298,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index b1897638d68d..3c06c82ab355 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -120,6 +120,9 @@ enum xfeature { XFEATURE_RSRVD_COMP_13, XFEATURE_RSRVD_COMP_14, XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILE_CFG, + XFEATURE_XTILE_DATA, XFEATURE_MAX, }; @@ -136,12 +139,21 @@ enum xfeature { #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) #define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) +#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) +#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | XFEATURE_MASK_ZMM_Hi256 \ | XFEATURE_MASK_Hi16_ZMM) +#ifdef CONFIG_X86_64 +# define XFEATURE_MASK_XTILE 
(XFEATURE_MASK_XTILE_DATA \ + | XFEATURE_MASK_XTILE_CFG) +#else +# define XFEATURE_MASK_XTILE (0) +#endif + #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM struct reg_128_bit { @@ -153,6 +165,9 @@ struct reg_256_bit { struct reg_512_bit { u8 regbytes[512/8]; }; +struct reg_1024_byte { + u8 regbytes[1024]; +}; /* * State component 2: @@ -255,6 +270,23 @@ struct arch_lbr_state { u64 ler_to; u64 ler_info; struct lbr_entry entries[]; +}; + +/* + * State component 17: 64-byte tile configuration register. + */ +struct xtile_cfg { + u64 tcfg[8]; +} __packed; + +/* + * State component 18: 1KB tile data register. + * Each register represents 16 64-byte rows of the matrix + * data. But the number of registers depends on the actual + * implementation. + */ +struct xtile_data { + struct reg_1024_byte tmm; } __packed; /* diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index b7b145cad019..10adf1376f0c 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -14,6 +14,8 @@ #define XSTATE_CPUID 0x0000000d +#define TILE_CPUID 0x0000001d + #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index bd503a5f0fb5..944718927ed2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -51,6 +51,14 @@ static const char *xfeature_names[] = "Protection Keys User registers", "PASID state", "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "AMX Tile config" , + "AMX Tile data" , + "unknown xstate feature" , }; static unsigned short xsave_cpuid_features[] __initdata = { @@ -65,6 +73,8 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, [XFEATURE_PKRU] = X86_FEATURE_PKU, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, + [XFEATURE_XTILE_CFG] = 
X86_FEATURE_AMX_TILE, + [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, }; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = @@ -240,6 +250,8 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); print_xstate_feature(XFEATURE_MASK_PKRU); print_xstate_feature(XFEATURE_MASK_PASID); + print_xstate_feature(XFEATURE_MASK_XTILE_CFG); + print_xstate_feature(XFEATURE_MASK_XTILE_DATA); } /* @@ -523,6 +535,67 @@ static void __init __xstate_dump_leaves(void) } \ } while (0) +/** + * check_xtile_data_against_struct - Check tile data state size. + * + * Calculate the state size by multiplying the single tile size which is + * recorded in a C struct, and the number of tiles that the CPU informs. + * Compare the provided size with the calculation. + * + * @size: The tile data state size + * + * Returns: 0 on success, -EINVAL on mismatch. + */ +static int __init check_xtile_data_against_struct(int size) +{ + u32 max_palid, palid, state_size; + u32 eax, ebx, ecx, edx; + u16 max_tile; + + /* + * Check the maximum palette id: + * eax: the highest numbered palette subleaf. + */ + cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); + + /* + * Cross-check each tile size and find the maximum number of + * supported tiles. 
+ */ + for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { + u16 tile_size, max; + + /* + * Check the tile size info: + * eax[31:16]: bytes per title + * ebx[31:16]: the max names (or max number of tiles) + */ + cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx); + tile_size = eax >> 16; + max = ebx >> 16; + + if (tile_size != sizeof(struct xtile_data)) { + pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", + __stringify(XFEATURE_XTILE_DATA), + sizeof(struct xtile_data), tile_size); + __xstate_dump_leaves(); + return -EINVAL; + } + + if (max > max_tile) + max_tile = max; + } + + state_size = sizeof(struct xtile_data) * max_tile; + if (size != state_size) { + pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", + __stringify(XFEATURE_XTILE_DATA), state_size, size); + __xstate_dump_leaves(); + return -EINVAL; + } + return 0; +} + /* * We have a C struct for each 'xstate'. We need to ensure * that our software representation matches what the CPU @@ -546,6 +619,11 @@ static bool __init check_xstate_against_struct(int nr) XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state); XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state); + XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg); + + /* The tile data size varies between implementations. */ + if (nr == XFEATURE_XTILE_DATA) + check_xtile_data_against_struct(sz); /* * Make *SURE* to add any feature numbers in below if @@ -555,7 +633,7 @@ static bool __init check_xstate_against_struct(int nr) if ((nr < XFEATURE_YMM) || (nr >= XFEATURE_MAX) || (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) || - ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) { + ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); return false; -- Gitee From 970156da1e34b61a3f501f2af87656e91f48360b Mon Sep 17 00:00:00 2001 From: "Chang S. 
Bae" Date: Thu, 21 Oct 2021 15:55:25 -0700 Subject: [PATCH 175/188] x86/fpu: Calculate the default sizes independently mainline inclusion from mainline-v5.16-rc1 commit 2ae996e0c1a38ca57a52438ab9deec6761dcba62 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2ae996e0c1a3 x86/fpu: Calculate the default sizes independently. -------------------------------- When dynamically enabled states are supported the maximum and default sizes for the kernel buffers and user space interfaces are not longer identical. Put the necessary calculations in place which only take the default enabled features into account. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-22-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 944718927ed2..29d544328ade 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -781,35 +781,40 @@ static bool __init is_supported_xstate_size(unsigned int test_xstate_size) static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int user_size, kernel_size; + unsigned int user_size, kernel_size, kernel_default_size; + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); /* Uncompacted user space size */ user_size = get_xsave_size_user(); /* * XSAVES kernel size includes supervisor states and - * uses compacted format. + * uses compacted format when available. * * XSAVE does not support supervisor states so * kernel and user size is identical. 
*/ - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + if (compacted) kernel_size = get_xsaves_size_no_independent(); else kernel_size = user_size; - /* Ensure we have the space to store all enabled features. */ - if (!is_supported_xstate_size(kernel_size)) + kernel_default_size = + xstate_calculate_size(fpu_kernel_cfg.default_features, compacted); + + /* Ensure we have the space to store all default enabled features. */ + if (!is_supported_xstate_size(kernel_default_size)) return -EINVAL; if (!paranoid_xstate_size_valid(kernel_size)) return -EINVAL; - /* Keep it the same for now */ fpu_kernel_cfg.max_size = kernel_size; - fpu_kernel_cfg.default_size = kernel_size; fpu_user_cfg.max_size = user_size; - fpu_user_cfg.default_size = user_size; + + fpu_kernel_cfg.default_size = kernel_default_size; + fpu_user_cfg.default_size = + xstate_calculate_size(fpu_user_cfg.default_features, false); return 0; } @@ -894,15 +899,21 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } + if (!cpu_feature_enabled(X86_FEATURE_XFD)) + fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED; fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; - /* Identical for now */ + /* Clean out dynamic features from default */ fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; + fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; + fpu_user_cfg.default_features = fpu_user_cfg.max_features; + fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; @@ -913,6 +924,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) if (err) goto out_disable; + /* Reset the state for the current task */ fpstate_reset(&current->thread.fpu); /* -- Gitee From
25ac2d0c550014aa0d692db1b46fd01ca59b2c7a Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:26 -0700 Subject: [PATCH 176/188] x86/fpu: Add XFD handling for dynamic states mainline inclusion from mainline-v5.16-rc1 commit db3e7321b4b84b1cb39598ff79b90d1252481378 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit db3e7321b4b8 x86/fpu: Add XFD handling for dynamic states. -------------------------------- To handle the dynamic sizing of buffers on first use the XFD MSR has to be armed. Store the delta between the maximum available and the default feature bits in init_fpstate where it can be retrieved for task creation. If the delta is non zero then dynamic features are enabled. This needs also to enable the static key which guards the XFD updates. This is delayed to an initcall because the FPU setup runs before jump labels are initialized. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-23-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 29d544328ade..d847048d30b3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -835,6 +835,12 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) fpu_user_cfg.max_size = legacy_size; fpu_user_cfg.default_size = legacy_size; + /* + * Prevent enabling the static branch which enables writes to the + * XFD MSR. 
+ */ + init_fpstate.xfd = 0; + fpstate_reset(&current->thread.fpu); } @@ -918,6 +924,14 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; + /* + * Initialize the default XFD state in initfp_state and enable the + * dynamic sizing mechanism if dynamic states are available. The + * static key cannot be enabled here because this runs before + * jump_label_init(). This is delayed to an initcall. + */ + init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; + /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); err = init_xstate_size(); @@ -1466,9 +1480,21 @@ void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) } #endif /* CONFIG_X86_DEBUG_FPU */ +static int __init xfd_update_static_branch(void) +{ + /* + * If init_fpstate.xfd has bits set then dynamic features are + * available and the dynamic sizing must be enabled. + */ + if (init_fpstate.xfd) + static_branch_enable(&__fpu_state_size_dynamic); + return 0; +} +arch_initcall(xfd_update_static_branch) + void fpstate_free(struct fpu *fpu) { - if (fpu->fpstate || fpu->fpstate != &fpu->__fpstate) + if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate) vfree(fpu->fpstate); } -- Gitee From 987f0402f8adf702097e022cae11c6233d1f2f5e Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Thu, 21 Oct 2021 15:55:27 -0700 Subject: [PATCH 177/188] x86/fpu/amx: Enable the AMX feature in 64-bit mode mainline inclusion from mainline-v5.16-rc1 commit 2308ee57d93d896618dd65c996429c9d3e469fe0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 2308ee57d93d x86/fpu/amx: Enable the AMX feature in 64-bit mode. -------------------------------- Add the AMX state components in XFEATURE_MASK_USER_SUPPORTED and the TILE_DATA component to the dynamic states and update the permission check table accordingly.
This is only effective on 64 bit kernels as for 32bit kernels XFEATURE_MASK_XTILE is defined as 0. TILE_DATA is caller-saved state and the only dynamic state. Add build time sanity check to ensure the assumption that every dynamic feature is caller-saved. Make AMX state depend on XFD as it is a dynamic feature. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211021225527.10184-24-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/xstate.h | 5 +++-- arch/x86/kernel/cpu/cpuid-deps.c | 1 + arch/x86/kernel/fpu/core.c | 6 ++++++ arch/x86/kernel/fpu/xstate.c | 5 +++-- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 10adf1376f0c..0f8b90ab18c9 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -35,7 +35,8 @@ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_XTILE) /* * Features which are restored when returning to user space.
@@ -46,7 +47,7 @@ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) /* Features which are dynamically enabled for a process on request */ -#define XFEATURE_MASK_USER_DYNAMIC 0ULL +#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 3d8b81215b14..2e11c8bd1ef9 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, + { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, {} }; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 12ca174891dc..290836d1f2a7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -494,6 +494,12 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) return 0; } + /* + * If a new feature is added, ensure all dynamic features are + * caller-saved from here! + */ + BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA); + /* * Save the default portion of the current FPU state into the * clone. 
Assume all dynamic features to be defined as caller- diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d847048d30b3..98f27706d9e5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -404,7 +404,8 @@ static __init void os_xrstor_booting(struct xregs_state *xstate) XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ - XFEATURE_MASK_PASID) + XFEATURE_MASK_PASID | \ + XFEATURE_MASK_XTILE) /* * setup the xstate image representing the init state @@ -1636,7 +1637,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested) * Permissions array to map facilities with more than one component */ static const u64 xstate_prctl_req[XFEATURE_MAX] = { - /* [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE, */ + [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA, }; static int xstate_request_perm(unsigned long idx) -- Gitee From 4e1d0952a76b160d1692885d3831ac2179f1cdd1 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 26 Oct 2021 05:25:24 -0700 Subject: [PATCH 178/188] selftests/x86/amx: Add test cases for AMX state management mainline inclusion from mainline-v5.16-rc1 commit 6a3e0651b4a00daa314c59d6e4228dfa7a986983 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6a3e0651b4a0 selftests/x86/amx: Add test cases for AMX state management. -------------------------------- AMX TILEDATA is a very large XSAVE feature. It could have caused nasty XSAVE buffer space waste in two places: * Signal stacks * Kernel task_struct->fpu buffers To avoid this waste, neither of these buffers have AMX state by default. The non-default features are called "dynamic" features. There is an arch_prctl(ARCH_REQ_XCOMP_PERM) which allows a task to declare that it wants to use AMX or other "dynamic" XSAVE features. This arch_prctl() ensures that sufficient sigaltstack space is available before it will succeed. It also expands the task_struct buffer. 
Functions of this test: * Test arch_prctl(ARCH_REQ_XCOMP_PERM). Ensure that it checks for proper sigaltstack sizing and that the sizing is enforced for future sigaltstack calls. * Ensure that ARCH_REQ_XCOMP_PERM is inherited across fork() * Ensure that TILEDATA use before the prctl() is fatal * Ensure that TILEDATA is cleared across fork() Note: Generally, compiler support is needed to do something with AMX. Instead, directly load AMX state from userspace with a plain XSAVE. Do not depend on the compiler. [ dhansen: bunches of cleanups ] Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211026122524.7BEDAA95@davehans-spike.ostc.intel.com Signed-off-by: Lin Wang --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/amx.c | 697 +++++++++++++++++++++++++++ 2 files changed, 698 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/amx.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 15496633f7e4..949d1287e0c9 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header + corrupt_xstate_header amx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c new file mode 100644 index 000000000000..ce012ad15fa5 --- /dev/null +++ b/tools/testing/selftests/x86/amx.c @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#ifndef __x86_64__ +# error This 
test is 64-bit only +#endif + +#define XSAVE_HDR_OFFSET 512 +#define XSAVE_HDR_SIZE 64 + +struct xsave_buffer { + union { + struct { + char legacy[XSAVE_HDR_OFFSET]; + char header[XSAVE_HDR_SIZE]; + char extended[0]; + }; + char bytes[0]; + }; +}; + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + asm volatile("cpuid;" + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsave (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xrstor (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); +} + +/* err() exits and will not return */ +#define fatal_error(msg, ...) 
err(1, "[FAIL]\t" msg, ##__VA_ARGS__) + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) +#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) +static inline void check_cpuid_xsave(void) +{ + uint32_t eax, ebx, ecx, edx; + + /* + * CPUID.1:ECX.XSAVE[bit 26] enumerates general + * support for the XSAVE feature set, including + * XGETBV. + */ + eax = 1; + ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) + fatal_error("cpuid: no CPU xsave support"); + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) + fatal_error("cpuid: no OS xsave support"); +} + +static uint32_t xbuf_size; + +static struct { + uint32_t xbuf_offset; + uint32_t size; +} xtiledata; + +#define CPUID_LEAF_XSTATE 0xd +#define CPUID_SUBLEAF_XSTATE_USER 0x0 +#define TILE_CPUID 0x1d +#define TILE_PALETTE_ID 0x1 + +static void check_cpuid_xtiledata(void) +{ + uint32_t eax, ebx, ecx, edx; + + eax = CPUID_LEAF_XSTATE; + ecx = CPUID_SUBLEAF_XSTATE_USER; + cpuid(&eax, &ebx, &ecx, &edx); + + /* + * EBX enumerates the size (in bytes) required by the XSAVE + * instruction for an XSAVE area containing all the user state + * components corresponding to bits currently set in XCR0. 
+ * + * Stash that off so it can be used to allocate buffers later. + */ + xbuf_size = ebx; + + eax = CPUID_LEAF_XSTATE; + ecx = XFEATURE_XTILEDATA; + + cpuid(&eax, &ebx, &ecx, &edx); + /* + * eax: XTILEDATA state component size + * ebx: XTILEDATA state component offset in user buffer + */ + if (!eax || !ebx) + fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", + eax, ebx); + + xtiledata.size = eax; + xtiledata.xbuf_offset = ebx; +} + +/* The helpers for managing XSAVE buffer and tile states: */ + +struct xsave_buffer *alloc_xbuf(void) +{ + struct xsave_buffer *xbuf; + + /* XSAVE buffer should be 64B-aligned. */ + xbuf = aligned_alloc(64, xbuf_size); + if (!xbuf) + fatal_error("aligned_alloc()"); + return xbuf; +} + +static inline void clear_xstate_header(struct xsave_buffer *buffer) +{ + memset(&buffer->header, 0, sizeof(buffer->header)); +} + +static inline uint64_t get_xstatebv(struct xsave_buffer *buffer) +{ + /* XSTATE_BV is at the beginning of the header: */ + return *(uint64_t *)&buffer->header; +} + +static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) +{ + /* XSTATE_BV is at the beginning of the header: */ + *(uint64_t *)(&buffer->header) = bv; +} + +static void set_rand_tiledata(struct xsave_buffer *xbuf) +{ + int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset]; + int data; + int i; + + /* + * Ensure that 'data' is never 0. This ensures that + * the registers are never in their initial configuration + * and thus never tracked as being in the init state. 
+ */ + data = rand() | 1; + + for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++) + *ptr = data; +} + +struct xsave_buffer *stashed_xsave; + +static void init_stashed_xsave(void) +{ + stashed_xsave = alloc_xbuf(); + if (!stashed_xsave) + fatal_error("failed to allocate stashed_xsave\n"); + clear_xstate_header(stashed_xsave); +} + +static void free_stashed_xsave(void) +{ + free(stashed_xsave); +} + +/* See 'struct _fpx_sw_bytes' at sigcontext.h */ +#define SW_BYTES_OFFSET 464 +/* N.B. The struct's field name varies so read from the offset. */ +#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8) + +static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer) +{ + return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET); +} + +static inline uint64_t get_fpx_sw_bytes_features(void *buffer) +{ + return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET); +} + +/* Work around printf() being unsafe in signals: */ +#define SIGNAL_BUF_LEN 1000 +char signal_message_buffer[SIGNAL_BUF_LEN]; +void sig_print(char *msg) +{ + int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1; + + strncat(signal_message_buffer, msg, left); +} + +static volatile bool noperm_signaled; +static int noperm_errs; + +/* + * Signal handler for when AMX is used but + * permission has not been obtained. + */ +static void handle_noperm(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + void *xbuf = ctx->uc_mcontext.fpregs; + struct _fpx_sw_bytes *sw_bytes; + uint64_t features; + + /* Reset the signal message buffer: */ + signal_message_buffer[0] = '\0'; + sig_print("\tAt SIGILL handler,\n"); + + if (si->si_code != ILL_ILLOPC) { + noperm_errs++; + sig_print("[FAIL]\tInvalid signal code.\n"); + } else { + sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n"); + } + + sw_bytes = get_fpx_sw_bytes(xbuf); + /* + * Without permission, the signal XSAVE buffer should not + * have room for AMX register state (aka. xtiledata). 
+ * Check that the size does not overlap with where xtiledata + * will reside. + * + * This also implies that no state components *PAST* + * XTILEDATA (features >=19) can be present in the buffer. + */ + if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) { + sig_print("[OK]\tValid xstate size\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate size\n"); + } + + features = get_fpx_sw_bytes_features(xbuf); + /* + * Without permission, the XTILEDATA feature + * bit should not be set. + */ + if ((features & XFEATURE_MASK_XTILEDATA) == 0) { + sig_print("[OK]\tValid xstate mask\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate mask\n"); + } + + noperm_signaled = true; + ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */ +} + +/* Return true if XRSTOR is successful; otherwise, false. */ +static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask) +{ + noperm_signaled = false; + xrstor(xbuf, mask); + + /* Print any messages produced by the signal code: */ + printf("%s", signal_message_buffer); + /* + * Reset the buffer to make sure any future printing + * only outputs new messages: + */ + signal_message_buffer[0] = '\0'; + + if (noperm_errs) + fatal_error("saw %d errors in noperm signal handler\n", noperm_errs); + + return !noperm_signaled; +} + +/* + * Use XRSTOR to populate the XTILEDATA registers with + * random data. + * + * Return true if successful; otherwise, false. + */ +static inline bool load_rand_tiledata(struct xsave_buffer *xbuf) +{ + clear_xstate_header(xbuf); + set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA); + set_rand_tiledata(xbuf); + return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA); +} + +/* Return XTILEDATA to its initial configuration. 
*/ +static inline void init_xtiledata(void) +{ + clear_xstate_header(stashed_xsave); + xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA); +} + +enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED }; + +/* arch_prctl() and sigaltstack() test */ + +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 + +static void req_xtiledata_perm(void) +{ + syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); +} + +static void validate_req_xcomp_perm(enum expected_result exp) +{ + unsigned long bitmask; + long rc; + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n"); + return; + } + + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n"); + } else if (rc) { + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n"); + } + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); + if (rc) { + fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); + } else if (bitmask & XFEATURE_MASK_XTILE) { + printf("\tARCH_REQ_XCOMP_PERM is successful.\n"); + } +} + +static void validate_xcomp_perm(enum expected_result exp) +{ + bool load_success = load_rand_tiledata(stashed_xsave); + + if (exp == FAIL_EXPECTED) { + if (load_success) { + noperm_errs++; + printf("[FAIL]\tLoad tiledata succeeded.\n"); + } else { + printf("[OK]\tLoad tiledata failed.\n"); + } + } else if (exp == SUCCESS_EXPECTED) { + if (load_success) { + printf("[OK]\tLoad tiledata succeeded.\n"); + } else { + noperm_errs++; + printf("[FAIL]\tLoad tiledata failed.\n"); + } + } +} + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static void *alloc_altstack(unsigned int size) +{ + void *altstack; + + altstack = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + + if (altstack == MAP_FAILED) + fatal_error("mmap() for altstack"); + + return altstack; +} + +static void setup_altstack(void *addr, 
unsigned long size, enum expected_result exp) +{ + stack_t ss; + int rc; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = addr; + + rc = sigaltstack(&ss, NULL); + + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tsigaltstack() failed.\n"); + } else { + fatal_error("sigaltstack() succeeded unexpectedly.\n"); + } + } else if (rc) { + fatal_error("sigaltstack()"); + } +} + +static void test_dynamic_sigaltstack(void) +{ + unsigned int small_size, enough_size; + unsigned long minsigstksz; + void *altstack; + + minsigstksz = getauxval(AT_MINSIGSTKSZ); + printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz); + /* + * getauxval() itself can return 0 for failure or + * success. But, in this case, AT_MINSIGSTKSZ + * will always return a >=0 value if implemented. + * Just check for 0. + */ + if (minsigstksz == 0) { + printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n"); + return; + } + + enough_size = minsigstksz * 2; + + altstack = alloc_altstack(enough_size); + printf("\tAllocate memory for altstack (%u bytes).\n", enough_size); + + /* + * Try setup_altstack() with a size which can not fit + * XTILEDATA. ARCH_REQ_XCOMP_PERM should fail. + */ + small_size = minsigstksz - xtiledata.size; + printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(FAIL_EXPECTED); + + /* + * Try setup_altstack() with a size derived from + * AT_MINSIGSTKSZ. It should be more than large enough + * and thus ARCH_REQ_XCOMP_PERM should succeed. + */ + printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size); + setup_altstack(altstack, enough_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(SUCCESS_EXPECTED); + + /* + * Try to coerce setup_altstack() to again accept a + * too-small altstack. This ensures that big-enough + * sigaltstacks can not shrink to a too-small value + * once XTILEDATA permission is established. 
+ */ + printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, FAIL_EXPECTED); +} + +static void test_dynamic_state(void) +{ + pid_t parent, child, grandchild; + + parent = fork(); + if (parent < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (parent > 0) { + int status; + /* fork() succeeded. Now in the parent. */ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test parent exit"); + return; + } + /* fork() succeeded. Now in the child . */ + + printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n"); + + printf("\tFork a child.\n"); + child = fork(); + if (child < 0) { + fatal_error("fork"); + } else if (child > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test child exit"); + _exit(0); + } + + /* + * The permission request should fail without an + * XTILEDATA-compatible signal stack + */ + printf("\tTest XCOMP_PERM at child.\n"); + validate_xcomp_perm(FAIL_EXPECTED); + + /* + * Set up an XTILEDATA-compatible signal stack and + * also obtain permission to populate XTILEDATA. + */ + printf("\tTest dynamic sigaltstack at child:\n"); + test_dynamic_sigaltstack(); + + /* Ensure that XTILEDATA can be populated. */ + printf("\tTest XCOMP_PERM again at child.\n"); + validate_xcomp_perm(SUCCESS_EXPECTED); + + printf("\tFork a grandchild.\n"); + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (!grandchild) { + /* fork() succeeded. Now in the (grand)child. */ + printf("\tTest XCOMP_PERM at grandchild.\n"); + + /* + * Ensure that the grandchild inherited + * permission and a compatible sigaltstack: + */ + validate_xcomp_perm(SUCCESS_EXPECTED); + } else { + int status; + /* fork() succeeded. Now in the parent. 
*/ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grandchild"); + } + + _exit(0); +} + +/* + * Save current register state and compare it to @xbuf1.' + * + * Returns false if @xbuf1 matches the registers. + * Returns true if @xbuf1 differs from the registers. + */ +static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) +{ + struct xsave_buffer *xbuf2; + int ret; + + xbuf2 = alloc_xbuf(); + if (!xbuf2) + fatal_error("failed to allocate XSAVE buffer\n"); + + xsave(xbuf2, XFEATURE_MASK_XTILEDATA); + ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], + &xbuf2->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + free(xbuf2); + + if (ret == 0) + return false; + return true; +} + +static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret != 0) + fatal_error("TILEDATA registers changed"); +} + +static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret == 0) + fatal_error("TILEDATA registers did not change"); +} + +/* tiledata inheritance test */ + +static void test_fork(void) +{ + pid_t child, grandchild; + + child = fork(); + if (child < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (child > 0) { + /* fork() succeeded. Now in the parent. */ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test child"); + return; + } + /* fork() succeeded. Now in the child. */ + printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n"); + + load_rand_tiledata(stashed_xsave); + + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (grandchild > 0) { + /* fork() succeeded. Still in the first child. 
*/ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grand child"); + _exit(0); + } + /* fork() succeeded. Now in the (grand)child. */ + + /* + * TILEDATA registers are not preserved across fork(). + * Ensure that their value has changed: + */ + validate_tiledata_regs_changed(stashed_xsave); + + _exit(0); +} + +int main(void) +{ + /* Check hardware availability at first */ + check_cpuid_xsave(); + check_cpuid_xtiledata(); + + init_stashed_xsave(); + sethandler(SIGILL, handle_noperm, 0); + + test_dynamic_state(); + + /* Request permission for the following tests */ + req_xtiledata_perm(); + + test_fork(); + + clearhandler(SIGILL); + free_stashed_xsave(); + + return 0; +} -- Gitee From 109a8d3a0f5c9c8ab61c512b5c39be950d5c7608 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 26 Oct 2021 05:25:25 -0700 Subject: [PATCH 179/188] selftests/x86/amx: Add context switch test mainline inclusion from mainline-v5.16-rc1 commit 101c669d165d341b8c35424eb3878138044394ef category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 101c669d165d selftests/x86/amx: Add context switch test. -------------------------------- XSAVE state is thread-local. The kernel switches between thread state at context switch time. Generally, running a selftest for a while will naturally expose it to some context switching and will test the XSAVE code. Instead of just hoping that the tests get context-switched at random times, force context-switches on purpose. Spawn off a few userspace threads and force context-switches between them. Ensure that the kernel correctly context switches each thread's unique AMX state. [ dhansen: bunches of cleanups ] Signed-off-by: Chang S.
Bae Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211026122525.6EFD5758@davehans-spike.ostc.intel.com Signed-off-by: Lin Wang --- tools/testing/selftests/x86/amx.c | 160 +++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index ce012ad15fa5..3615ef4a48bb 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include @@ -10,8 +11,6 @@ #include #include -#include - #include #include #include @@ -259,7 +258,6 @@ void sig_print(char *msg) static volatile bool noperm_signaled; static int noperm_errs; - /* * Signal handler for when AMX is used but * permission has not been obtained. @@ -674,6 +672,158 @@ static void test_fork(void) _exit(0); } +/* Context switching test */ + +static struct _ctxtswtest_cfg { + unsigned int iterations; + unsigned int num_threads; +} ctxtswtest_config; + +struct futex_info { + pthread_t thread; + int nr; + pthread_mutex_t mutex; + struct futex_info *next; +}; + +static void *check_tiledata(void *info) +{ + struct futex_info *finfo = (struct futex_info *)info; + struct xsave_buffer *xbuf; + int i; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + /* + * Load random data into 'xbuf' and then restore + * it to the tile registers themselves. + */ + load_rand_tiledata(xbuf); + for (i = 0; i < ctxtswtest_config.iterations; i++) { + pthread_mutex_lock(&finfo->mutex); + + /* + * Ensure the register values have not + * diverged from those recorded in 'xbuf'. + */ + validate_tiledata_regs_same(xbuf); + + /* Load new, random values into xbuf and registers */ + load_rand_tiledata(xbuf); + + /* + * The last thread's last unlock will be for + * thread 0's mutex. 
However, thread 0 will + * have already exited the loop and the mutex + * will already be unlocked. + * + * Because this is not an ERRORCHECK mutex, + * that inconsistency will be silently ignored. + */ + pthread_mutex_unlock(&finfo->next->mutex); + } + + free(xbuf); + /* + * Return this thread's finfo, which is + * a unique value for this thread. + */ + return finfo; +} + +static int create_threads(int num, struct futex_info *finfo) +{ + int i; + + for (i = 0; i < num; i++) { + int next_nr; + + finfo[i].nr = i; + /* + * Thread 'i' will wait on this mutex to + * be unlocked. Lock it immediately after + * initialization: + */ + pthread_mutex_init(&finfo[i].mutex, NULL); + pthread_mutex_lock(&finfo[i].mutex); + + next_nr = (i + 1) % num; + finfo[i].next = &finfo[next_nr]; + + if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i])) + fatal_error("pthread_create()"); + } + return 0; +} + +static void affinitize_cpu0(void) +{ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + fatal_error("sched_setaffinity to CPU 0"); +} + +static void test_context_switch(void) +{ + struct futex_info *finfo; + int i; + + /* Affinitize to one CPU to force context switches */ + affinitize_cpu0(); + + req_xtiledata_perm(); + + printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n", + ctxtswtest_config.iterations, + ctxtswtest_config.num_threads); + + + finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads); + if (!finfo) + fatal_error("malloc()"); + + create_threads(ctxtswtest_config.num_threads, finfo); + + /* + * This thread wakes up thread 0 + * Thread 0 will wake up 1 + * Thread 1 will wake up 2 + * ... + * the last thread will wake up 0 + * + * ... this will repeat for the configured + * number of iterations. 
+ */ + pthread_mutex_unlock(&finfo[0].mutex); + + /* Wait for all the threads to finish: */ + for (i = 0; i < ctxtswtest_config.num_threads; i++) { + void *thread_retval; + int rc; + + rc = pthread_join(finfo[i].thread, &thread_retval); + + if (rc) + fatal_error("pthread_join() failed for thread %d err: %d\n", + i, rc); + + if (thread_retval != &finfo[i]) + fatal_error("unexpected thread retval for thread %d: %p\n", + i, thread_retval); + + } + + printf("[OK]\tNo incorrect case was found.\n"); + + free(finfo); +} + int main(void) { /* Check hardware availability at first */ @@ -690,6 +840,10 @@ int main(void) test_fork(); + ctxtswtest_config.iterations = 10; + ctxtswtest_config.num_threads = 5; + test_context_switch(); + clearhandler(SIGILL); free_stashed_xsave(); -- Gitee From e24f27c81da92185b7bc5c5aa630a07e49d59458 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 26 Oct 2021 02:11:57 -0700 Subject: [PATCH 180/188] Documentation/x86: Add documentation for using dynamic XSTATE features mainline inclusion from mainline-v5.16-rc1 commit d7a9590f608dbedd917eb0857a074accdf0d3919 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit d7a9590f608d Documentation/x86: Add documentation for using dynamic XSTATE features. -------------------------------- Explain how dynamic XSTATE features can be enabled via the architecture-specific prctl() along with dynamic sigframe size and first use trap handling. Fix: Documentation/x86/xstate.rst:15: WARNING: Title underline too short. as reported by Stephen Rothwell Originally-by: Thomas Gleixner Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211026091157.16711-1-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- Documentation/x86/index.rst | 1 + Documentation/x86/xstate.rst | 65 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 Documentation/x86/xstate.rst diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index 6b1384c13852..cac3ee1038b9 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -36,3 +36,4 @@ x86-specific Documentation sva sgx elf_auxvec + xstate diff --git a/Documentation/x86/xstate.rst b/Documentation/x86/xstate.rst new file mode 100644 index 000000000000..65de3f054ba5 --- /dev/null +++ b/Documentation/x86/xstate.rst @@ -0,0 +1,65 @@ +Using XSTATE features in user space applications +================================================ + +The x86 architecture supports floating-point extensions which are +enumerated via CPUID. Applications consult CPUID and use XGETBV to +evaluate which features have been enabled by the kernel XCR0. + +Up to AVX-512 and PKRU states, these features are automatically enabled by +the kernel if available. Features like AMX TILE_DATA (XSTATE component 18) +are enabled by XCR0 as well, but the first use of related instruction is +trapped by the kernel because by default the required large XSTATE buffers +are not allocated automatically. + +Using dynamically enabled XSTATE features in user space applications +-------------------------------------------------------------------- + +The kernel provides an arch_prctl(2) based mechanism for applications to +request the usage of such features. The arch_prctl(2) options related to +this are: + +-ARCH_GET_XCOMP_SUPP + + arch_prctl(ARCH_GET_XCOMP_SUPP, &features); + + ARCH_GET_XCOMP_SUPP stores the supported features in userspace storage of + type uint64_t. The second argument is a pointer to that storage. 
+-ARCH_GET_XCOMP_PERM + + arch_prctl(ARCH_GET_XCOMP_PERM, &features); + + ARCH_GET_XCOMP_PERM stores the features for which the userspace process + has permission in userspace storage of type uint64_t. The second argument + is a pointer to that storage. + +-ARCH_REQ_XCOMP_PERM + + arch_prctl(ARCH_REQ_XCOMP_PERM, feature_nr); + + ARCH_REQ_XCOMP_PERM allows to request permission for a dynamically enabled + feature or a feature set. A feature set can be mapped to a facility, e.g. + AMX, and can require one or more XSTATE components to be enabled. + + The feature argument is the number of the highest XSTATE component which + is required for a facility to work. + +When requesting permission for a feature, the kernel checks the +availability. The kernel ensures that sigaltstacks in the process's tasks +are large enough to accommodate the resulting large signal frame. It +enforces this both during ARCH_REQ_XCOMP_PERM and during any subsequent +sigaltstack(2) calls. If an installed sigaltstack is smaller than the +resulting sigframe size, ARCH_REQ_XCOMP_PERM results in -ENOSUPP. Also, +sigaltstack(2) results in -ENOMEM if the requested altstack is too small +for the permitted features. + +Permission, when granted, is valid per process. Permissions are inherited +on fork(2) and cleared on exec(3). + +The first use of an instruction related to a dynamically enabled feature is +trapped by the kernel. The trap handler checks whether the process has +permission to use the feature. If the process has no permission then the +kernel sends SIGILL to the application. If the process has permission then +the handler allocates a larger xstate buffer for the task so the large +state can be context switched. In the unlikely cases that the allocation +fails, the kernel sends SIGSEGV.
-- Gitee From 2b81aa3d8d10736da79a9b46972896c0f42de07a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 2 Nov 2021 15:47:50 -0700 Subject: [PATCH 181/188] x86/fpu: Optimize out sigframe xfeatures when in init state mainline inclusion from mainline-v5.16-rc1 commit 30d02551ba4f681cfa605cedacf231b8641169f0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 30d02551ba4f x86/fpu: Optimize out sigframe xfeatures when in init state. -------------------------------- tl;dr: AMX state is ~8k. Signal frames can have space for this ~8k and each signal entry writes out all 8k even if it is zeros. Skip writing zeros for AMX to speed up signal delivery by about 4% overall when AMX is in its init state. This is a user-visible change to the sigframe ABI. == Hardware XSAVE Background == XSAVE state components may be tracked by the processor as being in their initial configuration. Software can detect which features are in this configuration by looking at the XSTATE_BV field in an XSAVE buffer or with the XGETBV(1) instruction. Both the XSAVE and XSAVEOPT instructions enumerate features as being in the initial configuration via the XSTATE_BV field in the XSAVE header. However, XSAVEOPT declines to actually write features in their initial configuration to the buffer. XSAVE writes the feature unconditionally, regardless of whether it is in the initial configuration or not. Basically, XSAVE users never need to inspect XSTATE_BV to determine if the feature has been written to the buffer. XSAVEOPT users *do* need to inspect XSTATE_BV. They might also need to clear out the buffer if they want to make an isolated change to the state, like modifying one register. == Software Signal / XSAVE Background == Signal frames have historically been written with XSAVE itself. Each state is written in its entirety, regardless of being in its initial configuration.
In other words, the signal frame ABI uses the XSAVE behavior, not the XSAVEOPT behavior. == Problem == This means that any application which has acquired permission to use AMX via ARCH_REQ_XCOMP_PERM will write 8k of state to the signal frame. This 8k write will occur even when AMX was in its initial configuration and software *knows* this because of XSTATE_BV. This problem also exists to a lesser degree with AVX-512 and its 2k of state. However, AVX-512 use does not require ARCH_REQ_XCOMP_PERM and is more likely to have existing users which would be impacted by any change in behavior. == Solution == Stop writing out AMX xfeatures which are in their initial state to the signal frame. This effectively makes the signal frame XSAVE buffer look as if it were written with a combination of XSAVEOPT and XSAVE behavior. Userspace which handles XSAVEOPT- style buffers should be able to handle this naturally. For now, include only the AMX xfeatures: XTILE and XTILEDATA in this new behavior. These require new ABI to use anyway, which makes their users very unlikely to be broken. This XSAVEOPT-like behavior should be expected for all future dynamic xfeatures. It may also be extended to legacy features like AVX-512 in the future. Only attempt this optimization on systems with dynamic features. Disable dynamic feature support (XFD) if XGETBV1 is unavailable by adding a CPUID dependency. This has been measured to reduce the *overall* cycle cost of signal delivery by about 4%. Fixes: 2308ee57d93d ("x86/fpu/amx: Enable the AMX feature in 64-bit mode") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: "Chang S. 
Bae" Link: https://lore.kernel.org/r/20211102224750.FA412E26@davehans-spike.ostc.intel.com Signed-off-by: Lin Wang --- Documentation/x86/xstate.rst | 9 ++++++++ arch/x86/include/asm/fpu/xcr.h | 12 ++++++++++ arch/x86/include/asm/fpu/xstate.h | 7 ++++++ arch/x86/kernel/cpu/cpuid-deps.c | 1 + arch/x86/kernel/fpu/xstate.h | 37 +++++++++++++++++++++++++++++-- 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/Documentation/x86/xstate.rst b/Documentation/x86/xstate.rst index 65de3f054ba5..5cec7fb558d6 100644 --- a/Documentation/x86/xstate.rst +++ b/Documentation/x86/xstate.rst @@ -63,3 +63,12 @@ kernel sends SIGILL to the application. If the process has permission then the handler allocates a larger xstate buffer for the task so the large state can be context switched. In the unlikely cases that the allocation fails, the kernel sends SIGSEGV. + +Dynamic features in signal frames +--------------------------------- + +Dynamically enabled features are not written to the signal frame upon signal +entry if the feature is in its initial configuration. This differs from +non-dynamic features which are always written regardless of their +configuration. Signal handlers can examine the XSAVE buffer's XSTATE_BV +field to determine if a feature was written. diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 79f95d3787e2..9656a5bc6fea 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -3,6 +3,7 @@ #define _ASM_X86_FPU_XCR_H #define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_IN_USE_MASK 0x00000001 static inline u64 xgetbv(u32 index) { @@ -20,4 +21,15 @@ static inline void xsetbv(u32 index, u64 value) asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +/* + * Return a mask of xfeatures which are currently being tracked + * by the processor as being in use (not in the initial configuration). + * + * Callers should check X86_FEATURE_XGETBV1.
+ */ +static inline u64 xfeatures_in_use(void) +{ + return xgetbv(XCR_XFEATURE_IN_USE_MASK); +} + #endif /* _ASM_X86_FPU_XCR_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 0f8b90ab18c9..cd3dd170e23a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -92,6 +92,13 @@ #define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ XFEATURE_MASK_SUPERVISOR_SUPPORTED) +/* + * Features in this mask have space allocated in the signal frame, but may not + * have that space initialized when the feature is in its init state. + */ +#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_USER_DYNAMIC) + extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 2e11c8bd1ef9..a73ab25ad388 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, + { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, {} }; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index e18210dff88c..86ea7c0fa2f6 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u64, xfd_state); @@ -198,6 +199,32 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate) XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); } +/* + * XSAVE itself always writes all requested xfeatures. Removing features + * from the request bitmap reduces the features which are written. + * Generate a mask of features which must be written to a sigframe. 
The + * unset features can be optimized away and not written. + * + * This optimization is user-visible. Only use for states where + * uninitialized sigframe contents are tolerable, like dynamic features. + * + * Users of buffers produced with this optimization must check XSTATE_BV + * to determine which features have been optimized out. + */ +static inline u64 xfeatures_need_sigframe_write(void) +{ + u64 xfeaures_to_write; + + /* In-use features must be written: */ + xfeaures_to_write = xfeatures_in_use(); + + /* Also write all non-optimizable sigframe features: */ + xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED & + ~XFEATURE_MASK_SIGFRAME_INITOPT; + + return xfeaures_to_write; +} + /* * Save xstate to user space xsave area. * @@ -220,10 +247,16 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) */ struct fpstate *fpstate = current->thread.fpu.fpstate; u64 mask = fpstate->user_xfeatures; - u32 lmask = mask; - u32 hmask = mask >> 32; + u32 lmask; + u32 hmask; int err; + /* Optimize away writing unnecessary xfeatures: */ + if (fpu_state_size_dynamic()) + mask &= xfeatures_need_sigframe_write(); + + lmask = mask; + hmask = mask >> 32; xfd_validate_state(fpstate, mask, false); stac(); -- Gitee From 25b3e952c8caba16383248cfa0cb7ec9059041a9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 26 Nov 2021 13:47:46 +0100 Subject: [PATCH 182/188] x86/fpu/signal: Initialize sw_bytes in save_xstate_epilog() mainline inclusion from mainline-v5.16-rc4 commit 52d0b8b18776f184c53632c5e0068201491cdb61 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 52d0b8b18776 x86/fpu/signal: Initialize sw_bytes in save_xstate_epilog(). -------------------------------- save_sw_bytes() did not fully initialize sw_bytes, which caused KMSAN to report an infoleak (see below). Initialize sw_bytes explicitly to avoid this. 
KMSAN report follows: ===================================================== BUG: KMSAN: kernel-infoleak in instrument_copy_to_user ./include/linux/instrumented.h:121 BUG: KMSAN: kernel-infoleak in __copy_to_user ./include/linux/uaccess.h:154 BUG: KMSAN: kernel-infoleak in save_xstate_epilog+0x2df/0x510 arch/x86/kernel/fpu/signal.c:127 instrument_copy_to_user ./include/linux/instrumented.h:121 __copy_to_user ./include/linux/uaccess.h:154 save_xstate_epilog+0x2df/0x510 arch/x86/kernel/fpu/signal.c:127 copy_fpstate_to_sigframe+0x861/0xb60 arch/x86/kernel/fpu/signal.c:245 get_sigframe+0x656/0x7e0 arch/x86/kernel/signal.c:296 __setup_rt_frame+0x14d/0x2a60 arch/x86/kernel/signal.c:471 setup_rt_frame arch/x86/kernel/signal.c:781 handle_signal arch/x86/kernel/signal.c:825 arch_do_signal_or_restart+0x417/0xdd0 arch/x86/kernel/signal.c:870 handle_signal_work kernel/entry/common.c:149 exit_to_user_mode_loop+0x1f6/0x490 kernel/entry/common.c:173 exit_to_user_mode_prepare kernel/entry/common.c:208 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 syscall_exit_to_user_mode+0x7e/0xc0 kernel/entry/common.c:302 do_syscall_64+0x60/0xd0 arch/x86/entry/common.c:88 entry_SYSCALL_64_after_hwframe+0x44/0xae ??:? 
Local variable sw_bytes created at: save_xstate_epilog+0x80/0x510 arch/x86/kernel/fpu/signal.c:121 copy_fpstate_to_sigframe+0x861/0xb60 arch/x86/kernel/fpu/signal.c:245 Bytes 20-47 of 48 are uninitialized Memory access of size 48 starts at ffff8880801d3a18 Data copied to user address 00007ffd90e2ef50 ===================================================== Link: https://lore.kernel.org/all/CAG_fn=V9T6OKPonSjsi9PmWB0hMHFC=yawozdft8i1-MSxrv=w@mail.gmail.com/ Fixes: 53599b4d54b9b8dd ("x86/fpu/signal: Prepare for variable sigframe length") Reported-by: Alexander Potapenko Signed-off-by: Marco Elver Signed-off-by: Alexander Potapenko Signed-off-by: Dave Hansen Tested-by: Alexander Potapenko Link: https://lkml.kernel.org/r/20211126124746.761278-1-glider@google.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index cc977da6e128..b35643ad17f5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; - struct _fpx_sw_bytes sw_bytes; + struct _fpx_sw_bytes sw_bytes = {}; u32 xfeatures; int err; -- Gitee From eb9ed4a0398817a0a5ccb3129fef31cb72cc902e Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 10 Dec 2021 14:55:03 -0800 Subject: [PATCH 183/188] signal: Skip the altstack update when not needed mainline inclusion from mainline-v5.16-rc6 commit 6c3118c32129b4197999a8928ba776bcabd0f5c4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 6c3118c32129 signal: Skip the altstack update when not needed. -------------------------------- == Background == Support for large, "dynamic" fpstates was recently merged. This included code to ensure that sigaltstacks are sufficiently sized for these large states. 
A new lock was added to remove races between enabling large features and setting up sigaltstacks. == Problem == The new lock (sigaltstack_lock()) is acquired in the sigreturn path before restoring the old sigaltstack. Unfortunately, contention on the new lock causes a measurable signal handling performance regression [1]. However, the common case is that no *changes* are made to the sigaltstack state at sigreturn. == Solution == do_sigaltstack() acquires sigaltstack_lock() and is used for both sys_sigaltstack() and restoring the sigaltstack in sys_sigreturn(). Check for changes to the sigaltstack before taking the lock. If no changes were made, return before acquiring the lock. This removes lock contention from the common-case sigreturn path. [1] https://lore.kernel.org/lkml/20211207012128.GA16074@xsang-OptiPlex-9020/ Fixes: 3aac3ebea08f ("x86/signal: Implement sigaltstack size validation") Reported-by: kernel test robot Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20211210225503.12734-1-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- kernel/signal.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index ce95a76fe60a..917b92c3b77f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4056,6 +4056,15 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; + /* + * Return before taking any locks if no actual + * sigaltstack changes were requested. 
+ */ + if (t->sas_ss_sp == (unsigned long)ss_sp && + t->sas_ss_size == ss_size && + t->sas_ss_flags == ss_flags) + return 0; + sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; -- Gitee From edaa8e6e97416d5c84e51e3761a59617e1dd9c37 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 14 Feb 2022 13:05:49 +0100 Subject: [PATCH 184/188] x86/ptrace: Fix xfpregs_set()'s incorrect xmm clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v5.17-rc5 commit 44cad52cc14ae10062f142ec16ede489bccf4469 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 44cad52cc14a x86/ptrace: Fix xfpregs_set()'s incorrect xmm clearing. -------------------------------- xfpregs_set() handles 32-bit REGSET_XFP and 64-bit REGSET_FP. The actual code treats these regsets as modern FX state (i.e. the beginning part of XSTATE). The declarations of the regsets thought they were the legacy i387 format. The code thought they were the 32-bit (no xmm8..15) variant of XSTATE and, for good measure, made the high bits disappear by zeroing the wrong part of the buffer. The latter broke ptrace, and everything else confused anyone trying to understand the code. In particular, the nonsense definitions of the regsets confused me when I wrote this code. Clean this all up. Change the declarations to match reality (which shouldn't change the generated code, let alone the ABI) and fix xfpregs_set() to clear the correct bits and to only do so for 32-bit callers. 
Fixes: 6164331d15f7 ("x86/fpu: Rewrite xfpregs_set()") Reported-by: Luís Ferreira Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=215524 Link: https://lore.kernel.org/r/YgpFnZpF01WwR8wU@zn.tnic Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/regset.c | 9 ++++----- arch/x86/kernel/ptrace.c | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 437d7c930c0b..75ffaef8c299 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct user32_fxsr_struct newstate; + struct fxregs_state newstate; int ret; - BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); - if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); - /* Clear xmm8..15 */ + /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); - memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); + if (in_ia32_syscall()) + memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7e58d2103bf3..805608e88718 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1213,7 +1213,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), + .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = 
regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, @@ -1260,7 +1260,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, -- Gitee From f7d8e961a1309f1126512f22e8b7e18d29aed8d1 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 3 Feb 2022 11:43:07 -0800 Subject: [PATCH 185/188] x86/cpufeatures: Put the AMX macros in the word 18 block mainline inclusion from mainline-v5.18-rc1 commit fa31a4d669bd471e9510db1abf9b91e1a6be6ff7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit fa31a4d669bd x86/cpufeatures: Put the AMX macros in the word 18 block. -------------------------------- These macros are for bits in CPUID.(EAX=7,ECX=0):EDX, not for bits in CPUID(EAX=7,ECX=1):EAX. Put them with their brethren. 
[ bp: Sort word 18 bits properly, as caught by Like Xu ] Signed-off-by: Jim Mattson Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20220203194308.2469117-1-jmattson@google.com Signed-off-by: Lin Wang --- arch/x86/include/asm/cpufeatures.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index db5d264157c5..d79507141ce1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -298,7 +298,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -382,7 +381,10 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -- Gitee From bea72171a7e3eebeac4eea643b8e575ea46e8342 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Sat, 29 Jan 2022 09:36:46 -0800 Subject: [PATCH 186/188] x86/fpu/xstate: Fix the ARCH_REQ_XCOMP_PERM implementation mainline inclusion from mainline-v5.18-rc1 commit 
063452fd94d153d4eb38ad58f210f3d37a09cca4 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 063452fd94d1 x86/fpu/xstate: Fix the ARCH_REQ_XCOMP_PERM implementation. -------------------------------- ARCH_REQ_XCOMP_PERM is supposed to add the requested feature to the permission bitmap of thread_group_leader()->fpu. But the code overwrites the bitmap with the requested feature bit only rather than adding it. Fix the code to add the requested feature bit to the master bitmask. Fixes: db8268df0983 ("x86/arch_prctl: Add controls for dynamic XSTATE components") Signed-off-by: Yang Zhong Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Cc: Paolo Bonzini Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220129173647.27981-2-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 98f27706d9e5..b66bb37f2bcf 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1626,7 +1626,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested) return ret; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ - WRITE_ONCE(fpu->perm.__state_perm, requested); + WRITE_ONCE(fpu->perm.__state_perm, mask); /* Protected by sighand lock */ fpu->perm.__state_size = ksize; fpu->perm.__user_state_size = usize; -- Gitee From 0e935dd7a562975481d9bcf3da348a1f0d82ed0d Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Sat, 29 Jan 2022 09:36:47 -0800 Subject: [PATCH 187/188] selftests/x86/amx: Update the ARCH_REQ_XCOMP_PERM test mainline inclusion from mainline-v5.18-rc1 commit 20df737561484cb2d42e537663c03a7311d2b3c1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 20df73756148 selftests/x86/amx: Update the ARCH_REQ_XCOMP_PERM test. 
-------------------------------- Update the arch_prctl test to check the permission bitmap whether the requested feature is added as expected or not. Every non-dynamic feature that is enabled is permitted already for use. TILECFG is not dynamic feature. Ensure the bit is always on from ARCH_GET_XCOMP_PERM. Signed-off-by: Chang S. Bae Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220129173647.27981-3-chang.seok.bae@intel.com Signed-off-by: Lin Wang --- tools/testing/selftests/x86/amx.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 3615ef4a48bb..2189f0322d8b 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -368,9 +368,16 @@ static void req_xtiledata_perm(void) static void validate_req_xcomp_perm(enum expected_result exp) { - unsigned long bitmask; + unsigned long bitmask, expected_bitmask; long rc; + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); + if (rc) { + fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); + } else if (!(bitmask & XFEATURE_MASK_XTILECFG)) { + fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off."); + } + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); if (exp == FAIL_EXPECTED) { if (rc) { @@ -383,10 +390,15 @@ static void validate_req_xcomp_perm(enum expected_result exp) fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n"); } + expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA; + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); if (rc) { fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); - } else if (bitmask & XFEATURE_MASK_XTILE) { + } else if (bitmask != expected_bitmask) { + fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n", + bitmask, expected_bitmask); + } else { printf("\tARCH_REQ_XCOMP_PERM is successful.\n"); } } -- Gitee From 
84c143ebf6904de4514ba0a7a0e5d672f5a40212 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 1 May 2022 21:31:43 +0200 Subject: [PATCH 188/188] x86/fpu: Prevent FPU state corruption mainline inclusion from mainline-v5.18-rc6 commit 59f5ede3bc0f00eb856425f636dab0c10feb06d8 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590ZC CVE: NA Intel-SIG: commit 59f5ede3bc0f x86/fpu: Prevent FPU state corruption. -------------------------------- The FPU usage related to task FPU management is either protected by disabling interrupts (switch_to, return to user) or via fpregs_lock() which is a wrapper around local_bh_disable(). When kernel code wants to use the FPU then it has to check whether it is possible by calling irq_fpu_usable(). But the condition in irq_fpu_usable() is wrong. It allows FPU to be used when: !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle() The latter is checking whether some other context already uses FPU in the kernel, but if that's not the case then it allows FPU to be used unconditionally even if the calling context interrupted a fpregs_lock() critical region. If that happens then the FPU state of the interrupted context becomes corrupted. Allow in kernel FPU usage only when no other context has in kernel FPU usage and either the calling context is not hard interrupt context or the hard interrupt did not interrupt a local bottomhalf disabled region. It's hard to find a proper Fixes tag as the condition was broken in one way or the other for a very long time and the eager/lazy FPU changes caused a lot of churn. Picked something remotely connected from the history. This survived undetected for quite some time as FPU usage in interrupt context is rare, but the recent changes to the random code unearthed it at least on a kernel which had FPU debugging enabled. There is probably a higher rate of silent corruption as not all issues can be detected by the FPU debugging code. 
This will be addressed in a subsequent change. Fixes: 5d2bd7009f30 ("x86, fpu: decouple non-lazy/eager fpu restore from xsave") Reported-by: Filipe Manana Signed-off-by: Thomas Gleixner Tested-by: Filipe Manana Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220501193102.588689270@linutronix.de Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 67 +++++++++++++++----------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 290836d1f2a7..1db281e8ef21 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -40,17 +40,7 @@ struct fpu_state_config fpu_user_cfg __ro_after_init; */ struct fpstate init_fpstate __ro_after_init; -/* - * Track whether the kernel is using the FPU state - * currently. - * - * This flag is used: - * - * - by IRQ context code to potentially use the FPU - * if it's unused. - * - * - to debug kernel_fpu_begin()/end() correctness - */ +/* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* @@ -58,42 +48,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static bool kernel_fpu_disabled(void) -{ - return this_cpu_read(in_kernel_fpu); -} - -static bool interrupted_kernel_fpu_idle(void) -{ - return !kernel_fpu_disabled(); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. 
*/ bool irq_fpu_usable(void) { - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); + if (WARN_ON_ONCE(in_nmi())) + return false; + + /* In kernel FPU usage already active? */ + if (this_cpu_read(in_kernel_fpu)) + return false; + + /* + * When not in NMI or hard interrupt context, FPU can be used in: + * + * - Task context except from within fpregs_lock()'ed critical + * regions. + * + * - Soft interrupt processing context which cannot happen + * while in a fpregs_lock()'ed critical region. + */ + if (!hardirq_count()) + return true; + + /* + * In hard interrupt context it's safe when soft interrupts + * are enabled, which means the interrupt did not hit in + * a fpregs_lock()'ed critical region. + */ + return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); -- Gitee