From 8d7cb145de511fdaf43334a31cd7163559147891 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 13 Jul 2022 12:19:58 +0800 Subject: [PATCH 01/16] mm/hwpoison: do not lock page again when me_huge_page() successfully recovers mainline inclusion from mainline-v5.13 commit ea6d0630100b285f059d0a8d8e86f38a46407536 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel_SIG: commit ea6d0630100b mm/hwpoison: do not lock page again when me_huge_page() successfully recovers. Backport for MCA recovery enhancing & bug fix. -------------------------------- Currently me_huge_page() temporary unlocks page to perform some actions then locks it again later. My testcase (which calls hard-offline on some tail page in a hugetlb, then accesses the address of the hugetlb range) showed that page allocation code detects this page lock on buddy page and printed out "BUG: Bad page state" message. check_new_page_bad() does not consider a page with __PG_HWPOISON as bad page, so this flag works as kind of filter, but this filtering doesn't work in this case because the "bad page" is not the actual hwpoisoned page. So stop locking page again. Actions to be taken depend on the page type of the error, so page unlocking should be done in ->action() callbacks. So let's make it assumed and change all existing callbacks that way. 
Link: https://lkml.kernel.org/r/20210609072029.74645-1-nao.horiguchi@gmail.com Fixes: commit 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error") Signed-off-by: Naoya Horiguchi Cc: Oscar Salvador Cc: Michal Hocko Cc: Tony Luck Cc: "Aneesh Kumar K.V" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- mm/memory-failure.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bfa6d1478a75..aabb952404e9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn, */ static int me_kernel(struct page *p, unsigned long pfn) { + unlock_page(p); return MF_IGNORED; } @@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn) static int me_unknown(struct page *p, unsigned long pfn) { pr_err("Memory failure: %#lx: Unknown page state\n", pfn); + unlock_page(p); return MF_FAILED; } @@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn) */ static int me_pagecache_clean(struct page *p, unsigned long pfn) { + int ret; struct address_space *mapping; delete_from_lru_cache(p); @@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * For anonymous pages we're done the only reference left * should be the one m_f() holds. */ - if (PageAnon(p)) - return MF_RECOVERED; + if (PageAnon(p)) { + ret = MF_RECOVERED; + goto out; + } /* * Now truncate the page in the page cache. This is really @@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) /* * Page has been teared down in the meanwhile */ - return MF_FAILED; + ret = MF_FAILED; + goto out; } /* @@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_mutex or not for this? Right now we don't. 
*/ - return truncate_error_page(p, pfn, mapping); + ret = truncate_error_page(p, pfn, mapping); +out: + unlock_page(p); + return ret; } /* @@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) */ static int me_swapcache_dirty(struct page *p, unsigned long pfn) { + int ret; + ClearPageDirty(p); /* Trigger EIO in shmem: */ ClearPageUptodate(p); - if (!delete_from_lru_cache(p)) - return MF_DELAYED; - else - return MF_FAILED; + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; + unlock_page(p); + return ret; } static int me_swapcache_clean(struct page *p, unsigned long pfn) { + int ret; + delete_from_swap_cache(p); - if (!delete_from_lru_cache(p)) - return MF_RECOVERED; - else - return MF_FAILED; + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; + unlock_page(p); + return ret; } /* @@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) mapping = page_mapping(hpage); if (mapping) { res = truncate_error_page(hpage, pfn, mapping); + unlock_page(hpage); } else { res = MF_FAILED; unlock_page(hpage); @@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) page_ref_inc(p); res = MF_RECOVERED; } - lock_page(hpage); } return res; @@ -866,6 +877,8 @@ static struct page_state { unsigned long mask; unsigned long res; enum mf_action_page_type type; + + /* Callback ->action() has to unlock the relevant page inside it. */ int (*action)(struct page *p, unsigned long pfn); } error_states[] = { { reserved, reserved, MF_MSG_KERNEL, me_kernel }, @@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p, int result; int count; + /* page p should be unlocked after returning from ps->action(). 
*/ result = ps->action(p, pfn); count = page_count(p) - 1; @@ -1246,7 +1260,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) goto out; } - res = identify_page_state(pfn, p, page_flags); + return identify_page_state(pfn, p, page_flags); out: unlock_page(head); return res; @@ -1536,6 +1550,8 @@ int memory_failure(unsigned long pfn, int flags) identify_page_state: res = identify_page_state(pfn, p, page_flags); + mutex_unlock(&mf_mutex); + return res; unlock_page: unlock_page(p); unlock_mutex: -- Gitee From 88749b21bc6c937adef37ddd5c78e0f5023fa3fe Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 28 Jun 2021 19:43:14 -0700 Subject: [PATCH 02/16] mm,hwpoison: send SIGBUS with error virutal address mainline inclusion from mainline-v5.14-rc1 commit a3f5d80ea401ac857f2910e28b15f35b2cf902f4 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit a3f5d80ea401 mm,hwpoison: send SIGBUS with error virutal address. Backport for MCA recovery enhancing & bug fix. -------------------------------- Now an action required MCE in already hwpoisoned address surely sends a SIGBUS to current process, but the SIGBUS doesn't convey error virtual address. That's not optimal for hwpoison-aware applications. To fix the issue, make memory_failure() call kill_accessing_process(), that does pagetable walk to find the error virtual address. It could find multiple virtual addresses for the same error page, and it seems hard to tell which virtual address is correct one. But that's rare and sending incorrect virtual address could be better than no address. So let's report the first found virtual address for now. 
[naoya.horiguchi@nec.com: fix walk_page_range() return] Link: https://lkml.kernel.org/r/20210603051055.GA244241@hori.linux.bs1.fc.nec.co.jp Link: https://lkml.kernel.org/r/20210521030156.2612074-4-nao.horiguchi@gmail.com Signed-off-by: Naoya Horiguchi Cc: Tony Luck Cc: Aili Yao Cc: Oscar Salvador Cc: David Hildenbrand Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Jue Wang Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/mce/core.c | 13 ++- include/linux/swapops.h | 5 ++ mm/memory-failure.c | 150 ++++++++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 00ee1756ec92..fec9b6f014d3 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1253,6 +1253,7 @@ static void kill_me_maybe(struct callback_head *cb) { struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); int flags = MF_ACTION_REQUIRED; + int ret; p->mce_count = 0; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); @@ -1260,13 +1261,21 @@ static void kill_me_maybe(struct callback_head *cb) if (!p->mce_ripv) flags |= MF_MUST_KILL; - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) && - !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { + ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); + if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); sync_core(); return; } + /* + * -EHWPOISON from memory_failure() means that it already sent SIGBUS + * to the current process with the proper error info, so no need to + * send SIGBUS here again. 
+ */ + if (ret == -EHWPOISON) + return; + if (p->mce_vaddr != (void __user *)-1l) { force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT); } else { diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 0d429a102d41..708fbeb21dd3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -332,6 +332,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } +static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index aabb952404e9..128fcf00be45 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "internal.h" #include "ras/ras_event.h" @@ -554,6 +555,148 @@ void collect_procs(struct page *page, struct list_head *tokill, } EXPORT_SYMBOL_GPL(collect_procs); +struct hwp_walk { + struct to_kill tk; + unsigned long pfn; + int flags; +}; + +static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift) +{ + tk->addr = addr; + tk->size_shift = shift; +} + +static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, + unsigned long poisoned_pfn, struct to_kill *tk) +{ + unsigned long pfn = 0; + + if (pte_present(pte)) { + pfn = pte_pfn(pte); + } else { + swp_entry_t swp = pte_to_swp_entry(pte); + + if (is_hwpoison_entry(swp)) + pfn = hwpoison_entry_to_pfn(swp); + } + + if (!pfn || pfn != poisoned_pfn) + return 0; + + set_to_kill(tk, addr, shift); + return 1; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + pmd_t pmd = *pmdp; + unsigned long pfn; + unsigned long hwpoison_vaddr; + + if (!pmd_present(pmd)) + return 0; + pfn = pmd_pfn(pmd); + if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { + hwpoison_vaddr = addr + 
((hwp->pfn - pfn) << PAGE_SHIFT); + set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); + return 1; + } + return 0; +} +#else +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + return 0; +} +#endif + +static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + int ret = 0; + pte_t *ptep; + spinlock_t *ptl; + + ptl = pmd_trans_huge_lock(pmdp, walk->vma); + if (ptl) { + ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp); + spin_unlock(ptl); + goto out; + } + + if (pmd_trans_unstable(pmdp)) + goto out; + + ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl); + for (; addr != end; ptep++, addr += PAGE_SIZE) { + ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT, + hwp->pfn, &hwp->tk); + if (ret == 1) + break; + } + pte_unmap_unlock(ptep - 1, ptl); +out: + cond_resched(); + return ret; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + pte_t pte = huge_ptep_get(ptep); + struct hstate *h = hstate_vma(walk->vma); + + return check_hwpoisoned_entry(pte, addr, huge_page_shift(h), + hwp->pfn, &hwp->tk); +} +#else +#define hwpoison_hugetlb_range NULL +#endif + +static struct mm_walk_ops hwp_walk_ops = { + .pmd_entry = hwpoison_pte_range, + .hugetlb_entry = hwpoison_hugetlb_range, +}; + +/* + * Sends SIGBUS to the current process with error info. + * + * This function is intended to handle "Action Required" MCEs on already + * hardware poisoned pages. They could happen, for example, when + * memory_failure() failed to unmap the error page at the first call, or + * when multiple local machine checks happened on different CPUs. 
+ * + * MCE handler currently has no easy access to the error virtual address, + * so this function walks page table to find it. The returned virtual address + * is proper in most cases, but it could be wrong when the application + * process has multiple entries mapping the error page. + */ +static int kill_accessing_process(struct task_struct *p, unsigned long pfn, + int flags) +{ + int ret; + struct hwp_walk priv = { + .pfn = pfn, + }; + priv.tk.tsk = p; + + mmap_read_lock(p->mm); + ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, + (void *)&priv); + if (ret == 1 && priv.tk.addr) + kill_proc(&priv.tk, pfn, flags); + mmap_read_unlock(p->mm); + return ret ? -EFAULT : -EHWPOISON; +} + static const char *action_name[] = { [MF_IGNORED] = "Ignored", [MF_FAILED] = "Failed", @@ -1204,7 +1347,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (TestSetPageHWPoison(head)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return -EHWPOISON; + res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, page_to_pfn(head), flags); + return res; } num_poisoned_pages_inc(); @@ -1416,6 +1562,8 @@ int memory_failure(unsigned long pfn, int flags) pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, pfn, flags); goto unlock_mutex; } -- Gitee From fa487b30890e3257520a2eb05937399eacf37055 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 17 Aug 2021 17:29:41 -0700 Subject: [PATCH 03/16] x86/mce: Change to not send SIGBUS error during copy from user mainline inclusion from mainline-v5.16-rc1 commit a6e3cf70b772541c2388abdb86e5a562cfe18e63 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit a6e3cf70b772 x86/mce: Change to not send SIGBUS error during copy from user. Backport for MCA recovery enhance and bug fix. 
-------------------------------- Sending a SIGBUS for a copy from user is not the correct semantic. System calls should return -EFAULT (or a short count for write(2)). Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210818002942.1607544-3-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/mce/core.c | 36 +++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index fec9b6f014d3..da03b02c84c7 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1262,7 +1262,7 @@ static void kill_me_maybe(struct callback_head *cb) flags |= MF_MUST_KILL; ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); - if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { + if (!ret) { set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); sync_core(); return; @@ -1276,15 +1276,21 @@ static void kill_me_maybe(struct callback_head *cb) if (ret == -EHWPOISON) return; - if (p->mce_vaddr != (void __user *)-1l) { - force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT); - } else { - pr_err("Memory error not recovered"); - kill_me_now(cb); - } + pr_err("Memory error not recovered"); + kill_me_now(cb); } -static void queue_task_work(struct mce *m, char *msg, int kill_current_task) +static void kill_me_never(struct callback_head *cb) +{ + struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); + + p->mce_count = 0; + pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) + set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); +} + +static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) { int count = ++current->mce_count; @@ -1294,11 +1300,7 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task) 
current->mce_kflags = m->kflags; current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); current->mce_whole_page = whole_page(m); - - if (kill_current_task) - current->mce_kill_me.func = kill_me_now; - else - current->mce_kill_me.func = kill_me_maybe; + current->mce_kill_me.func = func; } /* Ten is likely overkill. Don't expect more than two faults before task_work() */ @@ -1468,8 +1470,10 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - queue_task_work(&m, msg, kill_it); - + if (kill_it) + queue_task_work(&m, msg, kill_me_now); + else + queue_task_work(&m, msg, kill_me_maybe); } else { /* * Handle an MCE which has happened in kernel space but from @@ -1486,7 +1490,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } if (m.kflags & MCE_IN_KERNEL_COPYIN) - queue_task_work(&m, msg, kill_it); + queue_task_work(&m, msg, kill_me_never); } instrumentation_end(); -- Gitee From b88d64576da3f83a11064a27b351cab0e959e9e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 31 May 2021 00:32:44 -0400 Subject: [PATCH 04/16] generic_perform_write()/iomap_write_actor(): saner logics for short copy mainline inclusion from mainline-v5.13 commit bc1bb416bbb9203e250f5c49aaf1d11b5d9c8adb category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit bc1bb416bbb9 generic_perform_write()/iomap_write_actor(): saner logics for short copy. Backport for MCA recovery enhance and bug fix. -------------------------------- if we run into a short copy and ->write_end() refuses to advance at all, use the amount we'd managed to copy for the next iteration to handle. 
Signed-off-by: Al Viro Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- fs/iomap/buffered-io.c | 25 ++++++++++--------------- mm/filemap.c | 24 +++++++++--------------- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 3ec494f5d7ee..d5246e277f17 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -771,10 +771,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -791,25 +787,24 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - copied = iomap_write_end(inode, pos, bytes, copied, page, iomap, + status = iomap_write_end(inode, pos, bytes, copied, page, iomap, srcmap); cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made iomap_write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. 
*/ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; length -= copied; diff --git a/mm/filemap.c b/mm/filemap.c index edb94663c5df..a00fc493f5cf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3518,10 +3518,6 @@ ssize_t generic_perform_write(struct file *file, * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -3548,24 +3544,22 @@ ssize_t generic_perform_write(struct file *file, page, fsdata); if (unlikely(status < 0)) break; - copied = status; cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made ->write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. 
*/ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; -- Gitee From d79d9f61e24ce5722cd92c4853eeedbc258d1dae Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 17 Aug 2021 17:29:42 -0700 Subject: [PATCH 05/16] x86/mce: Drop copyin special case for #MC mainline inclusion from mainline-v5.16-rc1 commit 690658471b5f28d306e6492c4585d748cb5304e8 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit 690658471b5f x86/mce: Drop copyin special case for #MC. Backport for MCA recovery enhancing & bug fix. -------------------------------- Fixes to the iterator code to handle faults that are not on page boundaries mean that the special case for machine check during copy from user is no longer needed. For a full list of those fixes, see the output of: git log --oneline v5.14 ^v5.13 -- lib/iov_iter.c Intel-SIG: commit 690658471b5f x86/mce: Drop copyin special case for #MC. backport for MCA recovery enhance and bug fix. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210818002942.1607544-4-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- arch/x86/lib/copy_user_64.S | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 77b9b2a3b5c8..e0e71ca023ce 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -234,24 +234,11 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) */ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) movl %edx,%ecx - cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */ - je 3f 1: rep movsb 2: mov %ecx,%eax ASM_CLAC ret - /* - * Return zero to pretend that this copy succeeded. 
This - * is counter-intuitive, but needed to prevent the code - * in lib/iov_iter.c from retrying and running back into - * the poison cache line again. The machine check handler - * will ensure that a SIGBUS is sent to the task. - */ -3: xorl %eax,%eax - ASM_CLAC - ret - _ASM_EXTABLE_CPY(1b, 2b) SYM_CODE_END(.Lcopy_user_handle_tail) -- Gitee From af754cd057d343b3a741bd2c44c3fdd70a0926bd Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 23 Dec 2021 12:07:01 -0800 Subject: [PATCH 06/16] x86/mce: Reduce number of machine checks taken during recovery mainline inclusion from mainline-v5.17-rc1 commit 3376136300a00df9a864b88fa969177d6c3be8e5 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit 3376136300a0 x86/mce: Reduce number of machine checks taken during recovery. Backport for MCA recovery enhancing & bug fix. -------------------------------- When any of the copy functions in arch/x86/lib/copy_user_64.S take a fault, the fixup code copies the remaining byte count from %ecx to %edx and unconditionally jumps to .Lcopy_user_handle_tail to continue the copy in case any more bytes can be copied. If the fault was #PF this may copy more bytes (because the page fault handler might have fixed the fault). But when the fault is a machine check the original copy code will have copied all the way to the poisoned cache line. So .Lcopy_user_handle_tail will just take another machine check for no good reason. Every code path to .Lcopy_user_handle_tail comes from an exception fixup path, so add a check there to check the trap type (in %eax) and simply return the count of remaining bytes if the trap was a machine check. Doing this reduces the number of machine checks taken during synthetic tests from four to three. As well as reducing the number of machine checks, this also allows Skylake generation Xeons to recover some cases that currently fail. 
This is because REP; MOVSB is only recoverable when source and destination are well aligned and the byte count is large. That useless call to .Lcopy_user_handle_tail may violate one or more of these conditions and generate a fatal machine check. [ Tony: Add more details to commit message. ] [ bp: Fixup comment. Also, another tip patchset which is adding straight-line speculation mitigation changes the "ret" instruction to an all-caps macro "RET". But, since gas is case-insensitive, use "RET" in the newly added asm block already in order to simplify tip branch merging on its way upstream. ] Signed-off-by: Youquan Song Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/YcTW5dh8yTGucDd+@agluck-desk2.amr.corp.intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- arch/x86/lib/copy_user_64.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index e0e71ca023ce..403ba3eb4b84 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -225,6 +225,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * Don't try to copy the tail if machine check happened * * Input: + * eax trap number written by ex_handler_copy() * rdi destination * rsi source * rdx count @@ -233,12 +234,20 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * eax uncopied bytes or 0 if successful. 
*/ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) + cmp $X86_TRAP_MC,%eax + je 3f + movl %edx,%ecx 1: rep movsb 2: mov %ecx,%eax ASM_CLAC ret +3: + movl %edx,%eax + ASM_CLAC + RET + _ASM_EXTABLE_CPY(1b, 2b) SYM_CODE_END(.Lcopy_user_handle_tail) -- Gitee From 7bf4f178c3d9b9e798788bda5062117dc83a34bd Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Sun, 6 Feb 2022 15:10:14 +0800 Subject: [PATCH 07/16] mm/hwpoison: fix error page recovered but reported "not recovered" mainline inclusion from mainline-v5.18-rc1 commit 046545a661af2beec21de7b90ca0e35f05088a81 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit 046545a661af mm/hwpoison: fix error page recovered but reported "not recovered". Backport for MCA recovery enhancing & bug fix. -------------------------------- When an uncorrected memory error is consumed there is a race between the CMCI from the memory controller reporting an uncorrected error with a UCNA signature, and the core reporting an SRAR signature machine check when the data is about to be consumed. If the CMCI wins that race, the page is marked poisoned when uc_decode_notifier() calls memory_failure() and the machine check processing code finds the page already poisoned. It calls kill_accessing_process() to make sure a SIGBUS is sent. But returns the wrong error code. Console log looks like this: mce: Uncorrected hardware memory error in user-access at 3710b3400 Memory failure: 0x3710b3: recovery action for dirty LRU page: Recovered Memory failure: 0x3710b3: already hardware poisoned Memory failure: 0x3710b3: Sending SIGBUS to einj_mem_uc:361438 due to hardware memory corruption mce: Memory error not recovered kill_accessing_process() is supposed to return -EHWPOISON to notify that SIGBUS is already sent to the process and kill_me_maybe() doesn't have to send it again. But current code simply fails to do this, so fix it to make sure to work as intended. 
This change avoids the noise message "Memory error not recovered" and skips duplicate SIGBUSs. [tony.luck@intel.com: reword some parts of commit message] Link: https://lkml.kernel.org/r/20220113231117.1021405-1-naoya.horiguchi@linux.dev Fixes: a3f5d80ea401 ("mm,hwpoison: send SIGBUS with error virutal address") Signed-off-by: Naoya Horiguchi Reported-by: Youquan Song Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- mm/memory-failure.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 128fcf00be45..97a00a8e6f79 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -693,8 +693,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, (void *)&priv); if (ret == 1 && priv.tk.addr) kill_proc(&priv.tk, pfn, flags); + else + ret = 0; mmap_read_unlock(p->mm); - return ret ? -EFAULT : -EHWPOISON; + return ret > 0 ? -EHWPOISON : -EFAULT; } static const char *action_name[] = { -- Gitee From fc045cf236a2f4a7538e2b80005bef1d808b8cd1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 17 Nov 2020 20:49:52 +0800 Subject: [PATCH 08/16] EDAC: Add DDR5 new memory type mainline inclusion from mainline-v5.13 commit bc1c99a5971aa7571e8b9731c28fa32abe12cab8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel_SIG: commit bc1c99a5971a EDAC: Add DDR5 new memory type. Backport for EDAC enhancing & bug fix. -------------------------------- Add a new entry to 'enum mem_type' and a new string to 'edac_mem_types[]' for DDR5 new memory type. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/edac_mc.c | 1 + include/linux/edac.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f4eb071327be..34514c638f19 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -161,6 +161,7 @@ const char * const edac_mem_types[] = { [MEM_DDR4] = "Unbuffered-DDR4", [MEM_RDDR4] = "Registered-DDR4", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", + [MEM_DDR5] = "Unbuffered-DDR5", [MEM_NVDIMM] = "Non-volatile-RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895..6c4565cc6273 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -179,6 +179,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. + * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM */ enum mem_type { @@ -203,6 +204,7 @@ enum mem_type { MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_DDR5, MEM_NVDIMM, }; @@ -226,6 +228,7 @@ enum mem_type { #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** -- Gitee From 63875de9ea5e9ed2fc1ad9d8930fa810c00c3e08 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 17 Nov 2020 20:49:53 +0800 Subject: [PATCH 09/16] EDAC/i10nm: Add Intel Sapphire Rapids server support mainline inclusion from mainline-v5.11-rc1 commit 479f58dda25bb46daeb937f124718e8b4aea6781 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel_SIG: commit 479f58dda25b EDAC/i10nm: Add Intel Sapphire Rapids server support. Backport to add EDAC SPR support. 
-------------------------------- The Sapphire Rapids CPU model shares the same memory controller architecture with Ice Lake server. There are some configurations different from Ice Lake server as below: - The device ID for configuration agent. - The size for per channel memory-mapped I/O. - The DDR5 memory support. So add the above configurations and the Sapphire Rapids CPU model ID for EDAC support. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/i10nm_base.c | 34 +++++++++++++++++++++++++--------- drivers/edac/skx_base.c | 6 +++--- drivers/edac/skx_common.c | 23 ++++++++++++++++++----- drivers/edac/skx_common.h | 16 ++++++++++++---- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 3a7362f968c9..4f7f9970a901 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -13,7 +13,7 @@ #include "edac_module.h" #include "skx_common.h" -#define I10NM_REVISION "v0.0.3" +#define I10NM_REVISION "v0.0.4" #define EDAC_MOD_STR "i10nm_edac" /* Debug macros */ @@ -25,11 +25,13 @@ #define I10NM_GET_IMC_BAR(d, i, reg) \ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - readl((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4) + readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i) \ - readl((m)->mbase + 0x20970 + (i) * 0x4000) + readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz) #define I10NM_GET_MCMTR(m, i) \ - readl((m)->mbase + 0x20ef8 + (i) * 0x4000) + readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_AMAP(m, i) \ + readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) @@ -129,12 +131,22 @@ static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, 
.busno_cfg_offset = 0xcc, + .ddr_chan_mmio_sz = 0x4000, }; static struct res_config i10nm_cfg1 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xd0, + .ddr_chan_mmio_sz = 0x4000, +}; + +static struct res_config spr_cfg = { + .type = SPR, + .decs_did = 0x3252, + .busno_cfg_offset = 0xd0, + .ddr_chan_mmio_sz = 0x8000, + .support_ddr5 = true, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -143,6 +155,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -157,12 +170,13 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan) return !!GET_BITFIELD(mcmtr, 2, 2); } -static int i10nm_get_dimm_config(struct mem_ctl_info *mci) +static int i10nm_get_dimm_config(struct mem_ctl_info *mci, + struct res_config *cfg) { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; + u32 mtr, amap, mcddrtcfg; struct dimm_info *dimm; - u32 mtr, mcddrtcfg; int i, j, ndimms; for (i = 0; i < I10NM_NUM_CHANNELS; i++) { @@ -171,6 +185,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci) ndimms = 0; mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); + amap = I10NM_GET_AMAP(imc, i); for (j = 0; j < I10NM_NUM_DIMMS; j++) { dimm = edac_get_dimm(mci, i, j, 0); mtr = I10NM_GET_DIMMMTR(imc, i, j); @@ -178,8 +193,8 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci) mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, - imc, i, j); + ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, EDAC_MOD_STR); 
@@ -306,10 +321,11 @@ static int __init i10nm_init(void) d->imc[i].lmc = i; d->imc[i].src_id = src_id; d->imc[i].node_id = node_id; + d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, "Intel_10nm Socket", EDAC_MOD_STR, - i10nm_get_dimm_config); + i10nm_get_dimm_config, cfg); if (rc < 0) goto fail; } diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index f887e3166651..4dbd46575bfb 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -174,7 +174,7 @@ static bool skx_check_ecc(u32 mcmtr) return !!GET_BITFIELD(mcmtr, 2, 2); } -static int skx_get_dimm_config(struct mem_ctl_info *mci) +static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) { struct skx_pvt *pvt = mci->pvt_info; u32 mtr, mcmtr, amap, mcddrtcfg; @@ -195,7 +195,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(imc->chan[i].cdev, 0x80 + 4 * j, &mtr); if (IS_DIMM_PRESENT(mtr)) { - ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j); + ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j, cfg); } else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) { ndimms += skx_get_nvdimm_info(dimm, imc, i, j, EDAC_MOD_STR); @@ -705,7 +705,7 @@ static int __init skx_init(void) d->imc[i].node_id = node_id; rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, "Skylake Socket", EDAC_MOD_STR, - skx_get_dimm_config); + skx_get_dimm_config, cfg); if (rc < 0) goto fail; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 2b4ce8e5ac2f..81c3e2ec6f56 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -304,15 +304,25 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, - struct skx_imc *imc, int chan, int dimmno) + struct skx_imc *imc, int chan, int dimmno, + struct res_config 
*cfg) { - int banks = 16, ranks, rows, cols, npages; + int banks, ranks, rows, cols, npages; + enum mem_type mtype; u64 size; ranks = numrank(mtr); rows = numrow(mtr); cols = numcol(mtr); + if (cfg->support_ddr5 && (amap & 0x8)) { + banks = 32; + mtype = MEM_DDR5; + } else { + banks = 16; + mtype = MEM_DDR4; + } + /* * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) */ @@ -332,7 +342,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, dimm->nr_pages = npages; dimm->grain = 32; dimm->dtype = get_width(mtr); - dimm->mtype = MEM_DDR4; + dimm->mtype = mtype; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", imc->src_id, imc->lmc, chan, dimmno); @@ -390,7 +400,8 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config) + get_dimm_config_f get_dimm_config, + struct res_config *cfg) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; @@ -425,13 +436,15 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, } mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM; + if (cfg->support_ddr5) + mci->mtype_cap |= MEM_FLAG_DDR5; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = mod_str; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; - rc = get_dimm_config(mci); + rc = get_dimm_config(mci, cfg); if (rc < 0) goto fail; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 78f8c1de0b71..bf56bebff138 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -59,6 +59,7 @@ struct skx_dev { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ void __iomem *mbase; /* for i10nm CPU */ + int chan_mmio_sz; /* for i10nm CPU */ u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ u8 
src_id, node_id; @@ -82,7 +83,8 @@ struct skx_pvt { enum type { SKX, - I10NM + I10NM, + SPR }; enum { @@ -118,9 +120,13 @@ struct res_config { unsigned int decs_did; /* Default bus number configuration register offset */ int busno_cfg_offset; + /* Per DDR channel memory-mapped I/O size */ + int ddr_chan_mmio_sz; + bool support_ddr5; }; -typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci); +typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, + struct res_config *cfg); typedef bool (*skx_decode_f)(struct decoded_addr *res); typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len); @@ -136,14 +142,16 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, - struct skx_imc *imc, int chan, int dimmno); + struct skx_imc *imc, int chan, int dimmno, + struct res_config *cfg); int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str); int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config); + get_dimm_config_f get_dimm_config, + struct res_config *cfg); int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data); -- Gitee From 50fc0e9277f761573f250c47dc864a886488ba13 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 18 Aug 2021 10:57:01 -0700 Subject: [PATCH 10/16] EDAC/i10nm: Retrieve and print retry_rd_err_log registers mainline inclusion from mainline-v5.15-rc1 commit cf4e6d52f58399c777276172ec250502e19d5e63 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit cf4e6d52f583 EDAC/i10nm: Retrieve and print retry_rd_err_log registers. Backport for EDAC retry_rd_err_log for ICX/SPR support. 
-------------------------------- Retrieve and print retry_rd_err_log registers like the earlier change: commit e80634a75aba ("EDAC, skx: Retrieve and print retry_rd_err_log registers"). This is a little trickier than on Skylake because of potential interference with BIOS use of the same registers. The default behavior is to ignore these registers. A module parameter retry_rd_err_log (default=0) controls the mode of operation: - 0=off : Default. - 1=bios : Linux doesn't reset any control bits, but just reports values. This is "no harm" mode, but it may miss reporting some data. - 2=linux: Linux tries to take control and resets mode bits, clears valid/UC bits after reading. This should be more reliable (especially if BIOS interference is reduced by disabling eMCA reporting mode in BIOS setup). Co-developed-by: Qiuxu Zhuo Signed-off-by: Qiuxu Zhuo Signed-off-by: Youquan Song Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210818175701.1611513-3-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/i10nm_base.c | 146 ++++++++++++++++++++++++++++++++++++++ drivers/edac/skx_base.c | 3 +- drivers/edac/skx_common.c | 4 +- drivers/edac/skx_common.h | 7 +- 4 files changed, 157 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 4f7f9970a901..7798de1e2aed 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -32,14 +32,137 @@ readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz) #define I10NM_GET_AMAP(m, i) \ readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_REG32(m, i, offset) \ + readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_GET_REG64(m, i, offset) \ + readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_SET_REG32(m, i, offset, v) \ + writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define 
I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ GET_BITFIELD(reg, 0, 10) + 1) << 12) +#define RETRY_RD_ERR_LOG_UC BIT(1) +#define RETRY_RD_ERR_LOG_NOOVER BIT(14) +#define RETRY_RD_ERR_LOG_EN BIT(15) +#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) +#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) + static struct list_head *i10nm_edac_list; +static struct res_config *res_cfg; +static int retry_rd_err_log; + +static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; + +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable) +{ + u32 s, d; + + if (!imc->mbase) + return; + + s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]); + d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]); + + if (enable) { + /* Save default configurations */ + imc->chan[chan].retry_rd_err_log_s = s; + imc->chan[chan].retry_rd_err_log_d = d; + + s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + s |= RETRY_RD_ERR_LOG_EN; + d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + d |= RETRY_RD_ERR_LOG_EN; + } else { + /* Restore default configurations */ + if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) + s |= RETRY_RD_ERR_LOG_UC; + if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) + s |= RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) + s &= ~RETRY_RD_ERR_LOG_EN; + if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) + d |= RETRY_RD_ERR_LOG_UC; + if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) + d |= RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) + 
d &= ~RETRY_RD_ERR_LOG_EN; + } + + I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s); + I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d); +} + +static void enable_retry_rd_err_log(bool enable) +{ + struct skx_dev *d; + int i, j; + + edac_dbg(2, "\n"); + + list_for_each_entry(d, i10nm_edac_list, list) + for (i = 0; i < I10NM_NUM_IMC; i++) + for (j = 0; j < I10NM_NUM_CHANNELS; j++) + __enable_retry_rd_err_log(&d->imc[i], j, enable); +} + +static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, + int len, bool scrub_err) +{ + struct skx_imc *imc = &res->dev->imc[res->imc]; + u32 log0, log1, log2, log3, log4; + u32 corr0, corr1, corr2, corr3; + u64 log2a, log5; + u32 *offsets; + int n; + + if (!imc->mbase) + return; + + offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand; + + log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); + log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); + log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); + log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); + log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); + + if (res_cfg->type == SPR) { + log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]", + log0, log1, log2a, log3, log4, log5); + } else { + log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", + log0, log1, log2, log3, log4, log5); + } + + corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + + if (len - n > 0) + snprintf(msg + n, len - n, + " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", + corr0 & 0xffff, corr0 >> 16, + corr1 & 0xffff, corr1 >> 16, + corr2 & 0xffff, corr2 >> 16, + corr3 & 0xffff, corr3 >> 16); + + /* Clear status bits 
*/ + if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { + log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + } +} + static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, unsigned int dev, unsigned int fun) { @@ -132,6 +255,8 @@ static struct res_config i10nm_cfg0 = { .decs_did = 0x3452, .busno_cfg_offset = 0xcc, .ddr_chan_mmio_sz = 0x4000, + .offsets_scrub = offsets_scrub_icx, + .offsets_demand = offsets_demand_icx, }; static struct res_config i10nm_cfg1 = { @@ -139,6 +264,8 @@ static struct res_config i10nm_cfg1 = { .decs_did = 0x3452, .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x4000, + .offsets_scrub = offsets_scrub_icx, + .offsets_demand = offsets_demand_icx, }; static struct res_config spr_cfg = { @@ -147,6 +274,8 @@ static struct res_config spr_cfg = { .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x8000, .support_ddr5 = true, + .offsets_scrub = offsets_scrub_spr, + .offsets_demand = offsets_demand_spr, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -286,6 +415,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); if (rc) @@ -339,6 +469,12 @@ static int __init i10nm_init(void) mce_register_decode_chain(&i10nm_mce_dec); setup_i10nm_debug(); + if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + skx_set_decode(NULL, show_retry_rd_err_log); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(true); + } + i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); return 0; @@ -350,6 +486,13 @@ static int __init i10nm_init(void) static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); + + if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + skx_set_decode(NULL, NULL); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(false); + } + teardown_i10nm_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); @@ 
-359,5 +502,8 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +module_param(retry_rd_err_log, int, 0444); +MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors"); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 4dbd46575bfb..1abc020d49ab 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -230,7 +230,8 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) #define SKX_ILV_TARGET(tgt) ((tgt) & 7) static void skx_show_retry_rd_err_log(struct decoded_addr *res, - char *msg, int len) + char *msg, int len, + bool scrub_err) { u32 log0, log1, log2, log3, log4; u32 corr0, corr1, corr2, corr3; diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 81c3e2ec6f56..05ffd39d6de9 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -494,6 +494,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool scrub_err = false; bool recoverable; int len; u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); @@ -545,6 +546,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; case 4: optype = "memory scrubbing error"; + scrub_err = true; break; default: optype = "reserved"; @@ -567,7 +569,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, } if (skx_show_retry_rd_err_log) - skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len); + skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err); edac_dbg(0, "%s\n", skx_msg); diff --git a/drivers/edac/skx_common.h 
b/drivers/edac/skx_common.h index bf56bebff138..22e174c740be 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -66,6 +66,8 @@ struct skx_dev { struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; + u32 retry_rd_err_log_s; + u32 retry_rd_err_log_d; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -123,12 +125,15 @@ struct res_config { /* Per DDR channel memory-mapped I/O size */ int ddr_chan_mmio_sz; bool support_ddr5; + /* Offsets of retry_rd_err_log registers */ + u32 *offsets_scrub; + u32 *offsets_demand; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, struct res_config *cfg); typedef bool (*skx_decode_f)(struct decoded_addr *res); -typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len); +typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); int __init skx_adxl_get(void); void __exit skx_adxl_put(void); -- Gitee From 1d6ae59aa3afc542367f4e6942cc63c9d6a02444 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 11 Jun 2021 10:01:18 -0700 Subject: [PATCH 11/16] EDAC/skx_common: Add new ADXL components for 2-level memory mainline inclusion from mainline-v5.14-rc1 commit 2f4348e5a86198704368a699a7c4cdeb21d569f5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit 2f4348e5a861 EDAC/skx_common: Add new ADXL components for 2-level memory. Backport to add EDAC 2LM support. -------------------------------- Some Intel servers may configure memory in 2 levels, using fast "near" memory (e.g. DDR) as a cache for larger, slower, "far" memory (e.g. 3D X-point). In these configurations the BIOS ADXL address translation for an address in a 2-level memory range will provide details of both the "near" and far components. Current exported ADXL components are only for 1-level memory system or for 2nd level memory of 2-level memory system. 
So add new ADXL components for the 1st level memory of a 2-level memory system, to fully support 2-level memory systems and the detection of the memory error source (1st level memory or 2nd level memory). Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210611170123.1057025-2-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/skx_common.c | 67 ++++++++++++++++++++++++++++++++------- drivers/edac/skx_common.h | 11 +++++++ 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 05ffd39d6de9..3d0b391a400d 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -23,10 +23,13 @@ #include "skx_common.h" static const char * const component_names[] = { - [INDEX_SOCKET] = "ProcessorSocketId", - [INDEX_MEMCTRL] = "MemoryControllerId", - [INDEX_CHANNEL] = "ChannelId", - [INDEX_DIMM] = "DimmSlotId", + [INDEX_SOCKET] = "ProcessorSocketId", + [INDEX_MEMCTRL] = "MemoryControllerId", + [INDEX_CHANNEL] = "ChannelId", + [INDEX_DIMM] = "DimmSlotId", + [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", + [INDEX_NM_CHANNEL] = "NmChannelId", + [INDEX_NM_DIMM] = "NmDimmSlotId", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -34,12 +37,14 @@ static int adxl_component_count; static const char * const *adxl_component_names; static u64 *adxl_values; static char *adxl_msg; +static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; static skx_decode_f skx_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); +static bool skx_mem_cfg_2lm; int __init skx_adxl_get(void) { @@ -56,14 +61,25 @@ int __init skx_adxl_get(void) for (j = 0; names[j]; j++) { if (!strcmp(component_names[i], names[j])) { component_indices[i] = j; + + if (i >= INDEX_NM_FIRST) + adxl_nm_bitmap |= 1 << i; + break; } } - if (!names[j]) + if (!names[j] && i < INDEX_NM_FIRST) goto err; } + 
if (skx_mem_cfg_2lm) { + if (!adxl_nm_bitmap) + skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); + else + edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); + } + adxl_component_names = names; while (*names++) adxl_component_count++; @@ -99,7 +115,7 @@ void __exit skx_adxl_put(void) kfree(adxl_msg); } -static bool skx_adxl_decode(struct decoded_addr *res) +static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) { struct skx_dev *d; int i, len = 0; @@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res) } res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; - res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; - res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + if (error_in_1st_level_mem) { + res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? + (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; + res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? + (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; + res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? 
+ (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + } else { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + } - if (res->imc > NUM_IMC - 1) { + if (res->imc > NUM_IMC - 1 || res->imc < 0) { skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); return false; } @@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res) return true; } +void skx_set_mem_cfg(bool mem_cfg_2lm) +{ + skx_mem_cfg_2lm = mem_cfg_2lm; +} + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { skx_decode = decode; @@ -580,6 +610,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } +static bool skx_error_in_1st_level_mem(const struct mce *m) +{ + u32 errcode; + + if (!skx_mem_cfg_2lm) + return false; + + errcode = GET_BITFIELD(m->status, 0, 15); + + if ((errcode & 0xef80) != 0x280) + return false; + + return true; +} + int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { @@ -599,7 +644,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, res.addr = mce->addr; if (adxl_component_count) { - if (!skx_adxl_decode(&res)) + if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) return NOTIFY_DONE; } else if (!skx_decode || !skx_decode(&res)) { return NOTIFY_DONE; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 22e174c740be..de9f82977f0e 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -9,6 +9,8 @@ #ifndef _SKX_COMM_EDAC_H #define _SKX_COMM_EDAC_H +#include <linux/bits.h> + #define MSG_SIZE 1024 /* @@ -94,9 +96,17 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_NM_FIRST, + INDEX_NM_MEMCTRL = INDEX_NM_FIRST, + INDEX_NM_CHANNEL, + INDEX_NM_DIMM, INDEX_MAX }; +#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) +#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) +#define 
BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) + struct decoded_addr { struct skx_dev *dev; u64 addr; @@ -138,6 +148,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le int __init skx_adxl_get(void); void __exit skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); +void skx_set_mem_cfg(bool mem_cfg_2lm); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); -- Gitee From a875fa6a6adc15e5fb03e2046807bc63fc96aea9 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 11 Jun 2021 10:01:19 -0700 Subject: [PATCH 12/16] EDAC/i10nm: Add detection of memory levels for ICX/SPR servers mainline inclusion from mainline-v5.14-rc1 commit 4bd4d32e9a38d7ffb091b4109ab63c8f601e5678 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit 4bd4d32e9a38 EDAC/i10nm: Add detection of memory levels for ICX/SPR servers. Backport to add EDAC 2LM support. -------------------------------- The current i10nm_edac driver only supports systems configured in 1-level memory. If the system is configured in 2-level memory, the driver doesn't report the 1st level memory DIMM for the error address, even if the error occurs in the 1st level memory. Both Ice Lake servers and Sapphire Rapids servers can be configured in 2-level memory. Add detection of memory levels to i10nm_edac for the two kinds of servers so that the driver can report the 2nd level memory DIMM or the 1st level memory DIMM according to the error source. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210611170123.1057025-3-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/i10nm_base.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 3 +++ 2 files changed, 42 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 7798de1e2aed..4e2a4d8396da 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -24,6 +24,8 @@ pci_read_config_dword((d)->uracu, 0xd0, &(reg)) #define I10NM_GET_IMC_BAR(d, i, reg) \ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) +#define I10NM_GET_SAD(d, offset, i, reg)\ + pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i) \ @@ -50,6 +52,10 @@ #define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) #define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) +#define I10NM_MAX_SAD 16 +#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) +#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) + static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; @@ -186,6 +192,31 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, return pdev; } +static bool i10nm_check_2lm(struct res_config *cfg) +{ + struct skx_dev *d; + u32 reg; + int i; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1], + PCI_SLOT(cfg->sad_all_devfn), + PCI_FUNC(cfg->sad_all_devfn)); + if (!d->sad_all) + continue; + + for (i = 0; i < I10NM_MAX_SAD; i++) { + I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); + if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { + edac_dbg(2, "2-level memory configuration.\n"); + return true; + } + } + } + + return false; +} + static int i10nm_get_all_munits(void) { struct pci_dev *mdev; @@ -255,6 +286,8 
@@ static struct res_config i10nm_cfg0 = { .decs_did = 0x3452, .busno_cfg_offset = 0xcc, .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, .offsets_scrub = offsets_scrub_icx, .offsets_demand = offsets_demand_icx, }; @@ -264,6 +297,8 @@ static struct res_config i10nm_cfg1 = { .decs_did = 0x3452, .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, .offsets_scrub = offsets_scrub_icx, .offsets_demand = offsets_demand_icx, }; @@ -274,6 +309,8 @@ static struct res_config spr_cfg = { .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x8000, .support_ddr5 = true, + .sad_all_devfn = PCI_DEVFN(10, 0), + .sad_all_offset = 0x300, .offsets_scrub = offsets_scrub_spr, .offsets_demand = offsets_demand_spr, }; @@ -429,6 +466,8 @@ static int __init i10nm_init(void) return -ENODEV; } + skx_set_mem_cfg(i10nm_check_2lm(cfg)); + rc = i10nm_get_all_munits(); if (rc < 0) goto fail; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index de9f82977f0e..1fb7540a7092 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -135,6 +135,9 @@ struct res_config { /* Per DDR channel memory-mapped I/O size */ int ddr_chan_mmio_sz; bool support_ddr5; + /* SAD device number and function number */ + unsigned int sad_all_devfn; + int sad_all_offset; /* Offsets of retry_rd_err_log registers */ u32 *offsets_scrub; u32 *offsets_demand; -- Gitee From 9ab1951c13df982c522cb56fcf4c7535780ca82c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 13 Jul 2022 15:28:12 +0800 Subject: [PATCH 13/16] EDAC/i10nm: Add support for high bandwidth memory mainline inclusion from mainline-v5.14-rc1 commit c945088384d00e6eb61535cc4ba25bc062090909 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit c945088384d0 EDAC/i10nm: Add support for high bandwidth memory. Backport to add EDAC HBM support. 
-------------------------------- A future Xeon processor will include in-package HBM (high bandwidth memory). The in-package HBM memory controller shares the same architecture with the regular DDR memory controller. Add the HBM memory controller devices for EDAC support. Tested-by: Hongyu Ning Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210611170123.1057025-4-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/i10nm_base.c | 132 ++++++++++++++++++++++++++++++++++---- drivers/edac/skx_common.c | 15 +++-- drivers/edac/skx_common.h | 20 +++++- 3 files changed, 148 insertions(+), 19 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 4e2a4d8396da..47d6d7282c58 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -13,7 +13,7 @@ #include "edac_module.h" #include "skx_common.h" -#define I10NM_REVISION "v0.0.4" +#define I10NM_REVISION "v0.0.5" #define EDAC_MOD_STR "i10nm_edac" /* Debug macros */ @@ -26,14 +26,22 @@ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) #define I10NM_GET_SAD(d, offset, i, reg)\ pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) +#define I10NM_GET_HBM_IMC_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd4, &(reg)) +#define I10NM_GET_CAPID3_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4) + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \ + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i) \ - readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz) + readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_MCMTR(m, i) \ - readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz) + readl((m)->mbase + ((m)->hbm_mc ? 
0xef8 : 0x20ef8) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz) + readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_REG32(m, i, offset) \ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_REG64(m, i, offset) \ @@ -45,6 +53,12 @@ #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ GET_BITFIELD(reg, 0, 10) + 1) << 12) +#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ + ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) + +#define I10NM_HBM_IMC_MMIO_SIZE 0x9000 +#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) +#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) #define RETRY_RD_ERR_LOG_UC BIT(1) #define RETRY_RD_ERR_LOG_NOOVER BIT(14) @@ -217,7 +231,7 @@ static bool i10nm_check_2lm(struct res_config *cfg) return false; } -static int i10nm_get_all_munits(void) +static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; void __iomem *mbase; @@ -245,7 +259,7 @@ static int i10nm_get_all_munits(void) edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", j++, base, reg); - for (i = 0; i < I10NM_NUM_IMC; i++) { + for (i = 0; i < I10NM_NUM_DDR_IMC; i++) { mdev = pci_get_dev_wrapper(d->seg, d->bus[0], 12 + i, 0); if (i == 0 && !mdev) { @@ -281,6 +295,90 @@ static int i10nm_get_all_munits(void) return 0; } +static bool i10nm_check_hbm_imc(struct skx_dev *d) +{ + u32 reg; + + if (I10NM_GET_CAPID3_CFG(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n"); + return false; + } + + return I10NM_IS_HBM_PRESENT(reg) != 0; +} + +static int i10nm_get_hbm_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + u32 reg, off, mcmtr; + struct skx_dev *d; + int i, lmc; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3); + if (!d->pcu_cr3) + return -ENODEV; + + 
if (!i10nm_check_hbm_imc(d)) { + i10nm_printk(KERN_DEBUG, "No hbm memory\n"); + return -ENODEV; + } + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); + return -ENODEV; + } + base = I10NM_GET_SCK_MMIO_BASE(reg); + + if (I10NM_GET_HBM_IMC_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); + return -ENODEV; + } + base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); + + lmc = I10NM_NUM_DDR_IMC; + + for (i = 0; i < I10NM_NUM_HBM_IMC; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[0], + 12 + i / 4, 1 + i % 4); + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No hbm mc found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + d->imc[lmc].mdev = mdev; + off = i * I10NM_HBM_IMC_MMIO_SIZE; + + edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n", + lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); + + mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); + if (!mbase) { + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", + base + off); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].hbm_mc = true; + + mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); + if (!I10NM_IS_HBM_IMC(mcmtr)) { + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); + return -ENODEV; + } + + lmc++; + } + } + + return 0; +} + static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, @@ -308,6 +406,7 @@ static struct res_config spr_cfg = { .decs_did = 0x3252, .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x8000, + .hbm_chan_mmio_sz = 0x4000, .support_ddr5 = true, .sad_all_devfn = PCI_DEVFN(10, 0), .sad_all_offset = 0x300, @@ -345,14 +444,14 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, struct dimm_info *dimm; int i, j, ndimms; - for (i = 0; i < I10NM_NUM_CHANNELS; i++) { + for (i = 0; i < imc->num_channels; i++) { if (!imc->mbase) continue; ndimms = 0; mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); amap = I10NM_GET_AMAP(imc, i); - for (j = 0; j < I10NM_NUM_DIMMS; j++) { + for (j = 0; j < 
imc->num_dimms; j++) { dimm = edac_get_dimm(mci, i, j, 0); mtr = I10NM_GET_DIMMMTR(imc, i, j); edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n", @@ -468,8 +567,9 @@ static int __init i10nm_init(void) skx_set_mem_cfg(i10nm_check_2lm(cfg)); - rc = i10nm_get_all_munits(); - if (rc < 0) + rc = i10nm_get_ddr_munits(); + + if (i10nm_get_hbm_munits() && rc) goto fail; list_for_each_entry(d, i10nm_edac_list, list) { @@ -490,7 +590,15 @@ static int __init i10nm_init(void) d->imc[i].lmc = i; d->imc[i].src_id = src_id; d->imc[i].node_id = node_id; - d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + if (d->imc[i].hbm_mc) { + d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS; + } else { + d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS; + } rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, "Intel_10nm Socket", EDAC_MOD_STR, diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 3d0b391a400d..0c133d32b777 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -343,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, ranks = numrank(mtr); rows = numrow(mtr); - cols = numcol(mtr); + cols = imc->hbm_mc ? 
6 : numcol(mtr); - if (cfg->support_ddr5 && (amap & 0x8)) { + if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) { banks = 32; mtype = MEM_DDR5; } else { @@ -374,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, dimm->dtype = get_width(mtr); dimm->mtype = mtype; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", - imc->src_id, imc->lmc, chan, dimmno); + + if (imc->hbm_mc) + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", + imc->src_id, imc->lmc, chan); + else + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); return 1; } @@ -705,6 +710,8 @@ void skx_remove(void) } if (d->util_all) pci_dev_put(d->util_all); + if (d->pcu_cr3) + pci_dev_put(d->pcu_cr3); if (d->sad_all) pci_dev_put(d->sad_all); if (d->uracu) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 1fb7540a7092..03ac067a80b9 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -32,9 +32,17 @@ #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ -#define I10NM_NUM_IMC 4 -#define I10NM_NUM_CHANNELS 2 -#define I10NM_NUM_DIMMS 2 +#define I10NM_NUM_DDR_IMC 4 +#define I10NM_NUM_DDR_CHANNELS 2 +#define I10NM_NUM_DDR_DIMMS 2 + +#define I10NM_NUM_HBM_IMC 16 +#define I10NM_NUM_HBM_CHANNELS 2 +#define I10NM_NUM_HBM_DIMMS 1 + +#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) +#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) +#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) @@ -56,12 +64,16 @@ struct skx_dev { struct pci_dev *sad_all; struct pci_dev *util_all; struct pci_dev *uracu; /* for i10nm CPU */ + struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ void __iomem *mbase; /* for i10nm CPU */ int chan_mmio_sz; /* for i10nm CPU */ + int num_channels; /* channels per memory controller */ + int num_dimms; /* dimms per channel */ + bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ u8 src_id, node_id; @@ -134,6 +146,8 @@ struct res_config { int busno_cfg_offset; /* Per DDR channel memory-mapped I/O size */ int ddr_chan_mmio_sz; + /* Per HBM channel memory-mapped I/O size */ + int hbm_chan_mmio_sz; bool support_ddr5; /* SAD device number and function number */ unsigned int sad_all_devfn; -- Gitee From abe331ea915db3756cd86117eca06e2a8de2115e Mon Sep 17 00:00:00 2001 From: Naveen Krishna Chatradhi Date: Wed, 30 Jun 2021 20:58:24 +0530 Subject: [PATCH 14/16] EDAC/mc: Add new HBM2 memory type mainline inclusion from mainline-v5.13 commit e1ca90b7cc5cb5d3a38321cbb65ad36a59fcb574 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit e1ca90b7cc5c EDAC/mc: Add new HBM2 memory type. Backport to add EDAC HBM support. -------------------------------- Add a new entry to 'enum mem_type' and a new string to 'edac_mem_types[]' for HBM2 (High Bandwidth Memory Gen 2) new memory type. 
Reviewed-by: Yazen Ghannam Signed-off-by: Muralidhara M K Signed-off-by: Naveen Krishna Chatradhi Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210630152828.162659-4-nchatrad@amd.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/edac_mc.c | 1 + include/linux/edac.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 34514c638f19..bf4297075c22 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -163,6 +163,7 @@ const char * const edac_mem_types[] = { [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", [MEM_DDR5] = "Unbuffered-DDR5", [MEM_NVDIMM] = "Non-volatile-RAM", + [MEM_HBM2] = "High-bandwidth-memory-Gen2", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/include/linux/edac.h b/include/linux/edac.h index 6c4565cc6273..490134495008 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -181,6 +181,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM + * @MEM_HBM2: High bandwidth Memory Gen 2. 
*/ enum mem_type { MEM_EMPTY = 0, @@ -206,6 +207,7 @@ enum mem_type { MEM_LRDDR4, MEM_DDR5, MEM_NVDIMM, + MEM_HBM2, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -230,6 +232,7 @@ enum mem_type { #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_HBM2 BIT(MEM_HBM2) /** * enum edac-type - Error Detection and Correction capabilities and mode -- Gitee From e6009129483d20913a2fa89a88d9f4c2cf3613d1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 20 Jul 2021 09:30:09 -0700 Subject: [PATCH 15/16] EDAC/skx_common: Set the memory type correctly for HBM memory mainline inclusion from mainline-v5.15-rc1 commit fd07a4a0d30b5468a1f4a0739e34f5f014df7d44 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit fd07a4a0d30b EDAC/skx_common: Set the memory type correctly for HBM memory. Backport to add EDAC HBM support. -------------------------------- Set the memory type to MEM_HBM2 if it's managed by the HBM2 memory controller. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20210720163009.GA1417532@agluck-desk2.amr.corp.intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/skx_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 0c133d32b777..19c17c5198c5 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -345,7 +345,10 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, rows = numrow(mtr); cols = imc->hbm_mc ? 
6 : numcol(mtr); - if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) { + if (imc->hbm_mc) { + banks = 32; + mtype = MEM_HBM2; + } else if (cfg->support_ddr5 && (amap & 0x8)) { banks = 32; mtype = MEM_DDR5; } else { -- Gitee From 2a1f0bb89be8bc8e0c781b9ce6397790ec14a7df Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 24 Dec 2021 04:11:26 -0500 Subject: [PATCH 16/16] EDAC/i10nm: Release mdev/mbase when failing to detect HBM mainline inclusion from mainline-v5.16 commit c370baa328022cbd46c59c821d1b467a97f047be category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HAC1 CVE: NA Intel-SIG: commit c370baa32802 EDAC/i10nm: Release mdev/mbase when failing to detect HBM. Backport to add EDAC HBM support. -------------------------------- On systems without HBM (High Bandwidth Memory) mdev/mbase are not released/unmapped. Add the code to release mdev/mbase when failing to detect HBM. [Tony: re-word commit message] Cc: Fixes: c945088384d0 ("EDAC/i10nm: Add support for high bandwidth memory") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20211224091126.1246-1-qiuxu.zhuo@intel.com Signed-off-by: Youquan Song Signed-off-by: Jason Zeng --- drivers/edac/i10nm_base.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 47d6d7282c58..d63ddc9c994d 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; @@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + 
d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } -- Gitee