From f47725fdbf6d962f86dcc591b65e231e114e8ac5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 14 Jun 2024 11:05:43 +0300 Subject: [PATCH 01/15] memblock: use numa_valid_node() helper to check for invalid node ID mainline inclusion from mainline-v6.10-rc5 commit 8043832e2a123fd9372007a29192f2f3ba328cd6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8043832e2a123fd9372007a29192f2f3ba328cd6 ------------------------------------------------- Intel-SIG: commit 8043832e2a12 memblock: use numa_valid_node() helper to check for invalid node ID Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D Introduce numa_valid_node(nid) that verifies that nid is a valid node ID and use that instead of comparing nid parameter with either NUMA_NO_NODE or MAX_NUMNODES. This makes the checks for valid node IDs consistent and more robust and allows to get rid of multiple WARNings. Suggested-by: Linus Torvalds Signed-off-by: Mike Rapoport (IBM) [ Zhang Rui: resolve conflict and amend commit log ] Signed-off-by: Zhang Rui --- include/linux/numa.h | 5 +++++ mm/memblock.c | 18 ++++-------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/include/linux/numa.h b/include/linux/numa.h index fdcd888f70cd..2667ad307f24 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -13,6 +13,11 @@ #define NUMA_NO_NODE (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo diff --git a/mm/memblock.c b/mm/memblock.c index 94008ef55a82..d3ba47f8c988 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -965,7 +965,7 @@ static bool should_skip_region(struct memblock_type *type, return false; /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) + if (numa_valid_node(nid) && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ @@ -1022,10 +1022,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - for (; idx_a < type_a->cnt; idx_a++) { struct memblock_region *m = &type_a->regions[idx_a]; @@ -1119,9 +1115,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1207,7 +1200,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) continue; - if (nid == MAX_NUMNODES || nid == r_nid) + if (!numa_valid_node(nid) || nid == r_nid) break; } if (*idx >= type->cnt) { @@ -1353,9 +1346,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); @@ -1368,7 +1358,7 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, if (found && !memblock_reserve(found, size)) goto done; - if (nid != NUMA_NO_NODE && !exact_nid) { + if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); @@ -1969,7 +1959,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_NEED_MULTIPLE_NODES - if (memblock_get_region_node(rgn) != MAX_NUMNODES) + if (numa_valid_node(memblock_get_region_node(rgn))) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif -- Gitee From 4eaff5bd94874cf2df71e37d73f09f14dfce4ee8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 29 Jan 2024 14:20:40 +0800 Subject: [PATCH 02/15] EDAC/i10nm: Add Intel Grand Ridge micro-server support mainline inclusion from mainline-v6.9-rc1 commit e77086c3750834553cf6fd2255c5f3ee04843ed8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e77086c3750834553cf6fd2255c5f3ee04843ed8 ------------------------------------------------- Intel-SIG: commit e77086c37508 EDAC/i10nm: Add Intel Grand Ridge micro-server support Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The Grand Ridge CPU model uses similar memory controller registers with Granite Rapids server. Add Grand Ridge CPU model ID for EDAC support. Tested-by: Ricardo Neri Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20240129062040.60809-3-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 7a9cdd13c889..7a6d479e75ca 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -951,6 +951,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); -- Gitee From cd118eee68fd8495731e33bd0b0813045a527210 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 14 Feb 2025 08:27:28 +0800 Subject: [PATCH 03/15] EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids mainline inclusion from mainline-v6.15-rc1 commit d9207cf7760f5f5599e9ff7eb0fedf56821a1d59 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d9207cf7760f5f5599e9ff7eb0fedf56821a1d59 ------------------------------------------------- Intel-SIG: commit d9207cf7760f EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D When doing error injection to some memory DIMMs on certain Intel Emerald Rapids servers, the i10nm_edac missed error reports for some memory DIMMs. Certain BIOS configurations may hide some memory controllers, and the i10nm_edac doesn't enumerate these hidden memory controllers. However, the ADXL decodes memory errors using memory controller physical indices even if there are hidden memory controllers. Therefore, the memory controller physical indices reported by the ADXL may mismatch the logical indices enumerated by the i10nm_edac, resulting in missed error reports for some memory DIMMs. Fix this issue by creating a mapping table from memory controller physical indices (used by the ADXL) to logical indices (used by the i10nm_edac) and using it to convert the physical indices to the logical indices during the error handling process. Fixes: c545f5e41225 ("EDAC/i10nm: Skip the absent memory controllers") Reported-by: Kevin Chang Tested-by: Kevin Chang Reported-by: Thomas Chen Tested-by: Thomas Chen Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250214002728.6287-1-qiuxu.zhuo@intel.com [ Zhang Rui: convert code to follow C89 format and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 2 ++ drivers/edac/skx_common.c | 35 +++++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 11 +++++++++++ 3 files changed, 48 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 7a6d479e75ca..b864e336e869 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -755,6 +755,8 @@ static int i10nm_get_ddr_munits(void) continue; } else { d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); lmc++; } } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 3d912671d4a3..7fcd18717227 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -121,6 +121,37 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); +static void skx_init_mc_mapping(struct skx_dev *d) +{ + int i; + + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. + */ + for (i = 0; i < NUM_IMC; i++) + d->mc_mapping[i] = i; +} + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->mc_mapping[pmc] = lmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, d->mc_mapping[pmc]); + + return d->mc_mapping[pmc]; +} + static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; @@ -188,6 +219,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + res->imc = skx_get_mc_mapping(d, res->imc); + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -308,6 +341,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->bus[0], d->bus[1], d->bus[2], d->bus[3]); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 85c978b61236..859746091a47 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -94,6 +94,16 @@ struct skx_dev { struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error address using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used the EDAC driver) of present memory controllers during the + * error handling process. + */ + u8 mc_mapping[NUM_IMC]; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ @@ -243,6 +253,7 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); -- Gitee From 06e2ff9291ea70ae668b2b76634260a4a01f2cc8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:19 +0800 Subject: [PATCH 04/15] EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0 mainline inclusion from mainline-v6.16-rc1 commit eeed3e03f4261e5e381a72ae099ff00ccafbb437 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eeed3e03f4261e5e381a72ae099ff00ccafbb437 ------------------------------------------------- Intel-SIG: commit eeed3e03f426 EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0 Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D When enabling the retry_rd_err_log (RRL) feature during the loading of the i10nm_edac driver with the module parameter retry_rd_err_log=2 (Linux RRL control mode), the default values of the control bits of RRL are saved so that they can be restored during the unloading of the driver. In the current code, the RRL of pseudo channel 1 of HBM overwrites pseudo channel 0 during the loading of the driver, resulting in the loss of saved RRL for pseudo channel 0. This causes the RRL of pseudo channel 0 of HBM to be wrongly restored with the values from pseudo channel 1 when unloading the driver. Fix this issue by creating two separate groups of RRL control registers per channel to save default RRL settings of two {sub-,pseudo-}channels. Fixes: acd4cf68fefe ("EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-3-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 35 +++++++++++++++++++---------------- drivers/edac/skx_common.h | 11 ++++++++--- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index b864e336e869..e8100df1b0ba 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -99,7 +99,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, u32 *offsets_scrub, u32 *offsets_demand, u32 *offsets_demand2) { @@ -112,10 +112,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable if (enable) { /* Save default configurations */ - imc->chan[chan].retry_rd_err_log_s = s; - imc->chan[chan].retry_rd_err_log_d = d; + rrl_ctl[0] = s; + rrl_ctl[1] = d; if (offsets_demand2) - imc->chan[chan].retry_rd_err_log_d2 = d2; + rrl_ctl[2] = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; @@ -129,25 +129,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable } } else { /* Restore default configurations */ - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC) s |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) s &= ~RETRY_RD_ERR_LOG_EN; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) d |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) d |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { - if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) d2 |= RETRY_RD_ERR_LOG_UC; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) d2 &= ~RETRY_RD_ERR_LOG_EN; } } @@ -161,6 +161,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable static void enable_retry_rd_err_log(bool enable) { int i, j, imc_num, chan_num; + struct skx_channel *chan; struct skx_imc *imc; struct skx_dev *d; @@ -175,8 +176,9 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub, res_cfg->offsets_demand, res_cfg->offsets_demand2); @@ -190,12 +192,13 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase || !imc->hbm_mc) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub_hbm0, res_cfg->offsets_demand_hbm0, NULL); - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], res_cfg->offsets_scrub_hbm1, res_cfg->offsets_demand_hbm1, NULL); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 859746091a47..b89fc9bd10ec 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -80,6 +80,9 @@ */ #define MCACOD_EXT_MEM_ERR 0x280 +/* Max RRL register sets per {,sub-,pseudo-}channel. */ +#define NUM_RRL_SET 3 + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -118,9 +121,11 @@ struct skx_dev { struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; - u32 retry_rd_err_log_s; - u32 retry_rd_err_log_d; - u32 retry_rd_err_log_d2; + /* + * Two groups of RRL control registers per channel to save default RRL + * settings of two {sub-,pseudo-}channels in Linux RRL control mode. + */ + u32 rrl_ctl[2][NUM_RRL_SET]; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; -- Gitee From 2bcb2760219d79552dbd094af457d58d180ec674 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 29 Aug 2024 14:13:09 +0800 Subject: [PATCH 05/15] EDAC/{skx_common,i10nm}: Remove the AMAP register for determing DDR5 mainline inclusion from mainline-v6.12-rc1 commit 7a33c144c28ebb27c59963007992fed15f3953b3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7a33c144c28ebb27c59963007992fed15f3953b3 ------------------------------------------------- Intel-SIG: commit 7a33c144c28e EDAC/{skx_common,i10nm}: Remove the AMAP register for determing DDR5 Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The configuration flag 'res_config->support_ddr5 = true' sufficiently indicates DDR5 memory support for Sapphire Rapids and Granite Rapids. Additionally, the i10nm_edac driver doesn't need to use the AMAP register for setting the 'fine_grain_bank' of each DIMM. Therefore, remove the AMAP register for determining DDR5. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20240829061309.57738-1-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 9 ++------- drivers/edac/skx_common.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index e8100df1b0ba..76794511f38c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -47,10 +47,6 @@ readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ (i) * (m)->chan_mmio_sz) -#define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \ - (res_cfg->type == GNR ? 0xc14 : 0x20814)) + \ - (i) * (m)->chan_mmio_sz) #define I10NM_GET_REG32(m, i, offset) \ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_REG64(m, i, offset) \ @@ -976,7 +972,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; - u32 mtr, amap, mcddrtcfg = 0; + u32 mtr, mcddrtcfg = 0; struct dimm_info *dimm; int i, j, ndimms; @@ -985,7 +981,6 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, continue; ndimms = 0; - amap = I10NM_GET_AMAP(imc, i); if (res_cfg->type != GNR) mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); @@ -997,7 +992,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 7fcd18717227..8bb373114c9f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -422,7 +422,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, if (imc->hbm_mc) { banks = 32; mtype = MEM_HBM2; - } else if (cfg->support_ddr5 && (amap & 0x8)) { + } else if (cfg->support_ddr5) { banks = 32; mtype = MEM_DDR5; } else { -- Gitee From 3a3d3e0ebb7628c76e2786dff9ab96acc90340ac Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 3 Dec 2024 10:20:38 +0800 Subject: [PATCH 06/15] EDAC/i10nm: Add Intel Clearwater Forest server support mainline inclusion from mainline-v6.14-rc1 commit 2e55bb9b71e179c37d05deff37daa0dd8d04b59d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2e55bb9b71e179c37d05deff37daa0dd8d04b59d ------------------------------------------------- Intel-SIG: commit 2e55bb9b71e1 EDAC/i10nm: Add Intel Clearwater Forest server support Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D Clearwater Forest is the successor to Sierra Forest. Add Clearwater Forest CPU model ID for EDAC support. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Yi Lai Link: https://lore.kernel.org/r/20241203022038.72873-1-qiuxu.zhuo@intel.com [ Zhang Rui: resolve conflict (use old X86 Macro) and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 76794511f38c..d1fedafa0bef 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -953,6 +953,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_DARKMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); -- Gitee From 38f1fc36dc110463ca6d8f659a802b481df2d677 Mon Sep 17 00:00:00 2001 From: Kyle Meyer Date: Thu, 12 Dec 2024 19:25:49 -0600 Subject: [PATCH 07/15] EDAC/{i10nm,skx,skx_common}: Support UV systems mainline inclusion from mainline-v6.14-rc1 commit 584e09743d2f44905290b0dbf3215064d2a1888c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=584e09743d2f44905290b0dbf3215064d2a1888c ------------------------------------------------- Intel-SIG: commit 584e09743d2f EDAC/{i10nm,skx,skx_common}: Support UV systems Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The 3-bit source IDs in PCI configuration space registers, used to map devices to sockets, are limited to 8 unique IDs, and each ID is local to a UPI/QPI domain. Source IDs cannot be used to map devices to sockets on UV systems because they can exceed 8 sockets and have multiple UPI/QPI domains with identical, repeating source IDs. Use NUMA information to get package IDs instead of source IDs on UV systems, and use package/source IDs to name IMC information structures. Signed-off-by: Kyle Meyer Signed-off-by: Tony Luck Tested-by: Qiuxu Zhuo Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/all/20241213012549.43099-1-kyle.meyer@hpe.com/ [ Zhang Rui: resolve confilct (use topology_physical_package_id) amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 11 +++------ drivers/edac/skx_base.c | 9 +++----- drivers/edac/skx_common.c | 47 +++++++++++++++++++++++++++------------ drivers/edac/skx_common.h | 3 +-- 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index d1fedafa0bef..2d7695f4fcb5 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1016,7 +1016,7 @@ static struct notifier_block i10nm_mce_dec = { static int __init i10nm_init(void) { - u8 mc = 0, src_id = 0, node_id = 0; + u8 mc = 0, src_id = 0; const struct x86_cpu_id *id; struct res_config *cfg; const char *owner; @@ -1073,19 +1073,14 @@ static int __init i10nm_init(void) if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - - edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id); + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < imc_num; i++) { if (!d->imc[i].mdev) continue; d->imc[i].mc = mc++; d->imc[i].lmc = i; - d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; + d->imc[i].src_id = src_id; if (d->imc[i].hbm_mc) { d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; d->imc[i].num_channels = cfg->hbm_chan_num; diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 0280518a51a9..ad410444af20 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -600,7 +600,7 @@ static int __init skx_init(void) const struct munit *m; const char *owner; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; - u8 mc = 0, src_id, node_id; + u8 mc = 0, src_id; struct skx_dev *d; edac_dbg(2, "\n"); @@ -647,15 +647,12 @@ static int __init skx_init(void) rc = skx_get_src_id(d, 0xf0, &src_id); if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < SKX_NUM_IMC; i++) { d->imc[i].mc = mc++; d->imc[i].lmc = i; d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 8bb373114c9f..65af9228b851 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "edac_module.h" #include "skx_common.h" @@ -255,33 +256,51 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) } EXPORT_SYMBOL_GPL(skx_set_decode); -int skx_get_src_id(struct skx_dev *d, int off, u8 *id) +static int skx_get_pkg_id(struct skx_dev *d, u8 *id) { - u32 reg; + int node; + int cpu; - if (pci_read_config_dword(d->util_all, off, ®)) { - skx_printk(KERN_ERR, "Failed to read src id\n"); - return -ENODEV; + node = pcibus_to_node(d->util_all->bus); + if (numa_valid_node(node)) { + for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + *id = topology_physical_package_id(cpu); + return 0; + } + } } - *id = GET_BITFIELD(reg, 12, 14); - return 0; + skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); + return -ENODEV; } -EXPORT_SYMBOL_GPL(skx_get_src_id); -int skx_get_node_id(struct skx_dev *d, u8 *id) +int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { u32 reg; - if (pci_read_config_dword(d->util_all, 0xf4, ®)) { - skx_printk(KERN_ERR, "Failed to read node id\n"); + /* + * The 3-bit source IDs in PCI configuration space registers are limited + * to 8 unique IDs, and each ID is local to a UPI/QPI domain. + * + * Source IDs cannot be used to map devices to sockets on UV systems + * because they can exceed 8 sockets and have multiple UPI/QPI domains + * with identical, repeating source IDs. + */ + if (is_uv_system()) + return skx_get_pkg_id(d, id); + + if (pci_read_config_dword(d->util_all, off, ®)) { + skx_printk(KERN_ERR, "Failed to read src id\n"); return -ENODEV; } - *id = GET_BITFIELD(reg, 0, 2); + *id = GET_BITFIELD(reg, 12, 14); return 0; } -EXPORT_SYMBOL_GPL(skx_get_node_id); +EXPORT_SYMBOL_GPL(skx_get_src_id); static int get_width(u32 mtr) { @@ -543,7 +562,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, pvt->imc = imc; mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, - imc->node_id, imc->lmc); + imc->src_id, imc->lmc); if (!mci->ctl_name) { rc = -ENOMEM; goto fail0; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b89fc9bd10ec..bd2d9eaeb448 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -117,7 +117,7 @@ struct skx_dev { bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ - u8 src_id, node_id; + u8 src_id; struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; @@ -261,7 +261,6 @@ void skx_set_res_cfg(struct res_config *cfg); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); -int skx_get_node_id(struct skx_dev *d, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); -- Gitee From 97001ba96ea5e33693d02810bd85d80b649750d8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:20 +0800 Subject: [PATCH 08/15] EDAC/i10nm: Explicitly set the modes of the RRL register sets mainline inclusion from mainline-v6.16-rc1 commit 4878e1e90056230cefd580136d0e6d5689a7b770 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4878e1e90056230cefd580136d0e6d5689a7b770 ------------------------------------------------- Intel-SIG: commit 4878e1e90056 EDAC/i10nm: Explicitly set the modes of the RRL register sets Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The i10nm_edac driver uses the default modes (either patrol scrub read or on-demand read) of the RRL register sets configured by the BIOS. Explicitly set the modes during the loading of the i10nm_edac driver with the module parameter retry_rd_err_log=2. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-4-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2d7695f4fcb5..ff3dada5b543 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -69,6 +69,7 @@ #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) #define RETRY_RD_ERR_LOG_UC BIT(1) +#define RETRY_RD_ERR_LOG_EN_PATSPR BIT(13) #define RETRY_RD_ERR_LOG_NOOVER BIT(14) #define RETRY_RD_ERR_LOG_EN BIT(15) #define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) @@ -114,12 +115,15 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable rrl_ctl[2] = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + s |= RETRY_RD_ERR_LOG_EN_PATSPR; s |= RETRY_RD_ERR_LOG_EN; d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + d &= ~RETRY_RD_ERR_LOG_EN_PATSPR; d |= RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { d2 &= ~RETRY_RD_ERR_LOG_UC; + d2 &= ~RETRY_RD_ERR_LOG_EN_PATSPR; d2 |= RETRY_RD_ERR_LOG_NOOVER; d2 |= RETRY_RD_ERR_LOG_EN; } @@ -129,18 +133,24 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable s |= RETRY_RD_ERR_LOG_UC; if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) s |= RETRY_RD_ERR_LOG_NOOVER; + if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN_PATSPR)) + s &= ~RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) s &= ~RETRY_RD_ERR_LOG_EN; if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) d |= RETRY_RD_ERR_LOG_UC; if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) d |= RETRY_RD_ERR_LOG_NOOVER; + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_EN_PATSPR) + d |= RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) d2 |= RETRY_RD_ERR_LOG_UC; + if (rrl_ctl[2] & RETRY_RD_ERR_LOG_EN_PATSPR) + d2 |= RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) d2 &= ~RETRY_RD_ERR_LOG_NOOVER; if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) -- Gitee From 3386bcce7cf4baf2458f0499e907897b0cef8fb2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:21 +0800 Subject: [PATCH 09/15] EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers mainline inclusion from mainline-v6.16-rc1 commit 1a8a6af663a7f16c9b2779cf728187775735047b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1a8a6af663a7f16c9b2779cf728187775735047b ------------------------------------------------- Intel-SIG: commit 1a8a6af663a7 EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D As the number of RRL (retry_rd_err_log) registers per memory channel increases, the positions of the RRL control bits and the widths of the RRL registers vary across different CPU generations. Adding RRL support for a new CPU requires handling these differences throughout the RRL-related code. Structure the offsets, widths, control bit positions, set numbers, modes, etc., of the per-channel RRL registers and make them configurable to facilitate easier RRL support for new CPUs. No functional changes are intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-5-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 92 ++++++++++++++++++++++++--------------- drivers/edac/skx_common.h | 21 +++++---- 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index ff3dada5b543..f7b88194d10b 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -86,15 +86,38 @@ static int retry_rd_err_log; static int decoding_via_mca; static bool mem_cfg_2lm; -static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; -static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; -static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; -static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; -static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; +static struct reg_rrl icx_reg_rrl_ddr = { + .set_num = 2, + .offsets = { + {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, + }, +}; + +static struct reg_rrl spr_reg_rrl_ddr = { + .set_num = 3, + .offsets = { + {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, + {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, + }, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch0 = { + .set_num = 2, + .offsets = { + {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, + {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, + }, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch1 = { + .set_num = 2, + .offsets = { + {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, + {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, + }, +}; static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, u32 *offsets_scrub, u32 *offsets_demand, @@ -185,9 +208,11 @@ static void enable_retry_rd_err_log(bool enable) chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->offsets_scrub, - res_cfg->offsets_demand, - res_cfg->offsets_demand2); + res_cfg->reg_rrl_ddr->offsets[0], + res_cfg->reg_rrl_ddr->offsets[1], + res_cfg->reg_rrl_ddr->set_num > 2 ? + res_cfg->reg_rrl_ddr->offsets[2] : NULL); + } imc_num += res_cfg->hbm_imc_num; @@ -201,12 +226,12 @@ static void enable_retry_rd_err_log(bool enable) chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) { __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->offsets_scrub_hbm0, - res_cfg->offsets_demand_hbm0, + res_cfg->reg_rrl_hbm[0]->offsets[0], + res_cfg->reg_rrl_hbm[0]->offsets[1], NULL); __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], - res_cfg->offsets_scrub_hbm1, - res_cfg->offsets_demand_hbm1, + res_cfg->reg_rrl_hbm[1]->offsets[0], + res_cfg->reg_rrl_hbm[1]->offsets[1], NULL); } } @@ -233,17 +258,18 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, pch = res->cs & 1; if (pch) - offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : - res_cfg->offsets_demand_hbm1; + offsets = scrub_err ? res_cfg->reg_rrl_hbm[1]->offsets[0] : + res_cfg->reg_rrl_hbm[1]->offsets[1]; else - offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : - res_cfg->offsets_demand_hbm0; + offsets = scrub_err ? res_cfg->reg_rrl_hbm[0]->offsets[0] : + res_cfg->reg_rrl_hbm[0]->offsets[1]; } else { if (scrub_err) { - offsets = res_cfg->offsets_scrub; + offsets = res_cfg->reg_rrl_ddr->offsets[0]; } else { - offsets = res_cfg->offsets_demand; - xffsets = res_cfg->offsets_demand2; + offsets = res_cfg->reg_rrl_ddr->offsets[1]; + if (res_cfg->reg_rrl_ddr->set_num > 2) + xffsets = res_cfg->reg_rrl_ddr->offsets[2]; } } @@ -883,8 +909,7 @@ static struct res_config i10nm_cfg0 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config i10nm_cfg1 = { @@ -902,8 +927,7 @@ static struct res_config i10nm_cfg1 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config spr_cfg = { @@ -926,13 +950,9 @@ static struct res_config spr_cfg = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x300, - .offsets_scrub = offsets_scrub_spr, - .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, - .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, - .offsets_demand = offsets_demand_spr, - .offsets_demand2 = offsets_demand2_spr, - .offsets_demand_hbm0 = offsets_demand_spr_hbm0, - .offsets_demand_hbm1 = offsets_demand_spr_hbm1, + .reg_rrl_ddr = &spr_reg_rrl_ddr, + .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, + .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, }; static struct res_config gnr_cfg = { @@ -1117,7 +1137,7 @@ static int __init i10nm_init(void) mce_register_decode_chain(&i10nm_mce_dec); skx_setup_debug("i10nm_test"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); @@ -1137,7 +1157,7 @@ static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(NULL, NULL); if (retry_rd_err_log == 2) enable_retry_rd_err_log(false); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index bd2d9eaeb448..cda7a16b8bee 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -82,6 +82,15 @@ /* Max RRL register sets per {,sub-,pseudo-}channel. */ #define NUM_RRL_SET 3 +/* Max RRL registers per set. */ +#define NUM_RRL_REG 6 + +/* RRL registers per {,sub-,pseudo-}channel. */ +struct reg_rrl { + /* RRL register parts. */ + int set_num; + u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; +}; /* * Each cpu socket contains some pci devices that provide global @@ -238,14 +247,10 @@ struct res_config { /* HBM mdev device BDF */ struct pci_bdf hbm_mdev_bdf; int sad_all_offset; - /* Offsets of retry_rd_err_log registers */ - u32 *offsets_scrub; - u32 *offsets_scrub_hbm0; - u32 *offsets_scrub_hbm1; - u32 *offsets_demand; - u32 *offsets_demand2; - u32 *offsets_demand_hbm0; - u32 *offsets_demand_hbm1; + /* RRL register sets per DDR channel */ + struct reg_rrl *reg_rrl_ddr; + /* RRL register sets per HBM channel */ + struct reg_rrl *reg_rrl_hbm[2]; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, -- Gitee From 78acba6340f897a80e364052e0d0330323f32431 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:22 +0800 Subject: [PATCH 10/15] EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log() mainline inclusion from mainline-v6.16-rc1 commit ba3985c1faf5eb72084ddc31204b076c2a450263 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ba3985c1faf5eb72084ddc31204b076c2a450263 ------------------------------------------------- Intel-SIG: commit ba3985c1faf5 EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log() Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D Refactor enable_retry_rd_err_log() using helper functions for both DDR and HBM, making the RRL control bits configurable instead of hard-coded. Additionally, explicitly define the four RRL modes for better readability. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-6-qiuxu.zhuo@intel.com [ Zhang Rui: convert code to follow C89 format and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 239 ++++++++++++++++++++++---------------- drivers/edac/skx_common.h | 20 ++++ 2 files changed, 158 insertions(+), 101 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index f7b88194d10b..b3478046fbd2 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -68,11 +68,6 @@ #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) -#define RETRY_RD_ERR_LOG_UC BIT(1) -#define RETRY_RD_ERR_LOG_EN_PATSPR BIT(13) -#define RETRY_RD_ERR_LOG_NOOVER BIT(14) -#define RETRY_RD_ERR_LOG_EN BIT(15) -#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) #define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) #define I10NM_MAX_SAD 16 @@ -88,153 +83,195 @@ static bool mem_cfg_2lm; static struct reg_rrl icx_reg_rrl_ddr = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, }, + .widths = {4, 4, 4, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_ddr = { .set_num = 3, + .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_hbm_pch0 = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, - u32 *offsets_scrub, u32 *offsets_demand, - u32 *offsets_demand2) +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { - u32 s, d, d2; + switch (width) { + case 4: + return I10NM_GET_REG32(imc, chan, offset); + case 8: + return I10NM_GET_REG64(imc, chan, offset); + default: + i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width); + return 0; + } +} + +static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) +{ + switch (width) { + case 4: + return I10NM_SET_REG32(imc, chan, offset, (u32)val); + default: + i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width); + } +} + +static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + int rrl_set, bool enable, u32 *rrl_ctl) +{ + enum rrl_mode mode = rrl->modes[rrl_set]; + u32 offset = rrl->offsets[rrl_set][0], v; + u8 width = rrl->widths[0]; + bool first, scrub; - s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); - if (offsets_demand2) - d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); + /* First or last read error. */ + first = (mode == FRE_SCRUB || mode == FRE_DEMAND); + /* Patrol scrub or on-demand read error. */ + scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB); + + v = read_imc_reg(imc, chan, offset, width); if (enable) { - /* Save default configurations */ - rrl_ctl[0] = s; - rrl_ctl[1] = d; - if (offsets_demand2) - rrl_ctl[2] = d2; - - s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - s |= RETRY_RD_ERR_LOG_EN_PATSPR; - s |= RETRY_RD_ERR_LOG_EN; - d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - d &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - d |= RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - d2 &= ~RETRY_RD_ERR_LOG_UC; - d2 &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - d2 |= RETRY_RD_ERR_LOG_NOOVER; - d2 |= RETRY_RD_ERR_LOG_EN; - } + /* Save default configurations. */ + *rrl_ctl = v; + v &= ~rrl->uc_mask; + + if (first) + v |= rrl->noover_mask; + else + v &= ~rrl->noover_mask; + + if (scrub) + v |= rrl->en_patspr_mask; + else + v &= ~rrl->en_patspr_mask; + + v |= rrl->en_mask; } else { - /* Restore default configurations */ - if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC) - s |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) - s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN_PATSPR)) - s &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) - s &= ~RETRY_RD_ERR_LOG_EN; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) - d |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) - d |= RETRY_RD_ERR_LOG_NOOVER; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_EN_PATSPR) - d |= RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) - d &= ~RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) - d2 |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[2] & RETRY_RD_ERR_LOG_EN_PATSPR) - d2 |= RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) - d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) - d2 &= ~RETRY_RD_ERR_LOG_EN; + /* Restore default configurations. */ + if (*rrl_ctl & rrl->uc_mask) + v |= rrl->uc_mask; + + if (first) { + if (!(*rrl_ctl & rrl->noover_mask)) + v &= ~rrl->noover_mask; + } else { + if (*rrl_ctl & rrl->noover_mask) + v |= rrl->noover_mask; } + + if (scrub) { + if (!(*rrl_ctl & rrl->en_patspr_mask)) + v &= ~rrl->en_patspr_mask; + } else { + if (*rrl_ctl & rrl->en_patspr_mask) + v |= rrl->en_patspr_mask; + } + + if (!(*rrl_ctl & rrl->en_mask)) + v &= ~rrl->en_mask; } - I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, offsets_demand[0], d); - if (offsets_demand2) - I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); + write_imc_reg(imc, chan, offset, width, v); +} + +static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + bool enable, u32 *rrl_ctl) +{ + int i; + + for (i = 0; i < rrl->set_num; i++) + enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); +} + +static void enable_rrls_ddr(struct skx_imc *imc, bool enable) +{ + struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr; + int i, chan_num = res_cfg->ddr_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase) + return; + + for (i = 0; i < chan_num; i++) + enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]); +} + +static void enable_rrls_hbm(struct skx_imc *imc, bool enable) +{ + struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm; + int i, chan_num = res_cfg->hbm_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) + return; + + for (i = 0; i < chan_num; i++) { + enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); + enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); + } } static void enable_retry_rd_err_log(bool enable) { - int i, j, imc_num, chan_num; - struct skx_channel *chan; - struct skx_imc *imc; struct skx_dev *d; + int i, imc_num; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) { imc_num = res_cfg->ddr_imc_num; - chan_num = res_cfg->ddr_chan_num; - - for (i = 0; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase) - continue; - - chan = d->imc[i].chan; - for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->reg_rrl_ddr->offsets[0], - res_cfg->reg_rrl_ddr->offsets[1], - res_cfg->reg_rrl_ddr->set_num > 2 ? - res_cfg->reg_rrl_ddr->offsets[2] : NULL); - - } + for (i = 0; i < imc_num; i++) + enable_rrls_ddr(&d->imc[i], enable); imc_num += res_cfg->hbm_imc_num; - chan_num = res_cfg->hbm_chan_num; - - for (; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase || !imc->hbm_mc) - continue; - - chan = d->imc[i].chan; - for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->reg_rrl_hbm[0]->offsets[0], - res_cfg->reg_rrl_hbm[0]->offsets[1], - NULL); - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], - res_cfg->reg_rrl_hbm[1]->offsets[0], - res_cfg->reg_rrl_hbm[1]->offsets[1], - NULL); - } - } + for (; i < imc_num; i++) + enable_rrls_hbm(&d->imc[i], enable); } } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index cda7a16b8bee..7fc0c5af43e7 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -85,11 +85,31 @@ /* Max RRL registers per set. */ #define NUM_RRL_REG 6 +/* Modes of RRL register set. */ +enum rrl_mode { + /* Last read error from patrol scrub. */ + LRE_SCRUB, + /* Last read error from demand. */ + LRE_DEMAND, + /* First read error from patrol scrub. */ + FRE_SCRUB, + /* First read error from demand. */ + FRE_DEMAND, +}; + /* RRL registers per {,sub-,pseudo-}channel. */ struct reg_rrl { /* RRL register parts. */ int set_num; + enum rrl_mode modes[NUM_RRL_SET]; u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; + /* RRL register widths in byte per set. */ + u8 widths[NUM_RRL_REG]; + /* RRL control bits of the first register per set. */ + u32 uc_mask; + u32 en_patspr_mask; + u32 noover_mask; + u32 en_mask; }; /* -- Gitee From 02e502e1cf0351b3f82b72fc14f2f0182859d26d Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:23 +0800 Subject: [PATCH 11/15] EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log() mainline inclusion from mainline-v6.16-rc1 commit 126168fa2c3e16113ea75a656fff5156a54a5726 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=126168fa2c3e16113ea75a656fff5156a54a5726 ------------------------------------------------- Intel-SIG: commit 126168fa2c3e EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log() Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D Make the {valid bit, overwritten status, number} of RRL registers and the {number, offsets, widths} of per-channel CORRERRCNT registers configurable. Refactor show_retry_rd_err_log() to use the configurable fields of struct reg_rrl, making the code more scalable and simpler. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-7-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 160 +++++++++++++++++--------------------- drivers/edac/skx_common.h | 11 ++- 2 files changed, 81 insertions(+), 90 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index b3478046fbd2..1d51e6d8a796 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -83,20 +83,28 @@ static bool mem_cfg_2lm; static struct reg_rrl icx_reg_rrl_ddr = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, }, .widths = {4, 4, 4, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_ddr = { .set_num = 3, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, @@ -104,38 +112,58 @@ static struct reg_rrl spr_reg_rrl_ddr = { {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_hbm_pch0 = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) @@ -278,110 +306,64 @@ static void enable_retry_rd_err_log(bool enable) static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, int len, bool scrub_err) { + int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 log0, log1, log2, log3, log4; - u32 corr0, corr1, corr2, corr3; - u32 lxg0, lxg1, lxg3, lxg4; - u32 *xffsets = NULL; - u64 log2a, log5; - u64 lxg2a, lxg5; - u32 *offsets; - int n, pch; + u32 offset, status_mask; + struct reg_rrl *rrl; + u64 log, corr; + bool scrub; + u8 width; if (!imc->mbase) return; - if (imc->hbm_mc) { - pch = res->cs & 1; + rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr; - if (pch) - offsets = scrub_err ? res_cfg->reg_rrl_hbm[1]->offsets[0] : - res_cfg->reg_rrl_hbm[1]->offsets[1]; - else - offsets = scrub_err ? res_cfg->reg_rrl_hbm[0]->offsets[0] : - res_cfg->reg_rrl_hbm[0]->offsets[1]; - } else { - if (scrub_err) { - offsets = res_cfg->reg_rrl_ddr->offsets[0]; - } else { - offsets = res_cfg->reg_rrl_ddr->offsets[1]; - if (res_cfg->reg_rrl_ddr->set_num > 2) - xffsets = res_cfg->reg_rrl_ddr->offsets[2]; - } - } + if (!rrl) + return; - log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); - log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); - log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); - log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); - log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); - - if (xffsets) { - lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); - lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); - lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); - lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); - lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); - } + status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - if (res_cfg->type == SPR) { - log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", - log0, log1, log2a, log3, log4, log5); + n = snprintf(msg, len, " retry_rd_err_log["); + for (i = 0; i < rrl->set_num; i++) { + scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); + if (scrub_err != scrub) + continue; - if (len - n > 0) { - if (xffsets) { - lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); - n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", - lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); - } else { - n += snprintf(msg + n, len - n, "]"); - } - } - } else { - log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", - log0, log1, log2, log3, log4, log5); - } + for (j = 0; j < rrl->reg_num && len - n > 0; j++) { + offset = rrl->offsets[i][j]; + width = rrl->widths[j]; + log = read_imc_reg(imc, ch, offset, width); - if (imc->hbm_mc) { - if (pch) { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + if (width == 4) + n += snprintf(msg + n, len - n, "%.8llx ", log); + else + n += snprintf(msg + n, len - n, "%.16llx ", log); + + /* Clear RRL status if RRL in Linux control mode. */ + if (retry_rd_err_log == 2 && !j && (log & status_mask)) + write_imc_reg(imc, ch, offset, width, log & ~status_mask); } - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); } - if (len - n > 0) - snprintf(msg + n, len - n, - " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", - corr0 & 0xffff, corr0 >> 16, - corr1 & 0xffff, corr1 >> 16, - corr2 & 0xffff, corr2 >> 16, - corr3 & 0xffff, corr3 >> 16); - - /* Clear status bits */ - if (retry_rd_err_log == 2) { - if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); - } + /* Move back one space. */ + n--; + n += snprintf(msg + n, len - n, "]"); - if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); + if (len - n > 0) { + n += snprintf(msg + n, len - n, " correrrcnt["); + for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { + offset = rrl->cecnt_offsets[i]; + width = rrl->cecnt_widths[i]; + corr = read_imc_reg(imc, ch, offset, width); + + n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); } + + /* Move back one space. */ + n--; + n += snprintf(msg + n, len - n, "]"); } } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 7fc0c5af43e7..b04f4d835a9c 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -84,6 +84,8 @@ #define NUM_RRL_SET 3 /* Max RRL registers per set. */ #define NUM_RRL_REG 6 +/* Max correctable error count registers. */ +#define NUM_CECNT_REG 4 /* Modes of RRL register set. */ enum rrl_mode { @@ -100,16 +102,23 @@ enum rrl_mode { /* RRL registers per {,sub-,pseudo-}channel. */ struct reg_rrl { /* RRL register parts. */ - int set_num; + int set_num, reg_num; enum rrl_mode modes[NUM_RRL_SET]; u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; /* RRL register widths in byte per set. */ u8 widths[NUM_RRL_REG]; /* RRL control bits of the first register per set. */ + u32 v_mask; u32 uc_mask; + u32 over_mask; u32 en_patspr_mask; u32 noover_mask; u32 en_mask; + + /* CORRERRCNT register parts. */ + int cecnt_num; + u32 cecnt_offsets[NUM_CECNT_REG]; + u8 cecnt_widths[NUM_CECNT_REG]; }; /* -- Gitee From 780ee7de0186f77c494061c659a97145d6ba6c74 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:24 +0800 Subject: [PATCH 12/15] EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server mainline inclusion from mainline-v6.16-rc1 commit 5904dc561ef21e69f0b9dca39d1a66e34b7ea764 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5904dc561ef21e69f0b9dca39d1a66e34b7ea764 ------------------------------------------------- Intel-SIG: commit 5904dc561ef2 EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D Compared to previous generations, Granite Rapids defines the RRL control bits {en_patspr, noover, en} in different positions, adds an extra RRL set for the new mode of the first patrol-scrub read error, and extends the number of CORRERRCNT registers from 4 to 8, encoding one counter per CORRERRCNT register. Add a Granite Rapids reg_rrl configuration table and adjust the code to accommodate the differences mentioned above for RRL support. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-8-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 37 +++++++++++++++++++++++++++++++++++-- drivers/edac/skx_common.h | 4 ++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 1d51e6d8a796..1ac92ad01a5f 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -166,6 +166,29 @@ static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .cecnt_widths = {4, 4, 4, 4}, }; +static struct reg_rrl gnr_reg_rrl_ddr = { + .set_num = 4, + .reg_num = 6, + .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, + {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, + {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, + {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(14), + .noover_mask = BIT(15), + .en_mask = BIT(12), + + .cecnt_num = 8, + .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, + .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, +}; + static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { switch (width) { @@ -357,8 +380,17 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, width = rrl->cecnt_widths[i]; corr = read_imc_reg(imc, ch, offset, width); - n += snprintf(msg + n, len - n, "%.4llx %.4llx ", - corr & 0xffff, corr >> 16); + /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ + if (res_cfg->type <= SPR) { + n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); + } else { + /* CPUs {GNR} encode one counter per CORRERRCNT register. */ + if (width == 4) + n += snprintf(msg + n, len - n, "%.8llx ", corr); + else + n += snprintf(msg + n, len - n, "%.16llx ", corr); + } } /* Move back one space. */ @@ -989,6 +1021,7 @@ static struct res_config gnr_cfg = { .uracu_bdf = {0, 0, 1}, .ddr_mdev_bdf = {0, 5, 1}, .sad_all_offset = 0x300, + .reg_rrl_ddr = &gnr_reg_rrl_ddr, }; static const struct x86_cpu_id i10nm_cpuids[] = { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b04f4d835a9c..15cb774d88ec 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -81,11 +81,11 @@ #define MCACOD_EXT_MEM_ERR 0x280 /* Max RRL register sets per {,sub-,pseudo-}channel. */ -#define NUM_RRL_SET 3 +#define NUM_RRL_SET 4 /* Max RRL registers per set. */ #define NUM_RRL_REG 6 /* Max correctable error count registers. */ -#define NUM_CECNT_REG 4 +#define NUM_CECNT_REG 8 /* Modes of RRL register set. */ enum rrl_mode { -- Gitee From c617775b9fa1b9c98f16ec36bb6a50412852e64c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 24 Apr 2025 16:14:54 +0800 Subject: [PATCH 13/15] EDAC/i10nm: Fix the bitwise operation between variables of different sizes mainline inclusion from mainline-v6.16-rc1 commit 2b2408aca90b86c1ef51c19d834e5f6db0a1ff30 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2b2408aca90b86c1ef51c19d834e5f6db0a1ff30 ------------------------------------------------- Intel-SIG: commit 2b2408aca90b EDAC/i10nm: Fix the bitwise operation between variables of different sizes Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The tool of Smatch static checker reported the following warning: drivers/edac/i10nm_base.c:364 show_retry_rd_err_log() warn: should bitwise negate be 'ullong'? This warning was due to the bitwise NOT/AND operations between 'status_mask' (a u32 type) and 'log' (a u64 type), which resulted in the high 32 bits of 'log' were cleared. This was a false positive warning, as only the low 32 bits of 'log' was written to the first RRL memory controller register (a u32 type). To improve code sanity, fix this warning by changing 'status_mask' to a u64 type, ensuring it matches the size of 'log' for bitwise operations. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aAih0KmEVq7ch6v2@stanley.mountain/ Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250424081454.2952632-1-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 1ac92ad01a5f..5d9a3261aa8b 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -331,10 +331,10 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, { int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 offset, status_mask; + u64 log, corr, status_mask; struct reg_rrl *rrl; - u64 log, corr; bool scrub; + u32 offset; u8 width; if (!imc->mbase) -- Gitee From 58321061b65b352fcc181398397df197eb1c9f94 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 4 Jul 2025 23:16:07 +0800 Subject: [PATCH 14/15] EDAC/i10nm: Add Intel Granite Rapids-D support mainline inclusion from mainline-v6.17-rc1 commit 9ad08c1115646533097c8a799ad046bf5127b04a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9ad08c1115646533097c8a799ad046bf5127b04a ------------------------------------------------- Intel-SIG: commit 9ad08c111564 EDAC/i10nm: Add Intel Granite Rapids-D support Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D The Granite Rapids-D CPU model uses memory controller registers similar to those of the Granite Rapids server CPU but with a different memory controller MMIO base. Add the Granite Rapids-D CPU model ID and use the new memory controller MMIO base for EDAC support. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: VikasX Chougule Link: https://lore.kernel.org/r/20250704151609.7833-2-qiuxu.zhuo@intel.com [ Zhang Rui: resolve conflict (missing vfm support) and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 5d9a3261aa8b..01bb6acebb69 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -62,6 +62,7 @@ ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 +#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 #define I10NM_GNR_IMC_MMIO_SIZE 0x4000 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) @@ -691,6 +692,16 @@ static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *phy return NULL; } +static u32 get_gnr_imc_mmio_offset(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == INTEL_FAM6_GRANITERAPIDS_D) + return I10NM_GNR_D_IMC_MMIO_OFFSET; + + return I10NM_GNR_IMC_MMIO_OFFSET; +} + /** * get_ddr_munit() - Get the resource of the i-th DDR memory controller. * @@ -719,7 +730,7 @@ static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsi return NULL; *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + - I10NM_GNR_IMC_MMIO_OFFSET + + get_gnr_imc_mmio_offset() + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; *size = I10NM_GNR_IMC_MMIO_SIZE; @@ -1033,6 +1044,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_D, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_DARKMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), -- Gitee From ffa81918e80ce791a9b9836ace1f5e68c52d4a10 Mon Sep 17 00:00:00 2001 From: Wang Haoran Date: Tue, 15 Jul 2025 21:17:00 +0800 Subject: [PATCH 15/15] EDAC/{skx_common,i10nm}: Use scnprintf() for safer buffer handling mainline inclusion from mainline-v6.17-rc1 commit 35928bc38db69a2af26624e35a250c1e0f9a6a3f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/ID1YV2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=35928bc38db69a2af26624e35a250c1e0f9a6a3f ------------------------------------------------- Intel-SIG: commit 35928bc38db6 EDAC/{skx_common,i10nm}: Use scnprintf() for safer buffer handling Add EDAC basic support and RRL enhancement for CWF/SRF/GNR/GNR-D snprintf() is fragile when its return value will be used to append additional data to a buffer. Use scnprintf() instead. Signed-off-by: Wang Haoran Signed-off-by: Tony Luck Tested-by: Qiuxu Zhuo Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20250715131700.1092720-1-haoranwangsec@gmail.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 18 +++++++++--------- drivers/edac/skx_common.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 01bb6acebb69..75262ee01167 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -348,7 +348,7 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - n = snprintf(msg, len, " retry_rd_err_log["); + n = scnprintf(msg, len, " retry_rd_err_log["); for (i = 0; i < rrl->set_num; i++) { scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); if (scrub_err != scrub) @@ -360,9 +360,9 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log = read_imc_reg(imc, ch, offset, width); if (width == 4) - n += snprintf(msg + n, len - n, "%.8llx ", log); + n += scnprintf(msg + n, len - n, "%.8llx ", log); else - n += snprintf(msg + n, len - n, "%.16llx ", log); + n += scnprintf(msg + n, len - n, "%.16llx ", log); /* Clear RRL status if RRL in Linux control mode. */ if (retry_rd_err_log == 2 && !j && (log & status_mask)) @@ -372,10 +372,10 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, /* Move back one space. */ n--; - n += snprintf(msg + n, len - n, "]"); + n += scnprintf(msg + n, len - n, "]"); if (len - n > 0) { - n += snprintf(msg + n, len - n, " correrrcnt["); + n += scnprintf(msg + n, len - n, " correrrcnt["); for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { offset = rrl->cecnt_offsets[i]; width = rrl->cecnt_widths[i]; @@ -383,20 +383,20 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ if (res_cfg->type <= SPR) { - n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", corr & 0xffff, corr >> 16); } else { /* CPUs {GNR} encode one counter per CORRERRCNT register. */ if (width == 4) - n += snprintf(msg + n, len - n, "%.8llx ", corr); + n += scnprintf(msg + n, len - n, "%.8llx ", corr); else - n += snprintf(msg + n, len - n, "%.16llx ", corr); + n += scnprintf(msg + n, len - n, "%.16llx ", corr); } } /* Move back one space. */ n--; - n += snprintf(msg + n, len - n, "]"); + n += scnprintf(msg + n, len - n, "]"); } } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 65af9228b851..0bd9dd2506b1 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -688,12 +688,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, } } if (res->decoded_by_adxl) { - len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { - len = snprintf(skx_msg, MSG_SIZE, + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", -- Gitee