From a11d46d0c67cfb81bd679bc6550b241df3a78e38 Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 18:59:46 +0800 Subject: [PATCH 1/8] EDAC/mce_amd: Add LS and IF mce types for Hygon family 18h model 7h The error types are changed in LS and IF machine check control registers of Hygon family 18h model 7h processors, so add support to get the correct error types in smca error decoding process. Signed-off-by: fuhao --- drivers/edac/mce_amd.c | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 06e29d2b51d1..cf0298e44849 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -168,6 +168,33 @@ static const char * const smca_ls_mce_desc[] = { "L2 Fill Data error", }; +/* Hygon Model7h Scalable MCA LS error strings */ +static const char * const smca_ls_mce_hygon_desc[] = { + "Load queue parity error", + "Store queue parity error", + "Miss address buffer payload parity error", + "Level 1 TLB parity error", + "DC Tag error type 5", + "DC Tag error type 6", + "DC Tag error type 1", + "Internal error type 1", + "Internal error type 2", + "System Read Data Error 0", + "System Read Data Error 1", + "System Read Data Error 2", + "System Read Data Error 3", + "DC Tag error type 2", + "DC Data error type 1 and poison consumption", + "DC Data error type 2", + "DC Data error type 3", + "DC Tag error type 4", + "Level 2 TLB parity error", + "PDC parity error", + "DC Tag error type 3", + "DC Tag error type 5", + "L2 Fill Data error", +}; + static const char * const smca_ls2_mce_desc[] = { "An ECC error was detected on a data cache read by a probe or victimization", "An ECC error or L2 poison was detected on a data cache read by a load", @@ -217,6 +244,31 @@ static const char * const smca_if_mce_desc[] = { "CT MCE", }; +/* Hygon Model7h Scalable MCA IF error strings */ +static const char * const smca_if_mce_hygon_desc[] = { + "Op Cache Microtag Probe Port Parity Error", + 
"IC Microtag or Full Tag Multi-hit Error", + "IC Full Tag Parity Error", + "IC Data Array Parity Error", + "Decoupling Queue PhysAddr Parity Error", + "L0 ITLB Parity Error", + "L1 ITLB Parity Error", + "L2 ITLB Parity Error", + "BPQ 0 Snoop Parity Error", + "BPQ 1 Snoop Parity Error", + "BPQ 2 Snoop Parity Error", + "BPQ 3 Snoop Parity Error", + "L1 BTB Multi-Match Error", + "L2 BTB Multi-Match Error", + "L2 Cache Response Poison Error", + "System Read Data Error", + "Hardware Assertion Error", + "L1-TLB Multi-Hit", + "L2-TLB Multi-Hit", + "BSR Parity Error", + "CT MCE", +}; + static const char * const smca_l2_mce_desc[] = { "L2M Tag Multiple-Way-Hit error", "L2M Tag or State Array ECC Error", @@ -1433,6 +1485,16 @@ static int __init mce_amd_init(void) out: pr_info("MCE: In-kernel MCE decoding enabled.\n"); + if (c->x86_vendor == X86_VENDOR_HYGON && + c->x86_model >= 0x7 && c->x86_model <= 0xf) { + smca_mce_descs[SMCA_LS].descs = smca_ls_mce_hygon_desc; + smca_mce_descs[SMCA_LS].num_descs = ARRAY_SIZE(smca_ls_mce_hygon_desc); + smca_mce_descs[SMCA_IF].descs = smca_if_mce_hygon_desc; + smca_mce_descs[SMCA_IF].num_descs = ARRAY_SIZE(smca_if_mce_hygon_desc); + pr_info("MCE: Hygon Fam%xh Model%xh smca mce descs setup.\n", + c->x86, c->x86_model); + } + mce_register_decode_chain(&amd_mce_dec_nb); return 0; -- Gitee From c3d1e855b3664d6609974f7302b25775b62f17ec Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:04:53 +0800 Subject: [PATCH 2/8] iommu/hygon: Add support for Hygon family 18h model 10h IOAPIC The SB IOAPIC for Hygon family 18h model 10h processors is also on the device 0xb. 
Signed-off-by: fuhao --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f6c1f7e04d47..4beddd8cc420 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3045,7 +3045,7 @@ static bool __init check_ioapic_information(void) (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 == 0x18 && boot_cpu_data.x86_model >= 0x4 && - boot_cpu_data.x86_model <= 0xf && + boot_cpu_data.x86_model <= 0x10 && devid == IOAPIC_SB_DEVID_FAM18H_M4H)) { has_sb_ioapic = true; ret = true; -- Gitee From 6eb190063c87fce756a1c1ddac743ff8f1c42d9e Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:08:43 +0800 Subject: [PATCH 3/8] x86/amd_nb: Add helper function to identify Hygon family 18h model 10h Add hygon_f18h_m10h() to identify Hygon family 18h model 10h processors. Signed-off-by: fuhao --- arch/x86/include/asm/amd_nb.h | 2 ++ arch/x86/kernel/amd_nb.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 497ad86ef225..a2948b68ba83 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -83,6 +83,7 @@ bool amd_nb_has_feature(unsigned int feature); struct amd_northbridge *node_to_amd_nb(int node); bool hygon_f18h_m4h(void); +bool hygon_f18h_m10h(void); u16 hygon_nb_num(void); int get_df_id(struct pci_dev *misc, u8 *id); @@ -127,6 +128,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) #define amd_gart_present(x) false #define hygon_f18h_m4h false +#define hygon_f18h_m10h() false /* function-like: callers invoke hygon_f18h_m10h() */ #define hygon_nb_num(x) 0 #define get_df_id(x, y) NULL diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index cc5c9b0b72c3..f2830067e174 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -262,6 +262,20 @@ bool hygon_f18h_m4h(void) } EXPORT_SYMBOL_GPL(hygon_f18h_m4h); +bool hygon_f18h_m10h(void) +{ + if
(boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return false; + + if (boot_cpu_data.x86 == 0x18 && + boot_cpu_data.x86_model >= 0x10 && + boot_cpu_data.x86_model <= 0x1f) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(hygon_f18h_m10h); + u16 hygon_nb_num(void) { return nb_num; -- Gitee From ecbf7c39c79d73d80d5cc0f82a8dbe0e56e7d73d Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:09:50 +0800 Subject: [PATCH 4/8] EDAC/amd64: Adjust address translation for Hygon family 18h model 10h Add umc address translation support for Hygon family 18h model 10h. Signed-off-by: fuhao --- drivers/edac/amd64_edac.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5890c6565904..0e739514437b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1154,7 +1154,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr ctx.inst_id = umc; /* Read DramOffset, check if base 1 is used. 
*/ - if (hygon_f18h_m4h() && + if ((hygon_f18h_m4h() || hygon_f18h_m10h()) && df_indirect_read_instance(nid, 0, 0x214, umc, &ctx.tmp)) goto out_err; else if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) @@ -1182,7 +1182,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr } intlv_num_sockets = 0; - if (hygon_f18h_m4h()) + if (hygon_f18h_m4h() || hygon_f18h_m10h()) intlv_num_sockets = (ctx.tmp >> 2) & 0x3; lgcy_mmio_hole_en = ctx.tmp & BIT(1); intlv_num_chan = (ctx.tmp >> 4) & 0xF; @@ -1200,14 +1200,15 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) goto out_err; - if (!hygon_f18h_m4h()) + if (!hygon_f18h_m4h() && !hygon_f18h_m10h()) intlv_num_sockets = (ctx.tmp >> 8) & 0x1; intlv_num_dies = (ctx.tmp >> 10) & 0x3; dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); intlv_addr_bit = intlv_addr_sel + 8; - if (hygon_f18h_m4h() && boot_cpu_data.x86_model >= 0x6) { + if ((hygon_f18h_m4h() && boot_cpu_data.x86_model >= 0x6) || + hygon_f18h_m10h()) { if (df_indirect_read_instance(nid, 0, 0x60, umc, &ctx.tmp)) goto out_err; intlv_num_dies = ctx.tmp & 0x3; @@ -1272,7 +1273,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) goto out_err; - if (hygon_f18h_m4h()) + if (hygon_f18h_m4h() || hygon_f18h_m10h()) cs_fabric_id = (ctx.tmp >> 8) & 0x7FF; else cs_fabric_id = (ctx.tmp >> 8) & 0xFF; -- Gitee From fb544ed2228d8ff91c406c666c8e6c19fcd4b9a6 Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:10:54 +0800 Subject: [PATCH 5/8] EDAC/amd64: Check if umc channel is enabled for Hygon family 18h model 10h For Hygon family 18h model 10h processors, channels without memory are gated. As a result, all bits in the relevant registers are set to 1, which causes the EDAC driver to initialize incorrectly.
So add support to check if the umc channel is effectively enabled. Signed-off-by: fuhao --- drivers/edac/amd64_edac.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0e739514437b..42d7c1bc6582 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1656,12 +1656,29 @@ static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) } } +static bool hygon_umc_channel_enabled(struct amd64_pvt *pvt, int channel) +{ + u32 enable = 0; /* defined value if the unchecked DF read below stores nothing: channel is then reported as disabled */ + + if (hygon_f18h_m10h()) { + __df_indirect_read(pvt->mc_node_id, 1, 0x32c, 0xc, &enable); + if ((enable & BIT(channel))) + return true; + return false; + } + + return true; +} + static void umc_dump_misc_regs(struct amd64_pvt *pvt) { struct amd64_umc *umc; u32 i, tmp, umc_base; for_each_umc(i) { + if (!hygon_umc_channel_enabled(pvt, i)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); else @@ -1778,6 +1795,9 @@ static void umc_read_base_mask(struct amd64_pvt *pvt) int cs, umc; for_each_umc(umc) { + if (!hygon_umc_channel_enabled(pvt, umc)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, umc); else @@ -3246,6 +3266,9 @@ static void umc_read_mc_regs(struct amd64_pvt *pvt) /* Read registers from each UMC */ for_each_umc(i) { + if (!hygon_umc_channel_enabled(pvt, i)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); else -- Gitee From f6ea03b613f9774f90c0ea5da762e828ecc5c484 Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:12:31 +0800 Subject: [PATCH 6/8] EDAC/amd64: Get correct memory type for Hygon family 18h model 10h Get the correct DDR memory types for Hygon family 18h model 10h.
Signed-off-by: fuhao --- drivers/edac/amd64_edac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 42d7c1bc6582..f5877cf961de 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1906,7 +1906,9 @@ static void umc_determine_memory_type(struct amd64_pvt *pvt) * Check if the system supports the "DDR Type" field in UMC Config * and has DDR5 DIMMs in use. */ - if ((pvt->flags.zn_regs_v2 || hygon_f18h_m4h()) && + if ((pvt->flags.zn_regs_v2 || + hygon_f18h_m4h() || + hygon_f18h_m10h()) && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { if (umc->dimm_cfg & BIT(5)) umc->dram_type = MEM_LRDDR5; -- Gitee From dd47fece3d0e41b88282be82940ee46daa28db7c Mon Sep 17 00:00:00 2001 From: fuhao Date: Wed, 19 Feb 2025 19:13:57 +0800 Subject: [PATCH 7/8] EDAC/amd64: Get instance_id for Hygon family 18h model 10h The instance_id for Hygon family 18h model 10h is also obtained from IPID[23:20] and IPID[13]. Signed-off-by: fuhao --- drivers/edac/amd64_edac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index f5877cf961de..2286146e0ac7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3189,7 +3189,8 @@ static void decode_umc_error(int node_id, struct mce *m) pvt->ops->get_err_info(m, &err); - if (hygon_f18h_m4h() && boot_cpu_data.x86_model >= 0x6) + if ((hygon_f18h_m4h() && boot_cpu_data.x86_model >= 0x6) || + hygon_f18h_m10h()) umc = (err.channel << 1) + ((m->ipid & BIT(13)) >> 13); else umc = err.channel; -- Gitee From 2617602a9c062afec68f54bdcdbe300dd6bf7ab0 Mon Sep 17 00:00:00 2001 From: fuhao Date: Thu, 20 Feb 2025 11:34:27 +0800 Subject: [PATCH 8/8] perf/x86/amd/core: Fix performance monitor for Hygon family 18h processor The commit 69fe5f17 ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") changed amd_f17h_perfmon_event_map to amd_zen1_perfmon_event_map which
is limited to processors only with X86_FEATURE_ZEN1. As a result, some event counters such as cache-misses and cache-references cannot be tracked on Hygon family 18h processors. When using the perf tool to monitor performance, the result will be listed as follows: 0 cache-misses # 0.000 % of all cache refs 0 cache-references So add perfmon_event_map support for Hygon family 18h processors to fix the incorrect display. Fixes: 69fe5f17 ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: fuhao --- arch/x86/events/amd/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 4eacac39a501..f5e5906ae36c 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -296,6 +296,9 @@ static u64 amd_pmu_event_map(int hw_event) if (cpu_feature_enabled(X86_FEATURE_ZEN1)) return amd_zen1_perfmon_event_map[hw_event]; + if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return amd_zen1_perfmon_event_map[hw_event]; + return amd_perfmon_event_map[hw_event]; } -- Gitee