diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 7a9cdd13c8895b505a09c0a1cf2a931ff357ae50..75262ee011674f72b045b94eb1faeafbe7b58d90 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -47,10 +47,6 @@ readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ (i) * (m)->chan_mmio_sz) -#define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \ - (res_cfg->type == GNR ? 0xc14 : 0x20814)) + \ - (i) * (m)->chan_mmio_sz) #define I10NM_GET_REG32(m, i, offset) \ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_REG64(m, i, offset) \ @@ -66,16 +62,13 @@ ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 +#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 #define I10NM_GNR_IMC_MMIO_SIZE 0x4000 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) -#define RETRY_RD_ERR_LOG_UC BIT(1) -#define RETRY_RD_ERR_LOG_NOOVER BIT(14) -#define RETRY_RD_ERR_LOG_EN BIT(15) -#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) #define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) #define I10NM_MAX_SAD 16 @@ -89,227 +82,321 @@ static int retry_rd_err_log; static int decoding_via_mca; static bool mem_cfg_2lm; -static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; -static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; -static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; -static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; -static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; - -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, - u32 *offsets_scrub, u32 *offsets_demand, - u32 *offsets_demand2) +static struct reg_rrl icx_reg_rrl_ddr = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, + }, + .widths = {4, 4, 4, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_ddr = { + .set_num = 3, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, + {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch0 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, + {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch1 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, + {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl gnr_reg_rrl_ddr = { + .set_num = 4, + .reg_num = 6, + .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, + {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, + {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, + {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(14), + .noover_mask = BIT(15), + .en_mask = BIT(12), + + .cecnt_num = 8, + .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, + .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, +}; + +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { - u32 s, d, d2; + switch (width) { + case 4: + return I10NM_GET_REG32(imc, chan, offset); + case 8: + return I10NM_GET_REG64(imc, chan, offset); + default: + i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width); + return 0; + } +} + +static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) +{ + switch (width) { + case 4: + return I10NM_SET_REG32(imc, chan, offset, (u32)val); + default: + i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width); + } +} + +static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + int rrl_set, bool enable, u32 *rrl_ctl) +{ + enum rrl_mode mode = rrl->modes[rrl_set]; + u32 offset = rrl->offsets[rrl_set][0], v; + u8 width = rrl->widths[0]; + bool first, scrub; + + /* First or last read error. */ + first = (mode == FRE_SCRUB || mode == FRE_DEMAND); + /* Patrol scrub or on-demand read error. */ + scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB); - s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); - if (offsets_demand2) - d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); + v = read_imc_reg(imc, chan, offset, width); if (enable) { - /* Save default configurations */ - imc->chan[chan].retry_rd_err_log_s = s; - imc->chan[chan].retry_rd_err_log_d = d; - if (offsets_demand2) - imc->chan[chan].retry_rd_err_log_d2 = d2; - - s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - s |= RETRY_RD_ERR_LOG_EN; - d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - d |= RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - d2 &= ~RETRY_RD_ERR_LOG_UC; - d2 |= RETRY_RD_ERR_LOG_NOOVER; - d2 |= RETRY_RD_ERR_LOG_EN; - } + /* Save default configurations. */ + *rrl_ctl = v; + v &= ~rrl->uc_mask; + + if (first) + v |= rrl->noover_mask; + else + v &= ~rrl->noover_mask; + + if (scrub) + v |= rrl->en_patspr_mask; + else + v &= ~rrl->en_patspr_mask; + + v |= rrl->en_mask; } else { - /* Restore default configurations */ - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) - s |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) - s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) - s &= ~RETRY_RD_ERR_LOG_EN; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) - d |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) - d |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) - d &= ~RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) - d2 |= RETRY_RD_ERR_LOG_UC; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) - d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) - d2 &= ~RETRY_RD_ERR_LOG_EN; + /* Restore default configurations. */ + if (*rrl_ctl & rrl->uc_mask) + v |= rrl->uc_mask; + + if (first) { + if (!(*rrl_ctl & rrl->noover_mask)) + v &= ~rrl->noover_mask; + } else { + if (*rrl_ctl & rrl->noover_mask) + v |= rrl->noover_mask; + } + + if (scrub) { + if (!(*rrl_ctl & rrl->en_patspr_mask)) + v &= ~rrl->en_patspr_mask; + } else { + if (*rrl_ctl & rrl->en_patspr_mask) + v |= rrl->en_patspr_mask; } + + if (!(*rrl_ctl & rrl->en_mask)) + v &= ~rrl->en_mask; } - I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, offsets_demand[0], d); - if (offsets_demand2) - I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); + write_imc_reg(imc, chan, offset, width, v); +} + +static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + bool enable, u32 *rrl_ctl) +{ + int i; + + for (i = 0; i < rrl->set_num; i++) + enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); +} + +static void enable_rrls_ddr(struct skx_imc *imc, bool enable) +{ + struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr; + int i, chan_num = res_cfg->ddr_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase) + return; + + for (i = 0; i < chan_num; i++) + enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]); +} + +static void enable_rrls_hbm(struct skx_imc *imc, bool enable) +{ + struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm; + int i, chan_num = res_cfg->hbm_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) + return; + + for (i = 0; i < chan_num; i++) { + enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); + enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); + } } static void enable_retry_rd_err_log(bool enable) { - int i, j, imc_num, chan_num; - struct skx_imc *imc; struct skx_dev *d; + int i, imc_num; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) { imc_num = res_cfg->ddr_imc_num; - chan_num = res_cfg->ddr_chan_num; - - for (i = 0; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase) - continue; - - for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub, - res_cfg->offsets_demand, - res_cfg->offsets_demand2); - } + for (i = 0; i < imc_num; i++) + enable_rrls_ddr(&d->imc[i], enable); imc_num += res_cfg->hbm_imc_num; - chan_num = res_cfg->hbm_chan_num; - - for (; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase || !imc->hbm_mc) - continue; - - for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub_hbm0, - res_cfg->offsets_demand_hbm0, - NULL); - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub_hbm1, - res_cfg->offsets_demand_hbm1, - NULL); - } - } + for (; i < imc_num; i++) + enable_rrls_hbm(&d->imc[i], enable); } } static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, int len, bool scrub_err) { + int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 log0, log1, log2, log3, log4; - u32 corr0, corr1, corr2, corr3; - u32 lxg0, lxg1, lxg3, lxg4; - u32 *xffsets = NULL; - u64 log2a, log5; - u64 lxg2a, lxg5; - u32 *offsets; - int n, pch; + u64 log, corr, status_mask; + struct reg_rrl *rrl; + bool scrub; + u32 offset; + u8 width; if (!imc->mbase) return; - if (imc->hbm_mc) { - pch = res->cs & 1; + rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr; - if (pch) - offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : - res_cfg->offsets_demand_hbm1; - else - offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : - res_cfg->offsets_demand_hbm0; - } else { - if (scrub_err) { - offsets = res_cfg->offsets_scrub; - } else { - offsets = res_cfg->offsets_demand; - xffsets = res_cfg->offsets_demand2; - } - } + if (!rrl) + return; - log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); - log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); - log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); - log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); - log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); - - if (xffsets) { - lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); - lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); - lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); - lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); - lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); - } + status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - if (res_cfg->type == SPR) { - log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", - log0, log1, log2a, log3, log4, log5); + n = scnprintf(msg, len, " retry_rd_err_log["); + for (i = 0; i < rrl->set_num; i++) { + scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); + if (scrub_err != scrub) + continue; - if (len - n > 0) { - if (xffsets) { - lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); - n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", - lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); - } else { - n += snprintf(msg + n, len - n, "]"); - } - } - } else { - log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", - log0, log1, log2, log3, log4, log5); - } + for (j = 0; j < rrl->reg_num && len - n > 0; j++) { + offset = rrl->offsets[i][j]; + width = rrl->widths[j]; + log = read_imc_reg(imc, ch, offset, width); - if (imc->hbm_mc) { - if (pch) { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", log); + else + n += scnprintf(msg + n, len - n, "%.16llx ", log); + + /* Clear RRL status if RRL in Linux control mode. */ + if (retry_rd_err_log == 2 && !j && (log & status_mask)) + write_imc_reg(imc, ch, offset, width, log & ~status_mask); } - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); } - if (len - n > 0) - snprintf(msg + n, len - n, - " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", - corr0 & 0xffff, corr0 >> 16, - corr1 & 0xffff, corr1 >> 16, - corr2 & 0xffff, corr2 >> 16, - corr3 & 0xffff, corr3 >> 16); - - /* Clear status bits */ - if (retry_rd_err_log == 2) { - if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); + + if (len - n > 0) { + n += scnprintf(msg + n, len - n, " correrrcnt["); + for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { + offset = rrl->cecnt_offsets[i]; + width = rrl->cecnt_widths[i]; + corr = read_imc_reg(imc, ch, offset, width); + + /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ + if (res_cfg->type <= SPR) { + n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); + } else { + /* CPUs {GNR} encode one counter per CORRERRCNT register. */ + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", corr); + else + n += scnprintf(msg + n, len - n, "%.16llx ", corr); + } } - if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); - } + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); } } @@ -605,6 +692,16 @@ static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *phy return NULL; } +static u32 get_gnr_imc_mmio_offset(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == INTEL_FAM6_GRANITERAPIDS_D) + return I10NM_GNR_D_IMC_MMIO_OFFSET; + + return I10NM_GNR_IMC_MMIO_OFFSET; +} + /** * get_ddr_munit() - Get the resource of the i-th DDR memory controller. * @@ -633,7 +730,7 @@ static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsi return NULL; *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + - I10NM_GNR_IMC_MMIO_OFFSET + + get_gnr_imc_mmio_offset() + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; *size = I10NM_GNR_IMC_MMIO_SIZE; @@ -755,6 +852,8 @@ static int i10nm_get_ddr_munits(void) continue; } else { d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); lmc++; } } @@ -872,8 +971,7 @@ static struct res_config i10nm_cfg0 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config i10nm_cfg1 = { @@ -891,8 +989,7 @@ static struct res_config i10nm_cfg1 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config spr_cfg = { @@ -915,13 +1012,9 @@ static struct res_config spr_cfg = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x300, - .offsets_scrub = offsets_scrub_spr, - .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, - .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, - .offsets_demand = offsets_demand_spr, - .offsets_demand2 = offsets_demand2_spr, - .offsets_demand_hbm0 = offsets_demand_spr_hbm0, - .offsets_demand_hbm1 = offsets_demand_spr_hbm1, + .reg_rrl_ddr = &spr_reg_rrl_ddr, + .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, + .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, }; static struct res_config gnr_cfg = { @@ -939,6 +1032,7 @@ static struct res_config gnr_cfg = { .uracu_bdf = {0, 0, 1}, .ddr_mdev_bdf = {0, 5, 1}, .sad_all_offset = 0x300, + .reg_rrl_ddr = &gnr_reg_rrl_ddr, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -950,7 +1044,10 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_D, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_DARKMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -970,7 +1067,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; - u32 mtr, amap, mcddrtcfg = 0; + u32 mtr, mcddrtcfg = 0; struct dimm_info *dimm; int i, j, ndimms; @@ -979,7 +1076,6 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, continue; ndimms = 0; - amap = I10NM_GET_AMAP(imc, i); if (res_cfg->type != GNR) mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); @@ -991,7 +1087,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, @@ -1014,7 +1110,7 @@ static struct notifier_block i10nm_mce_dec = { static int __init i10nm_init(void) { - u8 mc = 0, src_id = 0, node_id = 0; + u8 mc = 0, src_id = 0; const struct x86_cpu_id *id; struct res_config *cfg; const char *owner; @@ -1071,19 +1167,14 @@ static int __init i10nm_init(void) if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - - edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id); + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < imc_num; i++) { if (!d->imc[i].mdev) continue; d->imc[i].mc = mc++; d->imc[i].lmc = i; - d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; + d->imc[i].src_id = src_id; if (d->imc[i].hbm_mc) { d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; d->imc[i].num_channels = cfg->hbm_chan_num; @@ -1110,7 +1201,7 @@ static int __init i10nm_init(void) mce_register_decode_chain(&i10nm_mce_dec); skx_setup_debug("i10nm_test"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); @@ -1130,7 +1221,7 @@ static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(NULL, NULL); if (retry_rd_err_log == 2) enable_retry_rd_err_log(false); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 0280518a51a9034f73048952b28973efd257641d..ad410444af20f728ef83a1a5eebdba008604cec2 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -600,7 +600,7 @@ static int __init skx_init(void) const struct munit *m; const char *owner; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; - u8 mc = 0, src_id, node_id; + u8 mc = 0, src_id; struct skx_dev *d; edac_dbg(2, "\n"); @@ -647,15 +647,12 @@ static int __init skx_init(void) rc = skx_get_src_id(d, 0xf0, &src_id); if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < SKX_NUM_IMC; i++) { d->imc[i].mc = mc++; d->imc[i].lmc = i; d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 3d912671d4a38cee88fb4ed6995c9bd0de0278eb..0bd9dd2506b1475bd891423fea1a42a6c9445a1c 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "edac_module.h" #include "skx_common.h" @@ -121,6 +122,37 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); +static void skx_init_mc_mapping(struct skx_dev *d) +{ + int i; + + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. + */ + for (i = 0; i < NUM_IMC; i++) + d->mc_mapping[i] = i; +} + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->mc_mapping[pmc] = lmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, d->mc_mapping[pmc]); + + return d->mc_mapping[pmc]; +} + static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; @@ -188,6 +220,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + res->imc = skx_get_mc_mapping(d, res->imc); + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -222,33 +256,51 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) } EXPORT_SYMBOL_GPL(skx_set_decode); -int skx_get_src_id(struct skx_dev *d, int off, u8 *id) +static int skx_get_pkg_id(struct skx_dev *d, u8 *id) { - u32 reg; + int node; + int cpu; - if (pci_read_config_dword(d->util_all, off, ®)) { - skx_printk(KERN_ERR, "Failed to read src id\n"); - return -ENODEV; + node = pcibus_to_node(d->util_all->bus); + if (numa_valid_node(node)) { + for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + *id = topology_physical_package_id(cpu); + return 0; + } + } } - *id = GET_BITFIELD(reg, 12, 14); - return 0; + skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); + return -ENODEV; } -EXPORT_SYMBOL_GPL(skx_get_src_id); -int skx_get_node_id(struct skx_dev *d, u8 *id) +int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { u32 reg; - if (pci_read_config_dword(d->util_all, 0xf4, ®)) { - skx_printk(KERN_ERR, "Failed to read node id\n"); + /* + * The 3-bit source IDs in PCI configuration space registers are limited + * to 8 unique IDs, and each ID is local to a UPI/QPI domain. + * + * Source IDs cannot be used to map devices to sockets on UV systems + * because they can exceed 8 sockets and have multiple UPI/QPI domains + * with identical, repeating source IDs. + */ + if (is_uv_system()) + return skx_get_pkg_id(d, id); + + if (pci_read_config_dword(d->util_all, off, ®)) { + skx_printk(KERN_ERR, "Failed to read src id\n"); return -ENODEV; } - *id = GET_BITFIELD(reg, 0, 2); + *id = GET_BITFIELD(reg, 12, 14); return 0; } -EXPORT_SYMBOL_GPL(skx_get_node_id); +EXPORT_SYMBOL_GPL(skx_get_src_id); static int get_width(u32 mtr) { @@ -308,6 +360,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->bus[0], d->bus[1], d->bus[2], d->bus[3]); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) @@ -387,7 +441,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, if (imc->hbm_mc) { banks = 32; mtype = MEM_HBM2; - } else if (cfg->support_ddr5 && (amap & 0x8)) { + } else if (cfg->support_ddr5) { banks = 32; mtype = MEM_DDR5; } else { @@ -508,7 +562,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, pvt->imc = imc; mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, - imc->node_id, imc->lmc); + imc->src_id, imc->lmc); if (!mci->ctl_name) { rc = -ENOMEM; goto fail0; @@ -634,12 +688,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, } } if (res->decoded_by_adxl) { - len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { - len = snprintf(skx_msg, MSG_SIZE, + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 85c978b612362b1e6a15fa9720497458864e446f..15cb774d88ece0f4cff2be0978432f168eddef5b 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -80,6 +80,47 @@ */ #define MCACOD_EXT_MEM_ERR 0x280 +/* Max RRL register sets per {,sub-,pseudo-}channel. */ +#define NUM_RRL_SET 4 +/* Max RRL registers per set. */ +#define NUM_RRL_REG 6 +/* Max correctable error count registers. */ +#define NUM_CECNT_REG 8 + +/* Modes of RRL register set. */ +enum rrl_mode { + /* Last read error from patrol scrub. */ + LRE_SCRUB, + /* Last read error from demand. */ + LRE_DEMAND, + /* First read error from patrol scrub. */ + FRE_SCRUB, + /* First read error from demand. */ + FRE_DEMAND, +}; + +/* RRL registers per {,sub-,pseudo-}channel. */ +struct reg_rrl { + /* RRL register parts. */ + int set_num, reg_num; + enum rrl_mode modes[NUM_RRL_SET]; + u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; + /* RRL register widths in byte per set. */ + u8 widths[NUM_RRL_REG]; + /* RRL control bits of the first register per set. */ + u32 v_mask; + u32 uc_mask; + u32 over_mask; + u32 en_patspr_mask; + u32 noover_mask; + u32 en_mask; + + /* CORRERRCNT register parts. */ + int cecnt_num; + u32 cecnt_offsets[NUM_CECNT_REG]; + u8 cecnt_widths[NUM_CECNT_REG]; +}; + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -94,6 +135,16 @@ struct skx_dev { struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error address using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used the EDAC driver) of present memory controllers during the + * error handling process. + */ + u8 mc_mapping[NUM_IMC]; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ @@ -104,13 +155,15 @@ struct skx_dev { bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ - u8 src_id, node_id; + u8 src_id; struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; - u32 retry_rd_err_log_s; - u32 retry_rd_err_log_d; - u32 retry_rd_err_log_d2; + /* + * Two groups of RRL control registers per channel to save default RRL + * settings of two {sub-,pseudo-}channels in Linux RRL control mode. + */ + u32 rrl_ctl[2][NUM_RRL_SET]; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -223,14 +276,10 @@ struct res_config { /* HBM mdev device BDF */ struct pci_bdf hbm_mdev_bdf; int sad_all_offset; - /* Offsets of retry_rd_err_log registers */ - u32 *offsets_scrub; - u32 *offsets_scrub_hbm0; - u32 *offsets_scrub_hbm1; - u32 *offsets_demand; - u32 *offsets_demand2; - u32 *offsets_demand_hbm0; - u32 *offsets_demand_hbm1; + /* RRL register sets per DDR channel */ + struct reg_rrl *reg_rrl_ddr; + /* RRL register sets per HBM channel */ + struct reg_rrl *reg_rrl_hbm[2]; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, @@ -243,9 +292,9 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); -int skx_get_node_id(struct skx_dev *d, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); diff --git a/include/linux/numa.h b/include/linux/numa.h index fdcd888f70cd3d81a0ced36dcdc83419c4f1311d..2667ad307f2420e4cf6f49d271245fd2fe760fbd 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -13,6 +13,11 @@ #define NUMA_NO_NODE (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo diff --git a/mm/memblock.c b/mm/memblock.c index 94008ef55a827eff8f864d6f45ac7da3e48a19e1..d3ba47f8c988c85bea0ee73993842e53288b4d28 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -965,7 +965,7 @@ static bool should_skip_region(struct memblock_type *type, return false; /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) + if (numa_valid_node(nid) && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ @@ -1022,10 +1022,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - for (; idx_a < type_a->cnt; idx_a++) { struct memblock_region *m = &type_a->regions[idx_a]; @@ -1119,9 +1115,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1207,7 +1200,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) continue; - if (nid == MAX_NUMNODES || nid == r_nid) + if (!numa_valid_node(nid) || nid == r_nid) break; } if (*idx >= type->cnt) { @@ -1353,9 +1346,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); @@ -1368,7 +1358,7 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, if (found && !memblock_reserve(found, size)) goto done; - if (nid != NUMA_NO_NODE && !exact_nid) { + if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); @@ -1969,7 +1959,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_NEED_MULTIPLE_NODES - if (memblock_get_region_node(rgn) != MAX_NUMNODES) + if (numa_valid_node(memblock_get_region_node(rgn))) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif