diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-27 09:19:35 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-27 09:19:35 -0800 |
commit | 30f5a75640998900d995e099e060e920e72790b2 (patch) | |
tree | 5c6ed5c5bf5a20dc2166e195d80fe79d68202277 /drivers/edac | |
parent | b62061b82a6e6783bed5f9052326694ba1418bba (diff) | |
parent | 86e9f9d60eb5e0c5d99ddf6b79f4d308d6453bd0 (diff) |
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:
- Misc fixes to the MCE code all over the place, by Jan H. Schönherr.
- Initial support for AMD F19h and other cleanups to amd64_edac, by
Yazen Ghannam.
- Other small cleanups.
* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
EDAC/mce_amd: Make fam_ops static global
EDAC/amd64: Drop some family checks for newer systems
EDAC/amd64: Add family ops for Family 19h Models 00h-0Fh
x86/amd_nb: Add Family 19h PCI IDs
EDAC/mce_amd: Always load on SMCA systems
x86/MCE/AMD, EDAC/mce_amd: Add new Load Store unit McaType
x86/mce: Fix use of uninitialized MCE message string
x86/mce: Fix mce=nobootlog
x86/mce: Take action on UCNA/Deferred errors again
x86/mce: Remove mce_inject_log() in favor of mce_log()
x86/mce: Pass MCE message to mce_panic() on failed kernel recovery
x86/mce/therm_throt: Mark throttle_active_work() as __maybe_unused
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/amd64_edac.c | 62 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 3 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 105 |
3 files changed, 102 insertions, 68 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d2a6d3319650..9fbad908a854 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17 || pvt->fam == 0x18) { + if (pvt->umc) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) int i, retval = -EINVAL; u32 scrubval = 0; - switch (pvt->fam) { - case 0x15: - /* Erratum #505 */ - if (pvt->model < 0x10) - f15h_select_dct(pvt, 0); - - if (pvt->model == 0x60) - amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); - break; - - case 0x17: - case 0x18: + if (pvt->umc) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci) } else { scrubval = 0; } - break; + } else if (pvt->fam == 0x15) { + /* Erratum #505 */ + if (pvt->model < 0x10) + f15h_select_dct(pvt, 0); - default: + if (pvt->model == 0x60) + amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); + } else { amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); - break; } scrubval = scrubval & 0x001F; @@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; + if (pvt->umc) { + if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) + pvt->dram_type = MEM_LRDDR4; + else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) + pvt->dram_type = MEM_RDDR4; + else + pvt->dram_type = MEM_DDR4; + return; + } + switch (pvt->fam) { case 0xf: if (pvt->ext_model >= K8_REV_F) @@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt) case 0x16: goto ddr3; - case 0x17: - case 0x18: - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; - else - pvt->dram_type = MEM_DDR4; - return; - default: WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); pvt->dram_type = MEM_EMPTY; @@ -2336,6 +2329,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F19_CPUS] = { + .ctl_name = "F19h", + .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, + .max_mcs = 8, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -3368,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) family_types[F17_CPUS].ctl_name = "F18h"; break; + case 0x19: + fam_type = &family_types[F19_CPUS]; + pvt->ops = &family_types[F19_CPUS].ops; + family_types[F19_CPUS].ctl_name = "F19h"; + break; + default: amd64_err("Unsupported family!\n"); return NULL; @@ -3623,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 9be31688110b..abbf3c274d74 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -122,6 +122,8 @@ #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 +#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 +#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 /* * Function 1 - Address Map @@ -292,6 +294,7 @@ enum amd_families { F17_M10H_CPUS, F17_M30H_CPUS, F17_M70H_CPUS, + F19_CPUS, NUM_FAMILIES, }; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ea622c6f3a39..ea980c556f2e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -6,7 +6,7 @@ #include "mce_amd.h" -static struct amd_decoder_ops *fam_ops; +static struct amd_decoder_ops fam_ops; static u8 xec_mask = 0xf; @@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = { "L2 Fill Data error", }; +static const char * const smca_ls2_mce_desc[] = { + "An ECC error was detected on a data cache read by a probe or victimization", + "An ECC error or L2 poison was detected on a data cache read by a load", + "An ECC error was detected on a data cache read-modify-write by a store", + "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization", + "An ECC error or poison bit mismatch was detected on a tag read by a load", + "An ECC error or poison bit mismatch was detected on a tag read by a store", + "An ECC error was detected on an EMEM read by a load", + "An ECC error was detected on an EMEM read-modify-write by a store", + "A parity error was detected in an L1 TLB entry by any access", + "A parity error was detected in an L2 TLB entry by any access", + "A parity error was detected in a PWC entry by any access", + "A parity error was detected in an STQ entry by any access", + "A parity error was detected in an LDQ entry by any access", + "A parity error was detected in a MAB entry by any access", + "A parity error was detected in an SCB entry state field by any access", + "A parity error was detected in an SCB entry address field by any access", + "A parity error was detected in an SCB entry data field by any access", + "A parity error was detected in a WCB entry by any access", + "A poisoned line was detected in an SCB entry by any access", + "A SystemReadDataError error was reported on read data returned from L2 for a load", + "A SystemReadDataError error was reported on read data returned from L2 for an SCB store", + "A SystemReadDataError error was reported on read data returned from L2 for a WCB store", + "A hardware assertion error was reported", + "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access", +}; + static const char * const smca_if_mce_desc[] = { "Op Cache Microtag Probe Port Parity Error", "IC Microtag or Full Tag Multi-hit Error", @@ -378,6 +405,7 @@ struct smca_mce_desc { static struct smca_mce_desc smca_mce_descs[] = { [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, + [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) }, [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, @@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->mc0_mce(ec, xec)) + } else if (fam_ops.mc0_mce(ec, xec)) ; else pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); @@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m) pr_cont("Hardware Assert.\n"); else goto wrong_mc1_mce; - } else if (fam_ops->mc1_mce(ec, xec)) + } else if (fam_ops.mc1_mce(ec, xec)) ; else goto wrong_mc1_mce; @@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m) pr_emerg(HW_ERR "MC2 Error: "); - if (!fam_ops->mc2_mce(ec, xec)) + if (!fam_ops.mc2_mce(ec, xec)) pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); } @@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (m->tsc) pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); - if (!fam_ops) + /* Doesn't matter which member to test. */ + if (!fam_ops.mc0_mce) goto err_code; switch (m->bank) { @@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void) c->x86_vendor != X86_VENDOR_HYGON) return -ENODEV; - fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); - if (!fam_ops) - return -ENOMEM; + if (boot_cpu_has(X86_FEATURE_SMCA)) { + xec_mask = 0x3f; + goto out; + } switch (c->x86) { case 0xf: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->mc0_mce = f10h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f10h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->mc0_mce = f12h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f12h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x14: - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f; - fam_ops->mc0_mce = f15h_mc0_mce; - fam_ops->mc1_mce = f15h_mc1_mce; - fam_ops->mc2_mce = f15h_mc2_mce; + fam_ops.mc0_mce = f15h_mc0_mce; + fam_ops.mc1_mce = f15h_mc1_mce; + fam_ops.mc2_mce = f15h_mc2_mce; break; case 0x16: xec_mask = 0x1f; - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = f16h_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = f16h_mc2_mce; break; case 0x17: case 0x18: - xec_mask = 0x3f; - if (!boot_cpu_has(X86_FEATURE_SMCA)) { - printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); - goto err_out; - } - break; + pr_warn("Decoding supported only on Scalable MCA processors.\n"); + return -EINVAL; default: printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); - goto err_out; + return -EINVAL; } +out: pr_info("MCE: In-kernel MCE decoding enabled.\n"); mce_register_decode_chain(&amd_mce_dec_nb); return 0; - -err_out: - kfree(fam_ops); - fam_ops = NULL; - return -EINVAL; } early_initcall(mce_amd_init); @@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init); static void __exit mce_amd_exit(void) { mce_unregister_decode_chain(&amd_mce_dec_nb); - kfree(fam_ops); } MODULE_DESCRIPTION("AMD MCE decoder"); |