summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 09:19:35 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 09:19:35 -0800
commit30f5a75640998900d995e099e060e920e72790b2 (patch)
tree5c6ed5c5bf5a20dc2166e195d80fe79d68202277 /drivers/edac
parentb62061b82a6e6783bed5f9052326694ba1418bba (diff)
parent86e9f9d60eb5e0c5d99ddf6b79f4d308d6453bd0 (diff)
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov: - Misc fixes to the MCE code all over the place, by Jan H. Schönherr. - Initial support for AMD F19h and other cleanups to amd64_edac, by Yazen Ghannam. - Other small cleanups. * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: EDAC/mce_amd: Make fam_ops static global EDAC/amd64: Drop some family checks for newer systems EDAC/amd64: Add family ops for Family 19h Models 00h-0Fh x86/amd_nb: Add Family 19h PCI IDs EDAC/mce_amd: Always load on SMCA systems x86/MCE/AMD, EDAC/mce_amd: Add new Load Store unit McaType x86/mce: Fix use of uninitialized MCE message string x86/mce: Fix mce=nobootlog x86/mce: Take action on UCNA/Deferred errors again x86/mce: Remove mce_inject_log() in favor of mce_log() x86/mce: Pass MCE message to mce_panic() on failed kernel recovery x86/mce/therm_throt: Mark throttle_active_work() as __maybe_unused
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/amd64_edac.c62
-rw-r--r--drivers/edac/amd64_edac.h3
-rw-r--r--drivers/edac/mce_amd.c105
3 files changed, 102 insertions, 68 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index d2a6d3319650..9fbad908a854 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
scrubval = scrubrates[i].scrubval;
- if (pvt->fam == 0x17 || pvt->fam == 0x18) {
+ if (pvt->umc) {
__f17h_set_scrubval(pvt, scrubval);
} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0);
@@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
int i, retval = -EINVAL;
u32 scrubval = 0;
- switch (pvt->fam) {
- case 0x15:
- /* Erratum #505 */
- if (pvt->model < 0x10)
- f15h_select_dct(pvt, 0);
-
- if (pvt->model == 0x60)
- amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
- break;
-
- case 0x17:
- case 0x18:
+ if (pvt->umc) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
if (scrubval & BIT(0)) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
} else {
scrubval = 0;
}
- break;
+ } else if (pvt->fam == 0x15) {
+ /* Erratum #505 */
+ if (pvt->model < 0x10)
+ f15h_select_dct(pvt, 0);
- default:
+ if (pvt->model == 0x60)
+ amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ } else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
- break;
}
scrubval = scrubval & 0x001F;
@@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt)
{
u32 dram_ctrl, dcsm;
+ if (pvt->umc) {
+ if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
+ pvt->dram_type = MEM_LRDDR4;
+ else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
+ pvt->dram_type = MEM_RDDR4;
+ else
+ pvt->dram_type = MEM_DDR4;
+ return;
+ }
+
switch (pvt->fam) {
case 0xf:
if (pvt->ext_model >= K8_REV_F)
@@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt)
case 0x16:
goto ddr3;
- case 0x17:
- case 0x18:
- if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
- pvt->dram_type = MEM_LRDDR4;
- else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
- pvt->dram_type = MEM_RDDR4;
- else
- pvt->dram_type = MEM_DDR4;
- return;
-
default:
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
pvt->dram_type = MEM_EMPTY;
@@ -2336,6 +2329,16 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
+ [F19_CPUS] = {
+ .ctl_name = "F19h",
+ .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
+ .max_mcs = 8,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
};
/*
@@ -3368,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
family_types[F17_CPUS].ctl_name = "F18h";
break;
+ case 0x19:
+ fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h";
+ break;
+
default:
amd64_err("Unsupported family!\n");
return NULL;
@@ -3623,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
+ { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 9be31688110b..abbf3c274d74 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -122,6 +122,8 @@
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
+#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
+#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
/*
* Function 1 - Address Map
@@ -292,6 +294,7 @@ enum amd_families {
F17_M10H_CPUS,
F17_M30H_CPUS,
F17_M70H_CPUS,
+ F19_CPUS,
NUM_FAMILIES,
};
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea622c6f3a39..ea980c556f2e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -6,7 +6,7 @@
#include "mce_amd.h"
-static struct amd_decoder_ops *fam_ops;
+static struct amd_decoder_ops fam_ops;
static u8 xec_mask = 0xf;
@@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = {
"L2 Fill Data error",
};
+static const char * const smca_ls2_mce_desc[] = {
+ "An ECC error was detected on a data cache read by a probe or victimization",
+ "An ECC error or L2 poison was detected on a data cache read by a load",
+ "An ECC error was detected on a data cache read-modify-write by a store",
+ "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
+ "An ECC error or poison bit mismatch was detected on a tag read by a load",
+ "An ECC error or poison bit mismatch was detected on a tag read by a store",
+ "An ECC error was detected on an EMEM read by a load",
+ "An ECC error was detected on an EMEM read-modify-write by a store",
+ "A parity error was detected in an L1 TLB entry by any access",
+ "A parity error was detected in an L2 TLB entry by any access",
+ "A parity error was detected in a PWC entry by any access",
+ "A parity error was detected in an STQ entry by any access",
+ "A parity error was detected in an LDQ entry by any access",
+ "A parity error was detected in a MAB entry by any access",
+ "A parity error was detected in an SCB entry state field by any access",
+ "A parity error was detected in an SCB entry address field by any access",
+ "A parity error was detected in an SCB entry data field by any access",
+ "A parity error was detected in a WCB entry by any access",
+ "A poisoned line was detected in an SCB entry by any access",
+ "A SystemReadDataError error was reported on read data returned from L2 for a load",
+ "A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
+ "A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
+ "A hardware assertion error was reported",
+ "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
+};
+
static const char * const smca_if_mce_desc[] = {
"Op Cache Microtag Probe Port Parity Error",
"IC Microtag or Full Tag Multi-hit Error",
@@ -378,6 +405,7 @@ struct smca_mce_desc {
static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
+ [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
[SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
[SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
[SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
@@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m)
: (xec ? "multimatch" : "parity")));
return;
}
- } else if (fam_ops->mc0_mce(ec, xec))
+ } else if (fam_ops.mc0_mce(ec, xec))
;
else
pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
@@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m)
pr_cont("Hardware Assert.\n");
else
goto wrong_mc1_mce;
- } else if (fam_ops->mc1_mce(ec, xec))
+ } else if (fam_ops.mc1_mce(ec, xec))
;
else
goto wrong_mc1_mce;
@@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m)
pr_emerg(HW_ERR "MC2 Error: ");
- if (!fam_ops->mc2_mce(ec, xec))
+ if (!fam_ops.mc2_mce(ec, xec))
pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
}
@@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (m->tsc)
pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
- if (!fam_ops)
+ /* Doesn't matter which member to test. */
+ if (!fam_ops.mc0_mce)
goto err_code;
switch (m->bank) {
@@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void)
c->x86_vendor != X86_VENDOR_HYGON)
return -ENODEV;
- fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
- if (!fam_ops)
- return -ENOMEM;
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ xec_mask = 0x3f;
+ goto out;
+ }
switch (c->x86) {
case 0xf:
- fam_ops->mc0_mce = k8_mc0_mce;
- fam_ops->mc1_mce = k8_mc1_mce;
- fam_ops->mc2_mce = k8_mc2_mce;
+ fam_ops.mc0_mce = k8_mc0_mce;
+ fam_ops.mc1_mce = k8_mc1_mce;
+ fam_ops.mc2_mce = k8_mc2_mce;
break;
case 0x10:
- fam_ops->mc0_mce = f10h_mc0_mce;
- fam_ops->mc1_mce = k8_mc1_mce;
- fam_ops->mc2_mce = k8_mc2_mce;
+ fam_ops.mc0_mce = f10h_mc0_mce;
+ fam_ops.mc1_mce = k8_mc1_mce;
+ fam_ops.mc2_mce = k8_mc2_mce;
break;
case 0x11:
- fam_ops->mc0_mce = k8_mc0_mce;
- fam_ops->mc1_mce = k8_mc1_mce;
- fam_ops->mc2_mce = k8_mc2_mce;
+ fam_ops.mc0_mce = k8_mc0_mce;
+ fam_ops.mc1_mce = k8_mc1_mce;
+ fam_ops.mc2_mce = k8_mc2_mce;
break;
case 0x12:
- fam_ops->mc0_mce = f12h_mc0_mce;
- fam_ops->mc1_mce = k8_mc1_mce;
- fam_ops->mc2_mce = k8_mc2_mce;
+ fam_ops.mc0_mce = f12h_mc0_mce;
+ fam_ops.mc1_mce = k8_mc1_mce;
+ fam_ops.mc2_mce = k8_mc2_mce;
break;
case 0x14:
- fam_ops->mc0_mce = cat_mc0_mce;
- fam_ops->mc1_mce = cat_mc1_mce;
- fam_ops->mc2_mce = k8_mc2_mce;
+ fam_ops.mc0_mce = cat_mc0_mce;
+ fam_ops.mc1_mce = cat_mc1_mce;
+ fam_ops.mc2_mce = k8_mc2_mce;
break;
case 0x15:
xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
- fam_ops->mc0_mce = f15h_mc0_mce;
- fam_ops->mc1_mce = f15h_mc1_mce;
- fam_ops->mc2_mce = f15h_mc2_mce;
+ fam_ops.mc0_mce = f15h_mc0_mce;
+ fam_ops.mc1_mce = f15h_mc1_mce;
+ fam_ops.mc2_mce = f15h_mc2_mce;
break;
case 0x16:
xec_mask = 0x1f;
- fam_ops->mc0_mce = cat_mc0_mce;
- fam_ops->mc1_mce = cat_mc1_mce;
- fam_ops->mc2_mce = f16h_mc2_mce;
+ fam_ops.mc0_mce = cat_mc0_mce;
+ fam_ops.mc1_mce = cat_mc1_mce;
+ fam_ops.mc2_mce = f16h_mc2_mce;
break;
case 0x17:
case 0x18:
- xec_mask = 0x3f;
- if (!boot_cpu_has(X86_FEATURE_SMCA)) {
- printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
- goto err_out;
- }
- break;
+ pr_warn("Decoding supported only on Scalable MCA processors.\n");
+ return -EINVAL;
default:
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
- goto err_out;
+ return -EINVAL;
}
+out:
pr_info("MCE: In-kernel MCE decoding enabled.\n");
mce_register_decode_chain(&amd_mce_dec_nb);
return 0;
-
-err_out:
- kfree(fam_ops);
- fam_ops = NULL;
- return -EINVAL;
}
early_initcall(mce_amd_init);
@@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init);
static void __exit mce_amd_exit(void)
{
mce_unregister_decode_chain(&amd_mce_dec_nb);
- kfree(fam_ops);
}
MODULE_DESCRIPTION("AMD MCE decoder");