diff options
author | Jiri Kosina <jkosina@suse.cz> | 2016-04-18 11:18:55 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2016-04-18 11:18:55 +0200 |
commit | 9938b04472d5c59f8bd8152a548533a8599596a2 (patch) | |
tree | 0fc8318100878c5e446076613ec02a97aa179119 /drivers/edac | |
parent | bd7ced98812dbb906950d8b0ec786f14f631cede (diff) | |
parent | c3b46c73264b03000d1e18b22f5caf63332547c9 (diff) |
Merge branch 'master' into for-next
Sync with Linus' tree so that patches against newer codebase can be applied.
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 26 | ||||
-rw-r--r-- | drivers/edac/Makefile | 4 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 492 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/debugfs.c | 2 | ||||
-rw-r--r-- | drivers/edac/edac_device.c | 41 | ||||
-rw-r--r-- | drivers/edac/edac_device_sysfs.c | 11 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 81 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 37 | ||||
-rw-r--r-- | drivers/edac/edac_module.c | 52 | ||||
-rw-r--r-- | drivers/edac/edac_module.h | 10 | ||||
-rw-r--r-- | drivers/edac/edac_pci.c | 125 | ||||
-rw-r--r-- | drivers/edac/edac_pci_sysfs.c | 16 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 41 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 335 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.c | 56 | ||||
-rw-r--r-- | drivers/edac/mv64x60_edac.c | 39 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 1097 | ||||
-rw-r--r-- | drivers/edac/wq.c | 42 | ||||
-rw-r--r-- | drivers/edac/xgene_edac.c | 70 |
21 files changed, 2134 insertions, 449 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index ef25000a5bc6..37755e63cc28 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -367,14 +367,30 @@ config EDAC_OCTEON_PCI Support for error detection and correction on the Cavium Octeon family of SOCs. -config EDAC_ALTERA_MC - bool "Altera SDRAM Memory Controller EDAC" +config EDAC_ALTERA + bool "Altera SOCFPGA ECC" depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the - Altera SDRAM memory controller. Note that the - preloader must initialize the SDRAM before loading - the kernel. + Altera SOCs. This must be selected for SDRAM ECC. + Note that the preloader must initialize the SDRAM + before loading the kernel. + +config EDAC_ALTERA_L2C + bool "Altera L2 Cache ECC" + depends on EDAC_ALTERA=y + select CACHE_L2X0 + help + Support for error detection and correction on the + Altera L2 cache Memory for Altera SoCs. This option + requires L2 cache so it will force that selection. + +config EDAC_ALTERA_OCRAM + bool "Altera On-Chip RAM ECC" + depends on EDAC_ALTERA=y && SRAM && GENERIC_ALLOCATOR + help + Support for error detection and correction on the + Altera On-Chip RAM Memory for Altera SoCs. config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index dbf53e08bdd1..f9e4a3e0e6e9 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o -edac_core-y += edac_module.o edac_device_sysfs.o +edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o @@ -67,6 +67,6 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o -obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o +obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 929640981d8a..63e42098726d 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1,5 +1,5 @@ /* - * Copyright Altera Corporation (C) 2014-2015. All rights reserved. + * Copyright Altera Corporation (C) 2014-2016. All rights reserved. * Copyright 2011-2012 Calxeda, Inc. * * This program is free software; you can redistribute it and/or modify it @@ -17,8 +17,10 @@ * Adapted from the highbank_mc_edac driver. */ +#include <asm/cacheflush.h> #include <linux/ctype.h> #include <linux/edac.h> +#include <linux/genalloc.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mfd/syscon.h> @@ -34,6 +36,7 @@ #define EDAC_MOD_STR "altera_edac" #define EDAC_VERSION "1" +#define EDAC_DEVICE "Altera" static const struct altr_sdram_prv_data c5_data = { .ecc_ctrl_offset = CV_CTLCFG_OFST, @@ -75,6 +78,31 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; +/************************** EDAC Device Defines **************************/ + +/* OCRAM ECC Management Group Defines */ +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_EN BIT(0) +#define ALTR_OCR_ECC_INJS BIT(1) +#define ALTR_OCR_ECC_INJD BIT(2) +#define ALTR_OCR_ECC_SERR BIT(3) +#define ALTR_OCR_ECC_DERR BIT(4) + +/* L2 ECC Management Group Defines */ +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_EN BIT(0) +#define ALTR_L2_ECC_INJS BIT(1) +#define ALTR_L2_ECC_INJD BIT(2) + +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ + +/*********************** EDAC Memory Controller Functions ****************/ + +/* The SDRAM controller uses the EDAC Memory Controller framework. */ + static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) { struct mem_ctl_info *mci = dev_id; @@ -504,6 +532,466 @@ static struct platform_driver altr_sdram_edac_driver = { module_platform_driver(altr_sdram_edac_driver); +/************************* EDAC Parent Probe *************************/ + +static const struct of_device_id altr_edac_device_of_match[]; + +static const struct of_device_id altr_edac_of_match[] = { + { .compatible = "altr,socfpga-ecc-manager" }, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_of_match); + +static int altr_edac_probe(struct platform_device *pdev) +{ + of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match, + NULL, &pdev->dev); + return 0; +} + +static struct platform_driver altr_edac_driver = { + .probe = altr_edac_probe, + .driver = { + .name = "socfpga_ecc_manager", + .of_match_table = altr_edac_of_match, + }, +}; +module_platform_driver(altr_edac_driver); + +/************************* EDAC Device Functions *************************/ + +/* + * EDAC Device Functions (shared between various IPs). + * The discrete memories use the EDAC Device framework. The probe + * and error handling functions are very similar between memories + * so they are shared. The memory allocation and freeing for EDAC + * trigger testing are different for each memory. + */ + +const struct edac_device_prv_data ocramecc_data; +const struct edac_device_prv_data l2ecc_data; + +struct edac_device_prv_data { + int (*setup)(struct platform_device *pdev, void __iomem *base); + int ce_clear_mask; + int ue_clear_mask; + char dbgfs_name[20]; + void * (*alloc_mem)(size_t size, void **other); + void (*free_mem)(void *p, size_t size, void *other); + int ecc_enable_mask; + int ce_set_mask; + int ue_set_mask; + int trig_alloc_sz; +}; + +struct altr_edac_device_dev { + void __iomem *base; + int sb_irq; + int db_irq; + const struct edac_device_prv_data *data; + struct dentry *debugfs_dir; + char *edac_dev_name; +}; + +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) +{ + irqreturn_t ret_value = IRQ_NONE; + struct edac_device_ctl_info *dci = dev_id; + struct altr_edac_device_dev *drvdata = dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + + if (irq == drvdata->sb_irq) { + if (priv->ce_clear_mask) + writel(priv->ce_clear_mask, drvdata->base); + edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name); + ret_value = IRQ_HANDLED; + } else if (irq == drvdata->db_irq) { + if (priv->ue_clear_mask) + writel(priv->ue_clear_mask, drvdata->base); + edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + ret_value = IRQ_HANDLED; + } else { + WARN_ON(1); + } + + return ret_value; +} + +static ssize_t altr_edac_device_trig(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) + +{ + u32 *ptemp, i, error_mask; + int result = 0; + u8 trig_type; + unsigned long flags; + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void *generic_ptr = edac_dci->dev; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + if (!priv->alloc_mem) + return -ENOMEM; + + /* + * Note that generic_ptr is initialized to the device * but in + * some alloc_functions, this is overridden and returns data. + */ + ptemp = priv->alloc_mem(priv->trig_alloc_sz, &generic_ptr); + if (!ptemp) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Inject: Buffer Allocation error\n"); + return -ENOMEM; + } + + if (trig_type == ALTR_UE_TRIGGER_CHAR) + error_mask = priv->ue_set_mask; + else + error_mask = priv->ce_set_mask; + + edac_printk(KERN_ALERT, EDAC_DEVICE, + "Trigger Error Mask (0x%X)\n", error_mask); + + local_irq_save(flags); + /* write ECC corrupted data out. */ + for (i = 0; i < (priv->trig_alloc_sz / sizeof(*ptemp)); i++) { + /* Read data so we're in the correct state */ + rmb(); + if (ACCESS_ONCE(ptemp[i])) + result = -1; + /* Toggle Error bit (it is latched), leave ECC enabled */ + writel(error_mask, drvdata->base); + writel(priv->ecc_enable_mask, drvdata->base); + ptemp[i] = i; + } + /* Ensure it has been written out */ + wmb(); + local_irq_restore(flags); + + if (result) + edac_printk(KERN_ERR, EDAC_DEVICE, "Mem Not Cleared\n"); + + /* Read out written data. ECC error caused here */ + for (i = 0; i < ALTR_TRIGGER_READ_WRD_CNT; i++) + if (ACCESS_ONCE(ptemp[i]) != i) + edac_printk(KERN_ERR, EDAC_DEVICE, + "Read doesn't match written data\n"); + + if (priv->free_mem) + priv->free_mem(ptemp, priv->trig_alloc_sz, generic_ptr); + + return count; +} + +static const struct file_operations altr_edac_device_inject_fops = { + .open = simple_open, + .write = altr_edac_device_trig, + .llseek = generic_file_llseek, +}; + +static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, + const struct edac_device_prv_data *priv) +{ + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; + + drvdata->debugfs_dir = edac_debugfs_create_dir(drvdata->edac_dev_name); + if (!drvdata->debugfs_dir) + return; + + if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR, + drvdata->debugfs_dir, edac_dci, + &altr_edac_device_inject_fops)) + debugfs_remove_recursive(drvdata->debugfs_dir); +} + +static const struct of_device_id altr_edac_device_of_match[] = { +#ifdef CONFIG_EDAC_ALTERA_L2C + { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_OCRAM + { .compatible = "altr,socfpga-ocram-ecc", + .data = (void *)&ocramecc_data }, +#endif + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_device_of_match); + +/* + * altr_edac_device_probe() + * This is a generic EDAC device driver that will support + * various Altera memory devices such as the L2 cache ECC and + * OCRAM ECC as well as the memories for other peripherals. + * Module specific initialization is done by passing the + * function index in the device tree. + */ +static int altr_edac_device_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *drvdata; + struct resource *r; + int res = 0; + struct device_node *np = pdev->dev.of_node; + char *ecc_name = (char *)np->name; + static int dev_instance; + + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to open devm\n"); + return -ENOMEM; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get mem resource\n"); + res = -ENODEV; + goto fail; + } + + if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r), + dev_name(&pdev->dev))) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error requesting mem region\n", ecc_name); + res = -EBUSY; + goto fail; + } + + dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name, + 1, ecc_name, 1, 0, NULL, 0, + dev_instance++); + + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate EDAC device\n", ecc_name); + res = -ENOMEM; + goto fail; + } + + drvdata = dci->pvt_info; + dci->dev = &pdev->dev; + platform_set_drvdata(pdev, dci); + drvdata->edac_dev_name = ecc_name; + + drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!drvdata->base) + goto fail1; + + /* Get driver specific data for this EDAC device */ + drvdata->data = of_match_node(altr_edac_device_of_match, np)->data; + + /* Check specific dependencies for the module */ + if (drvdata->data->setup) { + res = drvdata->data->setup(pdev, drvdata->base); + if (res) + goto fail1; + } + + drvdata->sb_irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, drvdata->sb_irq, + altr_edac_device_handler, + 0, dev_name(&pdev->dev), dci); + if (res) + goto fail1; + + drvdata->db_irq = platform_get_irq(pdev, 1); + res = devm_request_irq(&pdev->dev, drvdata->db_irq, + altr_edac_device_handler, + 0, dev_name(&pdev->dev), dci); + if (res) + goto fail1; + + dci->mod_name = "Altera ECC Manager"; + dci->dev_name = drvdata->edac_dev_name; + + res = edac_device_add_device(dci); + if (res) + goto fail1; + + altr_create_edacdev_dbgfs(dci, drvdata->data); + + devres_close_group(&pdev->dev, NULL); + + return 0; + +fail1: + edac_device_free_ctl_info(dci); +fail: + devres_release_group(&pdev->dev, NULL); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, res); + + return res; +} + +static int altr_edac_device_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); + struct altr_edac_device_dev *drvdata = dci->pvt_info; + + debugfs_remove_recursive(drvdata->debugfs_dir); + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(dci); + + return 0; +} + +static struct platform_driver altr_edac_device_driver = { + .probe = altr_edac_device_probe, + .remove = altr_edac_device_remove, + .driver = { + .name = "altr_edac_device", + .of_match_table = altr_edac_device_of_match, + }, +}; +module_platform_driver(altr_edac_device_driver); + +/*********************** OCRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_OCRAM + +static void *ocram_alloc_mem(size_t size, void **other) +{ + struct device_node *np; + struct gen_pool *gp; + void *sram_addr; + + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc"); + if (!np) + return NULL; + + gp = of_gen_pool_get(np, "iram", 0); + of_node_put(np); + if (!gp) + return NULL; + + sram_addr = (void *)gen_pool_alloc(gp, size); + if (!sram_addr) + return NULL; + + memset(sram_addr, 0, size); + /* Ensure data is written out */ + wmb(); + + /* Remember this handle for freeing later */ + *other = gp; + + return sram_addr; +} + +static void ocram_free_mem(void *p, size_t size, void *other) +{ + gen_pool_free((struct gen_pool *)other, (u32)p, size); +} + +/* + * altr_ocram_check_deps() + * Test for OCRAM cache ECC dependencies upon entry because + * platform specific startup should have initialized the + * On-Chip RAM memory and enabled the ECC. + * Can't turn on ECC here because accessing un-initialized + * memory will cause CE/UE errors possibly causing an ABORT. + */ +static int altr_ocram_check_deps(struct platform_device *pdev, + void __iomem *base) +{ + if (readl(base) & ALTR_OCR_ECC_EN) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "OCRAM: No ECC present or ECC disabled.\n"); + return -ENODEV; +} + +const struct edac_device_prv_data ocramecc_data = { + .setup = altr_ocram_check_deps, + .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), + .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), + .dbgfs_name = "altr_ocram_trigger", + .alloc_mem = ocram_alloc_mem, + .free_mem = ocram_free_mem, + .ecc_enable_mask = ALTR_OCR_ECC_EN, + .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS), + .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), + .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE, +}; + +#endif /* CONFIG_EDAC_ALTERA_OCRAM */ + +/********************* L2 Cache EDAC Device Functions ********************/ + +#ifdef CONFIG_EDAC_ALTERA_L2C + +static void *l2_alloc_mem(size_t size, void **other) +{ + struct device *dev = *other; + void *ptemp = devm_kzalloc(dev, size, GFP_KERNEL); + + if (!ptemp) + return NULL; + + /* Make sure everything is written out */ + wmb(); + + /* + * Clean all cache levels up to LoC (includes L2) + * This ensures the corrupted data is written into + * L2 cache for readback test (which causes ECC error). + */ + flush_cache_all(); + + return ptemp; +} + +static void l2_free_mem(void *p, size_t size, void *other) +{ + struct device *dev = other; + + if (dev && p) + devm_kfree(dev, p); +} + +/* + * altr_l2_check_deps() + * Test for L2 cache ECC dependencies upon entry because + * platform specific startup should have initialized the L2 + * memory and enabled the ECC. + * Bail if ECC is not enabled. + * Note that L2 Cache Enable is forced at build time. + */ +static int altr_l2_check_deps(struct platform_device *pdev, + void __iomem *base) +{ + if (readl(base) & ALTR_L2_ECC_EN) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "L2: No ECC present, or ECC disabled\n"); + return -ENODEV; +} + +const struct edac_device_prv_data l2ecc_data = { + .setup = altr_l2_check_deps, + .ce_clear_mask = 0, + .ue_clear_mask = 0, + .dbgfs_name = "altr_l2_trigger", + .alloc_mem = l2_alloc_mem, + .free_mem = l2_free_mem, + .ecc_enable_mask = ALTR_L2_ECC_EN, + .ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS), + .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD), + .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, +}; + +#endif /* CONFIG_EDAC_ALTERA_L2C */ + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); -MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller"); +MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9eee13ef83a5..d87a47547ba5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1452,7 +1452,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 chan_off; u64 dram_base = get_dram_base(pvt, range); u64 hole_off = f10_dhar_offset(pvt); - u64 dct_sel_base_off = (pvt->dct_sel_hi & 0xFFFFFC00) << 16; + u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16; if (hi_rng) { /* diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 54d2f668cb0a..92dbb7e2320c 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -53,7 +53,7 @@ int __init edac_debugfs_init(void) void edac_debugfs_exit(void) { - debugfs_remove(edac_debugfs); + debugfs_remove_recursive(edac_debugfs); } int edac_create_debugfs_nodes(struct mem_ctl_info *mci) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 592af5f0cf39..a97900333e2d 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -390,11 +390,9 @@ static void edac_device_workq_function(struct work_struct *work_req) * between integral seconds */ if (edac_dev->poll_msec == 1000) - queue_delayed_work(edac_workqueue, &edac_dev->work, - round_jiffies_relative(edac_dev->delay)); + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else - queue_delayed_work(edac_workqueue, &edac_dev->work, - edac_dev->delay); + edac_queue_work(&edac_dev->work, edac_dev->delay); } /* @@ -402,8 +400,8 @@ static void edac_device_workq_function(struct work_struct *work_req) * initialize a workq item for this edac_device instance * passing in the new delay period in msec */ -void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, - unsigned msec) +static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + unsigned msec) { edac_dbg(0, "\n"); @@ -422,29 +420,23 @@ void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, * to fire together on the 1 second exactly */ if (edac_dev->poll_msec == 1000) - queue_delayed_work(edac_workqueue, &edac_dev->work, - round_jiffies_relative(edac_dev->delay)); + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else - queue_delayed_work(edac_workqueue, &edac_dev->work, - edac_dev->delay); + edac_queue_work(&edac_dev->work, edac_dev->delay); } /* * edac_device_workq_teardown * stop the workq processing on this edac_dev */ -void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) +static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { - int status; - if (!edac_dev->edac_check) return; - status = cancel_delayed_work(&edac_dev->work); - if (status == 0) { - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + edac_dev->op_state = OP_OFFLINE; + + edac_stop_work(&edac_dev->work); } /* @@ -457,16 +449,15 @@ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, unsigned long value) { - /* cancel the current workq request, without the mutex lock */ - edac_device_workq_teardown(edac_dev); + unsigned long jiffs = msecs_to_jiffies(value); - /* acquire the mutex before doing the workq setup */ - mutex_lock(&device_ctls_mutex); + if (value == 1000) + jiffs = round_jiffies_relative(value); - /* restart the workq request, with new delay value */ - edac_device_workq_setup(edac_dev, value); + edac_dev->poll_msec = value; + edac_dev->delay = jiffs; - mutex_unlock(&device_ctls_mutex); + edac_mod_work(&edac_dev->work, jiffs); } /* diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index fb68a06ad683..93da1a45c716 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -237,11 +237,6 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) /* get the /sys/devices/system/edac reference */ edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys error\n"); - err = -ENODEV; - goto err_out; - } /* Point to the 'edac_subsys' this instance 'reports' to */ edac_dev->edac_subsys = edac_subsys; @@ -256,7 +251,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) if (!try_module_get(edac_dev->owner)) { err = -ENODEV; - goto err_mod_get; + goto err_out; } /* register */ @@ -282,9 +277,6 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) err_kobj_reg: module_put(edac_dev->owner); -err_mod_get: - edac_put_sysfs_subsys(); - err_out: return err; } @@ -306,7 +298,6 @@ void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev) * b) 'kfree' the memory */ kobject_put(&dev->kobj); - edac_put_sysfs_subsys(); } /* edac_dev -> instance information */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 77ecd6a4179a..1472f48c8ac6 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -535,69 +535,18 @@ static void edac_mc_workq_function(struct work_struct *work_req) mutex_lock(&mem_ctls_mutex); - /* if this control struct has movd to offline state, we are done */ - if (mci->op_state == OP_OFFLINE) { + if (mci->op_state != OP_RUNNING_POLL) { mutex_unlock(&mem_ctls_mutex); return; } - /* Only poll controllers that are running polled and have a check */ - if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) + if (edac_mc_assert_error_check_and_clear()) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); - /* Reschedule */ - queue_delayed_work(edac_workqueue, &mci->work, - msecs_to_jiffies(edac_mc_get_poll_msec())); -} - -/* - * edac_mc_workq_setup - * initialize a workq item for this mci - * passing in the new delay period in msec - * - * locking model: - * - * called with the mem_ctls_mutex held - */ -static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, - bool init) -{ - edac_dbg(0, "\n"); - - /* if this instance is not in the POLL state, then simply return */ - if (mci->op_state != OP_RUNNING_POLL) - return; - - if (init) - INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); - - mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); -} - -/* - * edac_mc_workq_teardown - * stop the workq processing on this mci - * - * locking model: - * - * called WITHOUT lock held - */ -static void edac_mc_workq_teardown(struct mem_ctl_info *mci) -{ - int status; - - if (mci->op_state != OP_RUNNING_POLL) - return; - - status = cancel_delayed_work(&mci->work); - if (status == 0) { - edac_dbg(0, "not canceled, flush the queue\n"); - - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + /* Queue ourselves again. */ + edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec())); } /* @@ -616,9 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mc_workq_setup(mci, value, false); + edac_mod_work(&mci->work, value); } - mutex_unlock(&mem_ctls_mutex); } @@ -784,12 +732,12 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, goto fail1; } - /* If there IS a check routine, then we are running POLLED */ - if (mci->edac_check != NULL) { - /* This instance is NOW RUNNING */ + if (mci->edac_check) { mci->op_state = OP_RUNNING_POLL; - edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true); + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec())); + } else { mci->op_state = OP_RUNNING_INTERRUPT; } @@ -836,15 +784,16 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } + /* mark MCI offline: */ + mci->op_state = OP_OFFLINE; + if (!del_mc_from_global_list(mci)) edac_mc_owner = NULL; - mutex_unlock(&mem_ctls_mutex); - /* flush workq processes */ - edac_mc_workq_teardown(mci); + mutex_unlock(&mem_ctls_mutex); - /* marking MCI offline */ - mci->op_state = OP_OFFLINE; + if (mci->edac_check) + edac_stop_work(&mci->work); /* remove from sysfs */ edac_remove_sysfs_mci_device(mci); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index a75acea0f674..26e65ab5932a 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -880,21 +880,26 @@ static struct device_type mci_attr_type = { int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { + char *name; int i, err; /* * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus->name) + name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!name) return -ENOMEM; + mci->bus->name = name; + edac_dbg(0, "creating bus %s\n", mci->bus->name); err = bus_register(mci->bus); - if (err < 0) - goto fail_free_name; + if (err < 0) { + kfree(name); + return err; + } /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; @@ -961,8 +966,8 @@ fail_unregister_dimm: device_unregister(&mci->dev); fail_unregister_bus: bus_unregister(mci->bus); -fail_free_name: - kfree(mci->bus->name); + kfree(name); + return err; } @@ -993,10 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + const char *name = mci->bus->name; + edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); bus_unregister(mci->bus); - kfree(mci->bus->name); + kfree(name); } static void mc_attr_release(struct device *dev) @@ -1018,24 +1025,15 @@ static struct device_type mc_attr_type = { */ int __init edac_mc_sysfs_init(void) { - struct bus_type *edac_subsys; int err; - /* get the /sys/devices/system/edac subsys reference */ - edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys\n"); - err = -EINVAL; - goto out; - } - mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); if (!mci_pdev) { err = -ENOMEM; - goto out_put_sysfs; + goto out; } - mci_pdev->bus = edac_subsys; + mci_pdev->bus = edac_get_sysfs_subsys(); mci_pdev->type = &mc_attr_type; device_initialize(mci_pdev); dev_set_name(mci_pdev, "mc"); @@ -1050,8 +1048,6 @@ int __init edac_mc_sysfs_init(void) out_dev_free: kfree(mci_pdev); - out_put_sysfs: - edac_put_sysfs_subsys(); out: return err; } @@ -1059,5 +1055,4 @@ int __init edac_mc_sysfs_init(void) void edac_mc_sysfs_exit(void) { device_unregister(mci_pdev); - edac_put_sysfs_subsys(); } diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 9cb082a19d8a..5f8543be995a 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -43,9 +43,6 @@ module_param_call(edac_debug_level, edac_set_debug_level, param_get_int, MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2"); #endif -/* scope is to module level only */ -struct workqueue_struct *edac_workqueue; - /* * edac_op_state_to_string() */ @@ -66,31 +63,37 @@ char *edac_op_state_to_string(int opstate) } /* - * edac_workqueue_setup - * initialize the edac work queue for polling operations + * sysfs object: /sys/devices/system/edac + * need to export to other files */ -static int edac_workqueue_setup(void) +static struct bus_type edac_subsys = { + .name = "edac", + .dev_name = "edac", +}; + +static int edac_subsys_init(void) { - edac_workqueue = create_singlethread_workqueue("edac-poller"); - if (edac_workqueue == NULL) - return -ENODEV; - else - return 0; + int err; + + /* create the /sys/devices/system/edac directory */ + err = subsys_system_register(&edac_subsys, NULL); + if (err) + printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); + + return err; } -/* - * edac_workqueue_teardown - * teardown the edac workqueue - */ -static void edac_workqueue_teardown(void) +static void edac_subsys_exit(void) { - if (edac_workqueue) { - flush_workqueue(edac_workqueue); - destroy_workqueue(edac_workqueue); - edac_workqueue = NULL; - } + bus_unregister(&edac_subsys); } +/* return pointer to the 'edac' node in sysfs */ +struct bus_type *edac_get_sysfs_subsys(void) +{ + return &edac_subsys; +} +EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys); /* * edac_init * module initialization entry point @@ -101,6 +104,10 @@ static int __init edac_init(void) edac_printk(KERN_INFO, EDAC_MC, EDAC_VERSION "\n"); + err = edac_subsys_init(); + if (err) + return err; + /* * Harvest and clear any boot/initialization PCI parity errors * @@ -129,6 +136,8 @@ err_wq: edac_mc_sysfs_exit(); err_sysfs: + edac_subsys_exit(); + return err; } @@ -144,6 +153,7 @@ static void __exit edac_exit(void) edac_workqueue_teardown(); edac_mc_sysfs_exit(); edac_debugfs_exit(); + edac_subsys_exit(); } /* diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index b95a48fc723d..cfaacb99c973 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -47,10 +47,12 @@ extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); /* edac core workqueue: single CPU mode */ -extern struct workqueue_struct *edac_workqueue; -extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, - unsigned msec); -extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev); +int edac_workqueue_setup(void); +void edac_workqueue_teardown(void); +bool edac_queue_work(struct delayed_work *work, unsigned long delay); +bool edac_stop_work(struct delayed_work *work); +bool edac_mod_work(struct delayed_work *work, unsigned long delay); + extern void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, unsigned long value); extern void edac_mc_reset_delay_period(unsigned long value); diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 2cf44b4db80c..8f2f2899a7a2 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -178,41 +178,6 @@ static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) INIT_LIST_HEAD(&pci->link); } -#if 0 -/* Older code, but might use in the future */ - -/* - * edac_pci_find() - * Search for an edac_pci_ctl_info structure whose index is 'idx' - * - * If found, return a pointer to the structure - * Else return NULL. - * - * Caller must hold pci_ctls_mutex. - */ -struct edac_pci_ctl_info *edac_pci_find(int idx) -{ - struct list_head *item; - struct edac_pci_ctl_info *pci; - - /* Iterage over list, looking for exact match of ID */ - list_for_each(item, &edac_pci_list) { - pci = list_entry(item, struct edac_pci_ctl_info, link); - - if (pci->pci_idx >= idx) { - if (pci->pci_idx == idx) - return pci; - - /* not on list, so terminate early */ - break; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(edac_pci_find); -#endif - /* * edac_pci_workq_function() * @@ -230,81 +195,25 @@ static void edac_pci_workq_function(struct work_struct *work_req) mutex_lock(&edac_pci_ctls_mutex); - if (pci->op_state == OP_RUNNING_POLL) { - /* we might be in POLL mode, but there may NOT be a poll func - */ - if ((pci->edac_check != NULL) && edac_pci_get_check_errors()) - pci->edac_check(pci); - - /* if we are on a one second period, then use round */ - msec = edac_pci_get_poll_msec(); - if (msec == 1000) - delay = round_jiffies_relative(msecs_to_jiffies(msec)); - else - delay = msecs_to_jiffies(msec); - - /* Reschedule only if we are in POLL mode */ - queue_delayed_work(edac_workqueue, &pci->work, delay); + if (pci->op_state != OP_RUNNING_POLL) { + mutex_unlock(&edac_pci_ctls_mutex); + return; } - mutex_unlock(&edac_pci_ctls_mutex); -} - -/* - * edac_pci_workq_setup() - * initialize a workq item for this edac_pci instance - * passing in the new delay period in msec - * - * locking model: - * called when 'edac_pci_ctls_mutex' is locked - */ -static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, - unsigned int msec) -{ - edac_dbg(0, "\n"); - - INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); - queue_delayed_work(edac_workqueue, &pci->work, - msecs_to_jiffies(edac_pci_get_poll_msec())); -} - -/* - * edac_pci_workq_teardown() - * stop the workq processing on this edac_pci instance - */ -static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) -{ - int status; - - edac_dbg(0, "\n"); - - status = cancel_delayed_work(&pci->work); - if (status == 0) - flush_workqueue(edac_workqueue); -} - -/* - * edac_pci_reset_delay_period - * - * called with a new period value for the workq period - * a) stop current workq timer - * b) restart workq timer with new value - */ -void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, - unsigned long value) -{ - edac_dbg(0, "\n"); + if (edac_pci_get_check_errors()) + pci->edac_check(pci); - edac_pci_workq_teardown(pci); + /* if we are on a one second period, then use round */ + msec = edac_pci_get_poll_msec(); + if (msec == 1000) + delay = round_jiffies_relative(msecs_to_jiffies(msec)); + else + delay = msecs_to_jiffies(msec); - /* need to lock for the setup */ - mutex_lock(&edac_pci_ctls_mutex); - - edac_pci_workq_setup(pci, value); + edac_queue_work(&pci->work, delay); mutex_unlock(&edac_pci_ctls_mutex); } -EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period); /* * edac_pci_alloc_index: Allocate a unique PCI index number @@ -349,10 +258,12 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) goto fail1; } - if (pci->edac_check != NULL) { + if (pci->edac_check) { pci->op_state = OP_RUNNING_POLL; - edac_pci_workq_setup(pci, 1000); + INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); + edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec())); + } else { pci->op_state = OP_RUNNING_INTERRUPT; } @@ -410,8 +321,8 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) mutex_unlock(&edac_pci_ctls_mutex); - /* stop the workq timer */ - edac_pci_workq_teardown(pci); + if (pci->edac_check) + edac_stop_work(&pci->work); edac_printk(KERN_INFO, EDAC_PCI, "Removed device %d for %s %s: DEV %s\n", diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 24d877f6e577..6e3428ba400f 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -331,10 +331,7 @@ static struct kobj_type ktype_edac_pci_main_kobj = { }; /** - * edac_pci_main_kobj_setup() - * - * setup the sysfs for EDAC PCI attributes - * assumes edac_subsys has already been initialized + * edac_pci_main_kobj_setup: Setup the sysfs for EDAC PCI attributes. */ static int edac_pci_main_kobj_setup(void) { @@ -351,11 +348,6 @@ static int edac_pci_main_kobj_setup(void) * controls and attributes */ edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys\n"); - err = -ENODEV; - goto decrement_count_fail; - } /* Bump the reference count on this module to ensure the * modules isn't unloaded until we deconstruct the top @@ -364,7 +356,7 @@ static int edac_pci_main_kobj_setup(void) if (!try_module_get(THIS_MODULE)) { edac_dbg(1, "try_module_get() failed\n"); err = -ENODEV; - goto mod_get_fail; + goto decrement_count_fail; } edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); @@ -399,9 +391,6 @@ kobject_init_and_add_fail: kzalloc_fail: module_put(THIS_MODULE); -mod_get_fail: - edac_put_sysfs_subsys(); - decrement_count_fail: /* if are on this error exit, nothing to tear down */ atomic_dec(&edac_pci_sysfs_refcount); @@ -426,7 +415,6 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); - edac_put_sysfs_subsys(); } } diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index ff07aae5b7fb..952e411f01f2 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -26,8 +26,6 @@ EXPORT_SYMBOL_GPL(edac_handlers); int edac_err_assert = 0; EXPORT_SYMBOL_GPL(edac_err_assert); -static atomic_t edac_subsys_valid = ATOMIC_INIT(0); - int edac_report_status = EDAC_REPORTING_ENABLED; EXPORT_SYMBOL_GPL(edac_report_status); @@ -68,42 +66,3 @@ void edac_atomic_assert_error(void) edac_err_assert++; } EXPORT_SYMBOL_GPL(edac_atomic_assert_error); - -/* - * sysfs object: /sys/devices/system/edac - * need to export to other files - */ -struct bus_type edac_subsys = { - .name = "edac", - .dev_name = "edac", -}; -EXPORT_SYMBOL_GPL(edac_subsys); - -/* return pointer to the 'edac' node in sysfs */ -struct bus_type *edac_get_sysfs_subsys(void) -{ - int err = 0; - - if (atomic_read(&edac_subsys_valid)) - goto out; - - /* create the /sys/devices/system/edac directory */ - err = subsys_system_register(&edac_subsys, NULL); - if (err) { - printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); - return NULL; - } - -out: - atomic_inc(&edac_subsys_valid); - return &edac_subsys; -} -EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys); - -void edac_put_sysfs_subsys(void) -{ - /* last user unregisters it */ - if (atomic_dec_and_test(&edac_subsys_valid)) - bus_unregister(&edac_subsys); -} -EXPORT_SYMBOL_GPL(edac_put_sysfs_subsys); diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 40917775dca1..c655162caf08 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -575,9 +575,7 @@ static void i5100_check_error(struct mem_ctl_info *mci) static void i5100_refresh_scrubbing(struct work_struct *work) { - struct delayed_work *i5100_scrubbing = container_of(work, - struct delayed_work, - work); + struct delayed_work *i5100_scrubbing = to_delayed_work(work); struct i5100_priv *priv = container_of(i5100_scrubbing, struct i5100_priv, i5100_scrubbing); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index e3a945ce374b..49768c08ac07 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -147,6 +147,135 @@ static const char * const mc6_mce_desc[] = { "Status Register File", }; +/* Scalable MCA error strings */ +static const char * const f17h_ls_mce_desc[] = { + "Load queue parity", + "Store queue parity", + "Miss address buffer payload parity", + "L1 TLB parity", + "", /* reserved */ + "DC tag error type 6", + "DC tag error type 1", + "Internal error type 1", + "Internal error type 2", + "Sys Read data error thread 0", + "Sys read data error thread 1", + "DC tag error type 2", + "DC data error type 1 (poison comsumption)", + "DC data error type 2", + "DC data error type 3", + "DC tag error type 4", + "L2 TLB parity", + "PDC parity error", + "DC tag error type 3", + "DC tag error type 5", + "L2 fill data error", +}; + +static const char * const f17h_if_mce_desc[] = { + "microtag probe port parity error", + "IC microtag or full tag multi-hit error", + "IC full tag parity", + "IC data array parity", + "Decoupling queue phys addr parity error", + "L0 ITLB parity error", + "L1 ITLB parity error", + "L2 ITLB parity error", + "BPQ snoop parity on Thread 0", + "BPQ snoop parity on Thread 1", + "L1 BTB multi-match error", + "L2 BTB multi-match error", +}; + +static const char * const f17h_l2_mce_desc[] = { + "L2M tag multi-way-hit error", + "L2M tag ECC error", + "L2M data ECC error", + "HW assert", +}; + +static const char * const f17h_de_mce_desc[] = { + "uop cache tag parity error", + "uop cache data parity error", + "Insn buffer parity error", + "Insn dispatch queue parity error", + "Fetch address FIFO parity", + "Patch RAM data parity", + "Patch RAM sequencer parity", + "uop buffer parity" +}; + +static const char * const f17h_ex_mce_desc[] = { + "Watchdog timeout error", + "Phy register file parity", + "Flag register file parity", + "Immediate displacement register file parity", + "Address generator payload parity", + "EX payload parity", + "Checkpoint queue parity", + "Retire dispatch queue parity", +}; + +static const char * const f17h_fp_mce_desc[] = { + "Physical register file parity", + "Freelist parity error", + "Schedule queue parity", + "NSQ parity error", + "Retire queue parity", + "Status register file parity", +}; + +static const char * const f17h_l3_mce_desc[] = { + "Shadow tag macro ECC error", + "Shadow tag macro multi-way-hit error", + "L3M tag ECC error", + "L3M tag multi-way-hit error", + "L3M data ECC error", + "XI parity, L3 fill done channel error", + "L3 victim queue parity", + "L3 HW assert", +}; + +static const char * const f17h_cs_mce_desc[] = { + "Illegal request from transport layer", + "Address violation", + "Security violation", + "Illegal response from transport layer", + "Unexpected response", + "Parity error on incoming request or probe response data", + "Parity error on incoming read response data", + "Atomic request parity", + "ECC error on probe filter access", +}; + +static const char * const f17h_pie_mce_desc[] = { + "HW assert", + "Internal PIE register security violation", + "Error on GMI link", + "Poison data written to internal PIE register", +}; + +static const char * const f17h_umc_mce_desc[] = { + "DRAM ECC error", + "Data poison error on DRAM", + "SDP parity error", + "Advanced peripheral bus error", + "Command/address parity error", + "Write data CRC error", +}; + +static const char * const f17h_pb_mce_desc[] = { + "Parameter Block RAM ECC error", +}; + +static const char * const f17h_psp_mce_desc[] = { + "PSP RAM ECC or parity error", +}; + +static const char * const f17h_smu_mce_desc[] = { + "SMU RAM ECC or parity error", +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -691,6 +820,177 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } +static void decode_f17h_core_errors(const char *ip_name, u8 xec, + unsigned int mca_type) +{ + const char * const *error_desc_array; + size_t len; + + pr_emerg(HW_ERR "%s Error: ", ip_name); + + switch (mca_type) { + case SMCA_LS: + error_desc_array = f17h_ls_mce_desc; + len = ARRAY_SIZE(f17h_ls_mce_desc) - 1; + + if (xec == 0x4) { + pr_cont("Unrecognized LS MCA error code.\n"); + return; + } + break; + + case SMCA_IF: + error_desc_array = f17h_if_mce_desc; + len = ARRAY_SIZE(f17h_if_mce_desc) - 1; + break; + + case SMCA_L2_CACHE: + error_desc_array = f17h_l2_mce_desc; + len = ARRAY_SIZE(f17h_l2_mce_desc) - 1; + break; + + case SMCA_DE: + error_desc_array = f17h_de_mce_desc; + len = ARRAY_SIZE(f17h_de_mce_desc) - 1; + break; + + case SMCA_EX: + error_desc_array = f17h_ex_mce_desc; + len = ARRAY_SIZE(f17h_ex_mce_desc) - 1; + break; + + case SMCA_FP: + error_desc_array = f17h_fp_mce_desc; + len = ARRAY_SIZE(f17h_fp_mce_desc) - 1; + break; + + case SMCA_L3_CACHE: + error_desc_array = f17h_l3_mce_desc; + len = ARRAY_SIZE(f17h_l3_mce_desc) - 1; + break; + + default: + pr_cont("Corrupted MCA core error info.\n"); + return; + } + + if (xec > len) { + pr_cont("Unrecognized %s MCA bank error code.\n", + amd_core_mcablock_names[mca_type]); + return; + } + + pr_cont("%s.\n", error_desc_array[xec]); +} + +static void decode_df_errors(u8 xec, unsigned int mca_type) +{ + const char * const *error_desc_array; + size_t len; + + pr_emerg(HW_ERR "Data Fabric Error: "); + + switch (mca_type) { + case SMCA_CS: + error_desc_array = f17h_cs_mce_desc; + len = ARRAY_SIZE(f17h_cs_mce_desc) - 1; + break; + + case SMCA_PIE: + error_desc_array = f17h_pie_mce_desc; + len = ARRAY_SIZE(f17h_pie_mce_desc) - 1; + break; + + default: + pr_cont("Corrupted MCA Data Fabric info.\n"); + return; + } + + if (xec > len) { + pr_cont("Unrecognized %s MCA bank error code.\n", + amd_df_mcablock_names[mca_type]); + return; + } + + pr_cont("%s.\n", error_desc_array[xec]); +} + +/* Decode errors according to Scalable MCA specification */ +static void decode_smca_errors(struct mce *m) +{ + u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank); + unsigned int hwid, mca_type, i; + u8 xec = XEC(m->status, xec_mask); + const char * const *error_desc_array; + const char *ip_name; + u32 low, high; + size_t len; + + if (rdmsr_safe(addr, &low, &high)) { + pr_emerg("Invalid IP block specified, error information is unreliable.\n"); + return; + } + + hwid = high & MCI_IPID_HWID; + mca_type = (high & MCI_IPID_MCATYPE) >> 16; + + pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low); + + /* + * Based on hwid and mca_type values, decode errors from respective IPs. + * Note: mca_type values make sense only in the context of an hwid. + */ + for (i = 0; i < ARRAY_SIZE(amd_hwids); i++) + if (amd_hwids[i].hwid == hwid) + break; + + switch (i) { + case SMCA_F17H_CORE: + ip_name = (mca_type == SMCA_L3_CACHE) ? + "L3 Cache" : "F17h Core"; + return decode_f17h_core_errors(ip_name, xec, mca_type); + break; + + case SMCA_DF: + return decode_df_errors(xec, mca_type); + break; + + case SMCA_UMC: + error_desc_array = f17h_umc_mce_desc; + len = ARRAY_SIZE(f17h_umc_mce_desc) - 1; + break; + + case SMCA_PB: + error_desc_array = f17h_pb_mce_desc; + len = ARRAY_SIZE(f17h_pb_mce_desc) - 1; + break; + + case SMCA_PSP: + error_desc_array = f17h_psp_mce_desc; + len = ARRAY_SIZE(f17h_psp_mce_desc) - 1; + break; + + case SMCA_SMU: + error_desc_array = f17h_smu_mce_desc; + len = ARRAY_SIZE(f17h_smu_mce_desc) - 1; + break; + + default: + pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid); + return; + } + + ip_name = amd_hwids[i].name; + pr_emerg(HW_ERR "%s Error: ", ip_name); + + if (xec > len) { + pr_cont("Unrecognized %s MCA bank error code.\n", ip_name); + return; + } + + pr_cont("%s.\n", error_desc_array[xec]); +} + static inline void amd_decode_err_code(u16 ec) { if (INT_ERROR(ec)) { @@ -752,6 +1052,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) struct mce *m = (struct mce *)data; struct cpuinfo_x86 *c = &cpu_data(m->extcpu); int ecc; + u32 ebx = cpuid_ebx(0x80000007); if (amd_filter_mce(m)) return NOTIFY_STOP; @@ -769,11 +1070,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - if (c->x86 == 0x15 || c->x86 == 0x16) + if (c->x86 >= 0x15) pr_cont("|%s|%s", ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); + if (!!(ebx & BIT(3))) { + u32 low, high; + u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + + if (!rdmsr_safe(addr, &low, &high) && + (low & MCI_CONFIG_MCAX)) + pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); + } + /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; if (ecc) @@ -784,6 +1094,11 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (m->status & MCI_STATUS_ADDRV) pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr); + if (!!(ebx & BIT(3))) { + decode_smca_errors(m); + goto err_code; + } + if (!fam_ops) goto err_code; @@ -834,6 +1149,7 @@ static struct notifier_block amd_mce_dec_nb = { static int __init mce_amd_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; + u32 ebx; if (c->x86_vendor != X86_VENDOR_AMD) return -ENODEV; @@ -888,10 +1204,18 @@ static int __init mce_amd_init(void) fam_ops->mc2_mce = f16h_mc2_mce; break; + case 0x17: + ebx = cpuid_ebx(0x80000007); + xec_mask = 0x3f; + if (!(ebx & BIT(3))) { + printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); + goto err_out; + } + break; + default: printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); - kfree(fam_ops); - fam_ops = NULL; + goto err_out; } pr_info("MCE: In-kernel MCE decoding enabled.\n"); @@ -899,6 +1223,11 @@ static int __init mce_amd_init(void) mce_register_decode_chain(&amd_mce_dec_nb); return 0; + +err_out: + kfree(fam_ops); + fam_ops = NULL; + return -EINVAL; } early_initcall(mce_amd_init); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 23ef8e9f2c9a..ca63d0da8889 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -20,6 +20,7 @@ #include <linux/edac.h> #include <linux/smp.h> #include <linux/gfp.h> +#include <linux/fsl/edac.h> #include <linux/of_platform.h> #include <linux/of_device.h> @@ -238,10 +239,12 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) return IRQ_HANDLED; } -int mpc85xx_pci_err_probe(struct platform_device *op) +static int mpc85xx_pci_err_probe(struct platform_device *op) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; + struct mpc85xx_edac_pci_plat_data *plat_data; + struct device_node *of_node; struct resource r; int res = 0; @@ -266,7 +269,15 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; - if (mpc85xx_pcie_find_capability(op->dev.of_node) > 0) + plat_data = op->dev.platform_data; + if (!plat_data) { + dev_err(&op->dev, "no platform data"); + res = -ENXIO; + goto err; + } + of_node = plat_data->of_node; + + if (mpc85xx_pcie_find_capability(of_node) > 0) pdata->is_pcie = true; dev_set_drvdata(&op->dev, pci); @@ -284,7 +295,7 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata->edac_idx = edac_pci_idx++; - res = of_address_to_resource(op->dev.of_node, 0, &r); + res = of_address_to_resource(of_node, 0, &r); if (res) { printk(KERN_ERR "%s: Unable to get resource for " "PCI err regs\n", __func__); @@ -339,7 +350,7 @@ int mpc85xx_pci_err_probe(struct platform_device *op) } if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); + pdata->irq = irq_of_parse_and_map(of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, mpc85xx_pci_isr, IRQF_SHARED, @@ -386,8 +397,22 @@ err: devres_release_group(&op->dev, mpc85xx_pci_err_probe); return res; } -EXPORT_SYMBOL(mpc85xx_pci_err_probe); +static const struct platform_device_id mpc85xx_pci_err_match[] = { + { + .name = "mpc85xx-pci-edac" + }, + {} +}; + +static struct platform_driver mpc85xx_pci_err_driver = { + .probe = mpc85xx_pci_err_probe, + .id_table = mpc85xx_pci_err_match, + .driver = { + .name = "mpc85xx_pci_err", + .suppress_bind_attrs = true, + }, +}; #endif /* CONFIG_PCI */ /**************************** L2 Err device ***************************/ @@ -1208,10 +1233,18 @@ static void __init mpc85xx_mc_clear_rfxe(void *data) } #endif +static struct platform_driver * const drivers[] = { + &mpc85xx_mc_err_driver, + &mpc85xx_l2_err_driver, +#ifdef CONFIG_PCI + &mpc85xx_pci_err_driver, +#endif +}; + static int __init mpc85xx_mc_init(void) { int res = 0; - u32 pvr = 0; + u32 __maybe_unused pvr = 0; printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, " "(C) 2006 Montavista Software\n"); @@ -1226,13 +1259,9 @@ static int __init mpc85xx_mc_init(void) break; } - res = platform_driver_register(&mpc85xx_mc_err_driver); - if (res) - printk(KERN_WARNING EDAC_MOD_STR "MC fails to register\n"); - - res = platform_driver_register(&mpc85xx_l2_err_driver); + res = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (res) - printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); + printk(KERN_WARNING EDAC_MOD_STR "drivers fail to register\n"); #ifdef CONFIG_FSL_SOC_BOOKE pvr = mfspr(SPRN_PVR); @@ -1270,8 +1299,7 @@ static void __exit mpc85xx_mc_exit(void) on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); } #endif - platform_driver_unregister(&mpc85xx_l2_err_driver); - platform_driver_unregister(&mpc85xx_mc_err_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mpc85xx_mc_exit); diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 0574e1bbe45c..6c54127e6eae 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -847,6 +847,15 @@ static struct platform_driver mv64x60_mc_err_driver = { } }; +static struct platform_driver * const drivers[] = { + &mv64x60_mc_err_driver, + &mv64x60_cpu_err_driver, + &mv64x60_sram_err_driver, +#ifdef CONFIG_PCI + &mv64x60_pci_err_driver, +#endif +}; + static int __init mv64x60_edac_init(void) { int ret = 0; @@ -863,39 +872,13 @@ static int __init mv64x60_edac_init(void) break; } - ret = platform_driver_register(&mv64x60_mc_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR "MC err failed to register\n"); - - ret = platform_driver_register(&mv64x60_cpu_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "CPU err failed to register\n"); - - ret = platform_driver_register(&mv64x60_sram_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "SRAM err failed to register\n"); - -#ifdef CONFIG_PCI - ret = platform_driver_register(&mv64x60_pci_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "PCI err failed to register\n"); -#endif - - return ret; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(mv64x60_edac_init); static void __exit mv64x60_edac_exit(void) { -#ifdef CONFIG_PCI - platform_driver_unregister(&mv64x60_pci_err_driver); -#endif - platform_driver_unregister(&mv64x60_sram_err_driver); - platform_driver_unregister(&mv64x60_cpu_err_driver); - platform_driver_unregister(&mv64x60_mc_err_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mv64x60_edac_exit); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 429309c62699..93f0d4120289 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -65,15 +65,20 @@ static const u32 ibridge_dram_rule[] = { 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, }; -#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) -#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) -#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) +static const u32 knl_dram_rule[] = { + 0x60, 0x68, 0x70, 0x78, 0x80, /* 0-4 */ + 0x88, 0x90, 0x98, 0xa0, 0xa8, /* 5-9 */ + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, /* 10-14 */ + 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, /* 15-19 */ + 0x100, 0x108, 0x110, 0x118, /* 20-23 */ +}; + #define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) #define A7MODE(reg) GET_BITFIELD(reg, 26, 26) -static char *get_dram_attr(u32 reg) +static char *show_dram_attr(u32 attr) { - switch(DRAM_ATTR(reg)) { + switch (attr) { case 0: return "DRAM"; case 1: @@ -97,6 +102,14 @@ static const u32 ibridge_interleave_list[] = { 0xdc, 0xe4, 0xec, 0xf4, 0xfc, }; +static const u32 knl_interleave_list[] = { + 0x64, 0x6c, 0x74, 0x7c, 0x84, /* 0-4 */ + 0x8c, 0x94, 0x9c, 0xa4, 0xac, /* 5-9 */ + 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, /* 10-14 */ + 0xdc, 0xe4, 0xec, 0xf4, 0xfc, /* 15-19 */ + 0x104, 0x10c, 0x114, 0x11c, /* 20-23 */ +}; + struct interleave_pkg { unsigned char start; unsigned char end; @@ -134,10 +147,13 @@ static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, /* Devices 12 Function 7 */ #define TOLM 0x80 -#define TOHM 0x84 +#define TOHM 0x84 #define HASWELL_TOLM 0xd0 #define HASWELL_TOHM_0 0xd4 #define HASWELL_TOHM_1 0xd8 +#define KNL_TOLM 0xd0 +#define KNL_TOHM_0 0xd4 +#define KNL_TOHM_1 0xd8 #define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) #define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) @@ -148,6 +164,8 @@ static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, #define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) +#define SOURCE_ID_KNL(reg) GET_BITFIELD(reg, 12, 14) + #define SAD_CONTROL 0xf4 /* Device 14 function 0 */ @@ -170,6 +188,7 @@ static const u32 tad_dram_rule[] = { /* Device 15, function 0 */ #define MCMTR 0x7c +#define KNL_MCMTR 0x624 #define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) #define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) @@ -186,6 +205,8 @@ static const int mtr_regs[] = { 0x80, 0x84, 0x88, }; +static const int knl_mtr_reg = 0xb60; + #define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) #define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) #define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) @@ -256,6 +277,9 @@ static const u32 correrrthrsld[] = { #define NUM_CHANNELS 8 /* 2MC per socket, four chan per MC */ #define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */ +#define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */ +#define KNL_MAX_EDCS 8 /* Embedded DRAM controllers */ #define CHANNEL_UNSPECIFIED 0xf /* Intel IA32 SDM 15-14 */ enum type { @@ -263,6 +287,7 @@ enum type { IVY_BRIDGE, HASWELL, BROADWELL, + KNIGHTS_LANDING, }; struct sbridge_pvt; @@ -273,6 +298,10 @@ struct sbridge_info { u64 (*get_tolm)(struct sbridge_pvt *pvt); u64 (*get_tohm)(struct sbridge_pvt *pvt); u64 (*rir_limit)(u32 reg); + u64 (*sad_limit)(u32 reg); + u32 (*interleave_mode)(u32 reg); + char* (*show_interleave_mode)(u32 reg); + u32 (*dram_attr)(u32 reg); const u32 *dram_rule; const u32 *interleave_list; const struct interleave_pkg *interleave_pkg; @@ -308,6 +337,16 @@ struct sbridge_dev { struct mem_ctl_info *mci; }; +struct knl_pvt { + struct pci_dev *pci_cha[KNL_MAX_CHAS]; + struct pci_dev *pci_channel[KNL_MAX_CHANNELS]; + struct pci_dev *pci_mc0; + struct pci_dev *pci_mc1; + struct pci_dev *pci_mc0_misc; + struct pci_dev *pci_mc1_misc; + struct pci_dev *pci_mc_info; /* tolm, tohm */ +}; + struct sbridge_pvt { struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; struct pci_dev *pci_sad0, *pci_sad1; @@ -336,6 +375,7 @@ struct sbridge_pvt { /* Memory description */ u64 tolm, tohm; + struct knl_pvt knl; }; #define PCI_DESCR(device_id, opt) \ @@ -509,6 +549,50 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = { {0,} /* 0 terminated list. */ }; +/* Knight's Landing Support */ +/* + * KNL's memory channels are swizzled between memory controllers. + * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2 + */ +#define knl_channel_remap(channel) ((channel + 3) % 6) + +/* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840 +/* DRAM channel stuff; bank addrs, dimmmtr, etc.. 2-8-2 - 2-9-4 (6 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL 0x7843 +/* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_TA 0x7844 +/* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0 0x782a +/* SAD target - 1-29-1 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1 0x782b +/* Caching / Home Agent */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHA 0x782c +/* Device with TOLM and TOHM, 0-5-0 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM 0x7810 + +/* + * KNL differs from SB, IB, and Haswell in that it has multiple + * instances of the same device with the same device ID, so we handle that + * by creating as many copies in the table as we expect to find. + * (Like device ID must be grouped together.) + */ + +static const struct pci_id_descr pci_dev_descr_knl[] = { + [0] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) }, + [1] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) }, + [2 ... 3] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)}, + [4 ... 41] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) }, + [42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) }, + [48] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) }, + [49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) }, +}; + +static const struct pci_id_table pci_dev_descr_knl_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_knl), + {0,} +}; + /* * Broadwell support * @@ -585,6 +669,7 @@ static const struct pci_device_id sbridge_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)}, {0,} /* 0 terminated list. */ }; @@ -598,7 +683,7 @@ static inline int numrank(enum type type, u32 mtr) int ranks = (1 << RANK_CNT_BITS(mtr)); int max = 4; - if (type == HASWELL || type == BROADWELL) + if (type == HASWELL || type == BROADWELL || type == KNIGHTS_LANDING) max = 8; if (ranks > max) { @@ -636,10 +721,19 @@ static inline int numcol(u32 mtr) return 1 << cols; } -static struct sbridge_dev *get_sbridge_dev(u8 bus) +static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus) { struct sbridge_dev *sbridge_dev; + /* + * If we have devices scattered across several busses that pertain + * to the same memory controller, we'll lump them all together. + */ + if (multi_bus) { + return list_first_entry_or_null(&sbridge_edac_list, + struct sbridge_dev, list); + } + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { if (sbridge_dev->bus == bus) return sbridge_dev; @@ -718,6 +812,67 @@ static u64 rir_limit(u32 reg) return ((u64)GET_BITFIELD(reg, 1, 10) << 29) | 0x1fffffff; } +static u64 sad_limit(u32 reg) +{ + return (GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff; +} + +static u32 interleave_mode(u32 reg) +{ + return GET_BITFIELD(reg, 1, 1); +} + +char *show_interleave_mode(u32 reg) +{ + return interleave_mode(reg) ? "8:6" : "[8:6]XOR[18:16]"; +} + +static u32 dram_attr(u32 reg) +{ + return GET_BITFIELD(reg, 2, 3); +} + +static u64 knl_sad_limit(u32 reg) +{ + return (GET_BITFIELD(reg, 7, 26) << 26) | 0x3ffffff; +} + +static u32 knl_interleave_mode(u32 reg) +{ + return GET_BITFIELD(reg, 1, 2); +} + +static char *knl_show_interleave_mode(u32 reg) +{ + char *s; + + switch (knl_interleave_mode(reg)) { + case 0: + s = "use address bits [8:6]"; + break; + case 1: + s = "use address bits [10:8]"; + break; + case 2: + s = "use address bits [14:12]"; + break; + case 3: + s = "use address bits [32:30]"; + break; + default: + WARN_ON(1); + break; + } + + return s; +} + +static u32 dram_attr_knl(u32 reg) +{ + return GET_BITFIELD(reg, 3, 4); +} + + static enum mem_type get_memory_type(struct sbridge_pvt *pvt) { u32 reg; @@ -769,6 +924,12 @@ out: return mtype; } +static enum dev_type knl_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* for KNL value is fixed */ + return DEV_X16; +} + static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr) { /* there's no way to figure out */ @@ -812,6 +973,12 @@ static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr) return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9)); } +static enum mem_type knl_get_memory_type(struct sbridge_pvt *pvt) +{ + /* DDR4 RDIMMS and LRDIMMS are supported */ + return MEM_RDDR4; +} + static u8 get_node_id(struct sbridge_pvt *pvt) { u32 reg; @@ -827,6 +994,15 @@ static u8 haswell_get_node_id(struct sbridge_pvt *pvt) return GET_BITFIELD(reg, 0, 3); } +static u8 knl_get_node_id(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, ®); + return GET_BITFIELD(reg, 0, 2); +} + + static u64 haswell_get_tolm(struct sbridge_pvt *pvt) { u32 reg; @@ -848,6 +1024,26 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt) return rc | 0x1ffffff; } +static u64 knl_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOLM, ®); + return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff; +} + +static u64 knl_get_tohm(struct sbridge_pvt *pvt) +{ + u64 rc; + u32 reg_lo, reg_hi; + + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_0, ®_lo); + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_1, ®_hi); + rc = ((u64)reg_hi << 32) | reg_lo; + return rc | 0x3ffffff; +} + + static u64 haswell_rir_limit(u32 reg) { return (((u64)GET_BITFIELD(reg, 1, 11) + 1) << 29) - 1; @@ -905,11 +1101,22 @@ static int check_if_ecc_is_active(const u8 bus, enum type type) case BROADWELL: id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA; break; + case KNIGHTS_LANDING: + /* + * KNL doesn't group things by bus the same way + * SB/IB/Haswell does. + */ + id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA; + break; default: return -ENODEV; } - pdev = get_pdev_same_bus(bus, id); + if (type != KNIGHTS_LANDING) + pdev = get_pdev_same_bus(bus, id); + else + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0); + if (!pdev) { sbridge_printk(KERN_ERR, "Couldn't find PCI device " "%04x:%04x! on bus %02d\n", @@ -917,7 +1124,8 @@ static int check_if_ecc_is_active(const u8 bus, enum type type) return -ENODEV; } - pci_read_config_dword(pdev, MCMTR, &mcmtr); + pci_read_config_dword(pdev, + type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr); if (!IS_ECC_ENABLED(mcmtr)) { sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); return -ENODEV; @@ -925,6 +1133,476 @@ static int check_if_ecc_is_active(const u8 bus, enum type type) return 0; } +/* Low bits of TAD limit, and some metadata. */ +static const u32 knl_tad_dram_limit_lo[] = { + 0x400, 0x500, 0x600, 0x700, + 0x800, 0x900, 0xa00, 0xb00, +}; + +/* Low bits of TAD offset. */ +static const u32 knl_tad_dram_offset_lo[] = { + 0x404, 0x504, 0x604, 0x704, + 0x804, 0x904, 0xa04, 0xb04, +}; + +/* High 16 bits of TAD limit and offset. */ +static const u32 knl_tad_dram_hi[] = { + 0x408, 0x508, 0x608, 0x708, + 0x808, 0x908, 0xa08, 0xb08, +}; + +/* Number of ways a tad entry is interleaved. */ +static const u32 knl_tad_ways[] = { + 8, 6, 4, 3, 2, 1, +}; + +/* + * Retrieve the n'th Target Address Decode table entry + * from the memory controller's TAD table. + * + * @pvt: driver private data + * @entry: which entry you want to retrieve + * @mc: which memory controller (0 or 1) + * @offset: output tad range offset + * @limit: output address of first byte above tad range + * @ways: output number of interleave ways + * + * The offset value has curious semantics. It's a sort of running total + * of the sizes of all the memory regions that aren't mapped in this + * tad table. + */ +static int knl_get_tad(const struct sbridge_pvt *pvt, + const int entry, + const int mc, + u64 *offset, + u64 *limit, + int *ways) +{ + u32 reg_limit_lo, reg_offset_lo, reg_hi; + struct pci_dev *pci_mc; + int way_id; + + switch (mc) { + case 0: + pci_mc = pvt->knl.pci_mc0; + break; + case 1: + pci_mc = pvt->knl.pci_mc1; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + pci_read_config_dword(pci_mc, + knl_tad_dram_limit_lo[entry], ®_limit_lo); + pci_read_config_dword(pci_mc, + knl_tad_dram_offset_lo[entry], ®_offset_lo); + pci_read_config_dword(pci_mc, + knl_tad_dram_hi[entry], ®_hi); + + /* Is this TAD entry enabled? */ + if (!GET_BITFIELD(reg_limit_lo, 0, 0)) + return -ENODEV; + + way_id = GET_BITFIELD(reg_limit_lo, 3, 5); + + if (way_id < ARRAY_SIZE(knl_tad_ways)) { + *ways = knl_tad_ways[way_id]; + } else { + *ways = 0; + sbridge_printk(KERN_ERR, + "Unexpected value %d in mc_tad_limit_lo wayness field\n", + way_id); + return -ENODEV; + } + + /* + * The least significant 6 bits of base and limit are truncated. + * For limit, we fill the missing bits with 1s. + */ + *offset = ((u64) GET_BITFIELD(reg_offset_lo, 6, 31) << 6) | + ((u64) GET_BITFIELD(reg_hi, 0, 15) << 32); + *limit = ((u64) GET_BITFIELD(reg_limit_lo, 6, 31) << 6) | 63 | + ((u64) GET_BITFIELD(reg_hi, 16, 31) << 32); + + return 0; +} + +/* Determine which memory controller is responsible for a given channel. */ +static int knl_channel_mc(int channel) +{ + WARN_ON(channel < 0 || channel >= 6); + + return channel < 3 ? 1 : 0; +} + +/* + * Get the Nth entry from EDC_ROUTE_TABLE register. + * (This is the per-tile mapping of logical interleave targets to + * physical EDC modules.) + * + * entry 0: 0:2 + * 1: 3:5 + * 2: 6:8 + * 3: 9:11 + * 4: 12:14 + * 5: 15:17 + * 6: 18:20 + * 7: 21:23 + * reserved: 24:31 + */ +static u32 knl_get_edc_route(int entry, u32 reg) +{ + WARN_ON(entry >= KNL_MAX_EDCS); + return GET_BITFIELD(reg, entry*3, (entry*3)+2); +} + +/* + * Get the Nth entry from MC_ROUTE_TABLE register. + * (This is the per-tile mapping of logical interleave targets to + * physical DRAM channels modules.) + * + * entry 0: mc 0:2 channel 18:19 + * 1: mc 3:5 channel 20:21 + * 2: mc 6:8 channel 22:23 + * 3: mc 9:11 channel 24:25 + * 4: mc 12:14 channel 26:27 + * 5: mc 15:17 channel 28:29 + * reserved: 30:31 + * + * Though we have 3 bits to identify the MC, we should only see + * the values 0 or 1. + */ + +static u32 knl_get_mc_route(int entry, u32 reg) +{ + int mc, chan; + + WARN_ON(entry >= KNL_MAX_CHANNELS); + + mc = GET_BITFIELD(reg, entry*3, (entry*3)+2); + chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1); + + return knl_channel_remap(mc*3 + chan); +} + +/* + * Render the EDC_ROUTE register in human-readable form. + * Output string s should be at least KNL_MAX_EDCS*2 bytes. + */ +static void knl_show_edc_route(u32 reg, char *s) +{ + int i; + + for (i = 0; i < KNL_MAX_EDCS; i++) { + s[i*2] = knl_get_edc_route(i, reg) + '0'; + s[i*2+1] = '-'; + } + + s[KNL_MAX_EDCS*2 - 1] = '\0'; +} + +/* + * Render the MC_ROUTE register in human-readable form. + * Output string s should be at least KNL_MAX_CHANNELS*2 bytes. + */ +static void knl_show_mc_route(u32 reg, char *s) +{ + int i; + + for (i = 0; i < KNL_MAX_CHANNELS; i++) { + s[i*2] = knl_get_mc_route(i, reg) + '0'; + s[i*2+1] = '-'; + } + + s[KNL_MAX_CHANNELS*2 - 1] = '\0'; +} + +#define KNL_EDC_ROUTE 0xb8 +#define KNL_MC_ROUTE 0xb4 + +/* Is this dram rule backed by regular DRAM in flat mode? */ +#define KNL_EDRAM(reg) GET_BITFIELD(reg, 29, 29) + +/* Is this dram rule cached? */ +#define KNL_CACHEABLE(reg) GET_BITFIELD(reg, 28, 28) + +/* Is this rule backed by edc ? */ +#define KNL_EDRAM_ONLY(reg) GET_BITFIELD(reg, 29, 29) + +/* Is this rule backed by DRAM, cacheable in EDRAM? */ +#define KNL_CACHEABLE(reg) GET_BITFIELD(reg, 28, 28) + +/* Is this rule mod3? */ +#define KNL_MOD3(reg) GET_BITFIELD(reg, 27, 27) + +/* + * Figure out how big our RAM modules are. + * + * The DIMMMTR register in KNL doesn't tell us the size of the DIMMs, so we + * have to figure this out from the SAD rules, interleave lists, route tables, + * and TAD rules. + * + * SAD rules can have holes in them (e.g. the 3G-4G hole), so we have to + * inspect the TAD rules to figure out how large the SAD regions really are. + * + * When we know the real size of a SAD region and how many ways it's + * interleaved, we know the individual contribution of each channel to + * TAD is size/ways. + * + * Finally, we have to check whether each channel participates in each SAD + * region. + * + * Fortunately, KNL only supports one DIMM per channel, so once we know how + * much memory the channel uses, we know the DIMM is at least that large. + * (The BIOS might possibly choose not to map all available memory, in which + * case we will underreport the size of the DIMM.) + * + * In theory, we could try to determine the EDC sizes as well, but that would + * only work in flat mode, not in cache mode. + * + * @mc_sizes: Output sizes of channels (must have space for KNL_MAX_CHANNELS + * elements) + */ +static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes) +{ + u64 sad_base, sad_size, sad_limit = 0; + u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace; + int sad_rule = 0; + int tad_rule = 0; + int intrlv_ways, tad_ways; + u32 first_pkg, pkg; + int i; + u64 sad_actual_size[2]; /* sad size accounting for holes, per mc */ + u32 dram_rule, interleave_reg; + u32 mc_route_reg[KNL_MAX_CHAS]; + u32 edc_route_reg[KNL_MAX_CHAS]; + int edram_only; + char edc_route_string[KNL_MAX_EDCS*2]; + char mc_route_string[KNL_MAX_CHANNELS*2]; + int cur_reg_start; + int mc; + int channel; + int way; + int participants[KNL_MAX_CHANNELS]; + int participant_count = 0; + + for (i = 0; i < KNL_MAX_CHANNELS; i++) + mc_sizes[i] = 0; + + /* Read the EDC route table in each CHA. */ + cur_reg_start = 0; + for (i = 0; i < KNL_MAX_CHAS; i++) { + pci_read_config_dword(pvt->knl.pci_cha[i], + KNL_EDC_ROUTE, &edc_route_reg[i]); + + if (i > 0 && edc_route_reg[i] != edc_route_reg[i-1]) { + knl_show_edc_route(edc_route_reg[i-1], + edc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "edc route table for CHA %d: %s\n", + cur_reg_start, edc_route_string); + else + edac_dbg(0, "edc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, edc_route_string); + cur_reg_start = i; + } + } + knl_show_edc_route(edc_route_reg[i-1], edc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "edc route table for CHA %d: %s\n", + cur_reg_start, edc_route_string); + else + edac_dbg(0, "edc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, edc_route_string); + + /* Read the MC route table in each CHA. */ + cur_reg_start = 0; + for (i = 0; i < KNL_MAX_CHAS; i++) { + pci_read_config_dword(pvt->knl.pci_cha[i], + KNL_MC_ROUTE, &mc_route_reg[i]); + + if (i > 0 && mc_route_reg[i] != mc_route_reg[i-1]) { + knl_show_mc_route(mc_route_reg[i-1], mc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "mc route table for CHA %d: %s\n", + cur_reg_start, mc_route_string); + else + edac_dbg(0, "mc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, mc_route_string); + cur_reg_start = i; + } + } + knl_show_mc_route(mc_route_reg[i-1], mc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "mc route table for CHA %d: %s\n", + cur_reg_start, mc_route_string); + else + edac_dbg(0, "mc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, mc_route_string); + + /* Process DRAM rules */ + for (sad_rule = 0; sad_rule < pvt->info.max_sad; sad_rule++) { + /* previous limit becomes the new base */ + sad_base = sad_limit; + + pci_read_config_dword(pvt->pci_sad0, + pvt->info.dram_rule[sad_rule], &dram_rule); + + if (!DRAM_RULE_ENABLE(dram_rule)) + break; + + edram_only = KNL_EDRAM_ONLY(dram_rule); + + sad_limit = pvt->info.sad_limit(dram_rule)+1; + sad_size = sad_limit - sad_base; + + pci_read_config_dword(pvt->pci_sad0, + pvt->info.interleave_list[sad_rule], &interleave_reg); + + /* + * Find out how many ways this dram rule is interleaved. + * We stop when we see the first channel again. + */ + first_pkg = sad_pkg(pvt->info.interleave_pkg, + interleave_reg, 0); + for (intrlv_ways = 1; intrlv_ways < 8; intrlv_ways++) { + pkg = sad_pkg(pvt->info.interleave_pkg, + interleave_reg, intrlv_ways); + + if ((pkg & 0x8) == 0) { + /* + * 0 bit means memory is non-local, + * which KNL doesn't support + */ + edac_dbg(0, "Unexpected interleave target %d\n", + pkg); + return -1; + } + + if (pkg == first_pkg) + break; + } + if (KNL_MOD3(dram_rule)) + intrlv_ways *= 3; + + edac_dbg(3, "dram rule %d (base 0x%llx, limit 0x%llx), %d way interleave%s\n", + sad_rule, + sad_base, + sad_limit, + intrlv_ways, + edram_only ? ", EDRAM" : ""); + + /* + * Find out how big the SAD region really is by iterating + * over TAD tables (SAD regions may contain holes). + * Each memory controller might have a different TAD table, so + * we have to look at both. + * + * Livespace is the memory that's mapped in this TAD table, + * deadspace is the holes (this could be the MMIO hole, or it + * could be memory that's mapped by the other TAD table but + * not this one). + */ + for (mc = 0; mc < 2; mc++) { + sad_actual_size[mc] = 0; + tad_livespace = 0; + for (tad_rule = 0; + tad_rule < ARRAY_SIZE( + knl_tad_dram_limit_lo); + tad_rule++) { + if (knl_get_tad(pvt, + tad_rule, + mc, + &tad_deadspace, + &tad_limit, + &tad_ways)) + break; + + tad_size = (tad_limit+1) - + (tad_livespace + tad_deadspace); + tad_livespace += tad_size; + tad_base = (tad_limit+1) - tad_size; + + if (tad_base < sad_base) { + if (tad_limit > sad_base) + edac_dbg(0, "TAD region overlaps lower SAD boundary -- TAD tables may be configured incorrectly.\n"); + } else if (tad_base < sad_limit) { + if (tad_limit+1 > sad_limit) { + edac_dbg(0, "TAD region overlaps upper SAD boundary -- TAD tables may be configured incorrectly.\n"); + } else { + /* TAD region is completely inside SAD region */ + edac_dbg(3, "TAD region %d 0x%llx - 0x%llx (%lld bytes) table%d\n", + tad_rule, tad_base, + tad_limit, tad_size, + mc); + sad_actual_size[mc] += tad_size; + } + } + tad_base = tad_limit+1; + } + } + + for (mc = 0; mc < 2; mc++) { + edac_dbg(3, " total TAD DRAM footprint in table%d : 0x%llx (%lld bytes)\n", + mc, sad_actual_size[mc], sad_actual_size[mc]); + } + + /* Ignore EDRAM rule */ + if (edram_only) + continue; + + /* Figure out which channels participate in interleave. */ + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) + participants[channel] = 0; + + /* For each channel, does at least one CHA have + * this channel mapped to the given target? + */ + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) { + for (way = 0; way < intrlv_ways; way++) { + int target; + int cha; + + if (KNL_MOD3(dram_rule)) + target = way; + else + target = 0x7 & sad_pkg( + pvt->info.interleave_pkg, interleave_reg, way); + + for (cha = 0; cha < KNL_MAX_CHAS; cha++) { + if (knl_get_mc_route(target, + mc_route_reg[cha]) == channel + && !participants[channel]) { + participant_count++; + participants[channel] = 1; + break; + } + } + } + } + + if (participant_count != intrlv_ways) + edac_dbg(0, "participant_count (%d) != interleave_ways (%d): DIMM size may be incorrect\n", + participant_count, intrlv_ways); + + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) { + mc = knl_channel_mc(channel); + if (participants[channel]) { + edac_dbg(4, "mc channel %d contributes %lld bytes via sad entry %d\n", + channel, + sad_actual_size[mc]/intrlv_ways, + sad_rule); + mc_sizes[channel] += + sad_actual_size[mc]/intrlv_ways; + } + } + } + + return 0; +} + static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; @@ -934,13 +1612,20 @@ static int get_dimm_config(struct mem_ctl_info *mci) u32 reg; enum edac_type mode; enum mem_type mtype; + int channels = pvt->info.type == KNIGHTS_LANDING ? + KNL_MAX_CHANNELS : NUM_CHANNELS; + u64 knl_mc_sizes[KNL_MAX_CHANNELS]; - if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) + if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL || + pvt->info.type == KNIGHTS_LANDING) pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®); else pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); - pvt->sbridge_dev->source_id = SOURCE_ID(reg); + if (pvt->info.type == KNIGHTS_LANDING) + pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg); + else + pvt->sbridge_dev->source_id = SOURCE_ID(reg); pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt); edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", @@ -948,31 +1633,42 @@ static int get_dimm_config(struct mem_ctl_info *mci) pvt->sbridge_dev->node_id, pvt->sbridge_dev->source_id); - pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); - if (IS_MIRROR_ENABLED(reg)) { - edac_dbg(0, "Memory mirror is enabled\n"); - pvt->is_mirrored = true; - } else { - edac_dbg(0, "Memory mirror is disabled\n"); + /* KNL doesn't support mirroring or lockstep, + * and is always closed page + */ + if (pvt->info.type == KNIGHTS_LANDING) { + mode = EDAC_S4ECD4ED; pvt->is_mirrored = false; - } - pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); - if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { - edac_dbg(0, "Lockstep is enabled\n"); - mode = EDAC_S8ECD8ED; - pvt->is_lockstep = true; - } else { - edac_dbg(0, "Lockstep is disabled\n"); - mode = EDAC_S4ECD4ED; - pvt->is_lockstep = false; - } - if (IS_CLOSE_PG(pvt->info.mcmtr)) { - edac_dbg(0, "address map is on closed page mode\n"); - pvt->is_close_pg = true; + if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0) + return -1; } else { - edac_dbg(0, "address map is on open page mode\n"); - pvt->is_close_pg = false; + pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); + if (IS_MIRROR_ENABLED(reg)) { + edac_dbg(0, "Memory mirror is enabled\n"); + pvt->is_mirrored = true; + } else { + edac_dbg(0, "Memory mirror is disabled\n"); + pvt->is_mirrored = false; + } + + pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); + if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { + edac_dbg(0, "Lockstep is enabled\n"); + mode = EDAC_S8ECD8ED; + pvt->is_lockstep = true; + } else { + edac_dbg(0, "Lockstep is disabled\n"); + mode = EDAC_S4ECD4ED; + pvt->is_lockstep = false; + } + if (IS_CLOSE_PG(pvt->info.mcmtr)) { + edac_dbg(0, "address map is on closed page mode\n"); + pvt->is_close_pg = true; + } else { + edac_dbg(0, "address map is on open page mode\n"); + pvt->is_close_pg = false; + } } mtype = pvt->info.get_memory_type(pvt); @@ -988,23 +1684,46 @@ static int get_dimm_config(struct mem_ctl_info *mci) else banks = 8; - for (i = 0; i < NUM_CHANNELS; i++) { + for (i = 0; i < channels; i++) { u32 mtr; - if (!pvt->pci_tad[i]) - continue; - for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + int max_dimms_per_channel; + + if (pvt->info.type == KNIGHTS_LANDING) { + max_dimms_per_channel = 1; + if (!pvt->knl.pci_channel[i]) + continue; + } else { + max_dimms_per_channel = ARRAY_SIZE(mtr_regs); + if (!pvt->pci_tad[i]) + continue; + } + + for (j = 0; j < max_dimms_per_channel; j++) { dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0); - pci_read_config_dword(pvt->pci_tad[i], - mtr_regs[j], &mtr); + if (pvt->info.type == KNIGHTS_LANDING) { + pci_read_config_dword(pvt->knl.pci_channel[i], + knl_mtr_reg, &mtr); + } else { + pci_read_config_dword(pvt->pci_tad[i], + mtr_regs[j], &mtr); + } edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); if (IS_DIMM_PRESENT(mtr)) { pvt->channel[i].dimms++; ranks = numrank(pvt->info.type, mtr); - rows = numrow(mtr); - cols = numcol(mtr); + + if (pvt->info.type == KNIGHTS_LANDING) { + /* For DDR4, this is fixed. */ + cols = 1 << 10; + rows = knl_mc_sizes[i] / + ((u64) cols * ranks * banks * 8); + } else { + rows = numrow(mtr); + cols = numcol(mtr); + } size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); @@ -1069,7 +1788,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) /* SAD_LIMIT Address range is 45:26 */ pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); - limit = SAD_LIMIT(reg); + limit = pvt->info.sad_limit(reg); if (!DRAM_RULE_ENABLE(reg)) continue; @@ -1081,10 +1800,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", n_sads, - get_dram_attr(reg), + show_dram_attr(pvt->info.dram_attr(reg)), gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", + pvt->info.show_interleave_mode(reg), reg); prv = limit; @@ -1101,6 +1820,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) } } + if (pvt->info.type == KNIGHTS_LANDING) + return; + /* * Step 3) Get TAD range */ @@ -1117,8 +1839,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci) edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", n_tads, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)TAD_SOCK(reg), - (u32)TAD_CH(reg), + (u32)(1 << TAD_SOCK(reg)), + (u32)TAD_CH(reg) + 1, (u32)TAD_TGT0(reg), (u32)TAD_TGT1(reg), (u32)TAD_TGT2(reg), @@ -1248,7 +1970,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (!DRAM_RULE_ENABLE(reg)) continue; - limit = SAD_LIMIT(reg); + limit = pvt->info.sad_limit(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory socket"); return -EINVAL; @@ -1262,8 +1984,8 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return -EINVAL; } dram_rule = reg; - *area_type = get_dram_attr(dram_rule); - interleave_mode = INTERLEAVE_MODE(dram_rule); + *area_type = show_dram_attr(pvt->info.dram_attr(dram_rule)); + interleave_mode = pvt->info.interleave_mode(dram_rule); pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); @@ -1396,7 +2118,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = TAD_SOCK(reg) + 1; + sck_way = 1 << TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; @@ -1453,7 +2175,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, n_tads, addr, limit, - (u32)TAD_SOCK(reg), + sck_way, ch_way, offset, idx, @@ -1468,18 +2190,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci, offset, addr); return -EINVAL; } - addr -= offset; - /* Store the low bits [0:6] of the addr */ - ch_addr = addr & 0x7f; - /* Remove socket wayness and remove 6 bits */ - addr >>= 6; - addr = div_u64(addr, sck_xch); -#if 0 - /* Divide by channel way */ - addr = addr / ch_way; -#endif - /* Recover the last 6 bits */ - ch_addr |= addr << 6; + + ch_addr = addr - offset; + ch_addr >>= (6 + shiftup); + ch_addr /= ch_way * sck_way; + ch_addr <<= (6 + shiftup); + ch_addr |= addr & ((1 << (6 + shiftup)) - 1); /* * Step 3) Decode rank @@ -1567,7 +2283,8 @@ static void sbridge_put_all_devices(void) static int sbridge_get_onedevice(struct pci_dev **prev, u8 *num_mc, const struct pci_id_table *table, - const unsigned devno) + const unsigned devno, + const int multi_bus) { struct sbridge_dev *sbridge_dev; const struct pci_id_descr *dev_descr = &table->descr[devno]; @@ -1603,7 +2320,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev, } bus = pdev->bus->number; - sbridge_dev = get_sbridge_dev(bus); + sbridge_dev = get_sbridge_dev(bus, multi_bus); if (!sbridge_dev) { sbridge_dev = alloc_sbridge_dev(bus, table); if (!sbridge_dev) { @@ -1652,21 +2369,32 @@ static int sbridge_get_onedevice(struct pci_dev **prev, * @num_mc: pointer to the memory controllers count, to be incremented in case * of success. * @table: model specific table + * @allow_dups: allow for multiple devices to exist with the same device id + * (as implemented, this isn't expected to work correctly in the + * multi-socket case). + * @multi_bus: don't assume devices on different buses belong to different + * memory controllers. * * returns 0 in case of success or error code */ -static int sbridge_get_all_devices(u8 *num_mc, - const struct pci_id_table *table) +static int sbridge_get_all_devices_full(u8 *num_mc, + const struct pci_id_table *table, + int allow_dups, + int multi_bus) { int i, rc; struct pci_dev *pdev = NULL; while (table && table->descr) { for (i = 0; i < table->n_devs; i++) { - pdev = NULL; + if (!allow_dups || i == 0 || + table->descr[i].dev_id != + table->descr[i-1].dev_id) { + pdev = NULL; + } do { rc = sbridge_get_onedevice(&pdev, num_mc, - table, i); + table, i, multi_bus); if (rc < 0) { if (i == 0) { i = table->n_devs; @@ -1675,7 +2403,7 @@ static int sbridge_get_all_devices(u8 *num_mc, sbridge_put_all_devices(); return -ENODEV; } - } while (pdev); + } while (pdev && !allow_dups); } table++; } @@ -1683,6 +2411,11 @@ static int sbridge_get_all_devices(u8 *num_mc, return 0; } +#define sbridge_get_all_devices(num_mc, table) \ + sbridge_get_all_devices_full(num_mc, table, 0, 0) +#define sbridge_get_all_devices_knl(num_mc, table) \ + sbridge_get_all_devices_full(num_mc, table, 1, 1) + static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, struct sbridge_dev *sbridge_dev) { @@ -2038,6 +2771,131 @@ enodev: return -ENODEV; } +static int knl_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + int dev, func; + + int i; + int devidx; + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + + /* Extract PCI device and function. */ + dev = (pdev->devfn >> 3) & 0x1f; + func = pdev->devfn & 0x7; + + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_KNL_IMC_MC: + if (dev == 8) + pvt->knl.pci_mc0 = pdev; + else if (dev == 9) + pvt->knl.pci_mc1 = pdev; + else { + sbridge_printk(KERN_ERR, + "Memory controller in unexpected place! (dev %d, fn %d)\n", + dev, func); + continue; + } + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0: + pvt->pci_sad0 = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1: + pvt->pci_sad1 = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_CHA: + /* There are one of these per tile, and range from + * 1.14.0 to 1.18.5. + */ + devidx = ((dev-14)*8)+func; + + if (devidx < 0 || devidx >= KNL_MAX_CHAS) { + sbridge_printk(KERN_ERR, + "Caching and Home Agent in unexpected place! (dev %d, fn %d)\n", + dev, func); + continue; + } + + WARN_ON(pvt->knl.pci_cha[devidx] != NULL); + + pvt->knl.pci_cha[devidx] = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL: + devidx = -1; + + /* + * MC0 channels 0-2 are device 9 function 2-4, + * MC1 channels 3-5 are device 8 function 2-4. + */ + + if (dev == 9) + devidx = func-2; + else if (dev == 8) + devidx = 3 + (func-2); + + if (devidx < 0 || devidx >= KNL_MAX_CHANNELS) { + sbridge_printk(KERN_ERR, + "DRAM Channel Registers in unexpected place! (dev %d, fn %d)\n", + dev, func); + continue; + } + + WARN_ON(pvt->knl.pci_channel[devidx] != NULL); + pvt->knl.pci_channel[devidx] = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM: + pvt->knl.pci_mc_info = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_TA: + pvt->pci_ta = pdev; + break; + + default: + sbridge_printk(KERN_ERR, "Unexpected device %d\n", + pdev->device); + break; + } + } + + if (!pvt->knl.pci_mc0 || !pvt->knl.pci_mc1 || + !pvt->pci_sad0 || !pvt->pci_sad1 || + !pvt->pci_ta) { + goto enodev; + } + + for (i = 0; i < KNL_MAX_CHANNELS; i++) { + if (!pvt->knl.pci_channel[i]) { + sbridge_printk(KERN_ERR, "Missing channel %d\n", i); + goto enodev; + } + } + + for (i = 0; i < KNL_MAX_CHAS; i++) { + if (!pvt->knl.pci_cha[i]) { + sbridge_printk(KERN_ERR, "Missing CHA %d\n", i); + goto enodev; + } + } + + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; +} + /**************************************************************************** Error check routines ****************************************************************************/ @@ -2127,8 +2985,36 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, if (!GET_BITFIELD(m->status, 58, 58)) return; - rc = get_memory_error_data(mci, m->addr, &socket, &ha, - &channel_mask, &rank, &area_type, msg); + if (pvt->info.type == KNIGHTS_LANDING) { + if (channel == 14) { + edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) + ? " recoverable" : "", + mscod, errcode, + m->bank); + } else { + char A = *("A"); + + channel = knl_channel_remap(channel); + channel_mask = 1 << channel; + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) + ? " recoverable" : " ", + mscod, errcode, channel, A + channel); + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + channel, 0, -1, + optype, msg); + } + return; + } else { + rc = get_memory_error_data(mci, m->addr, &socket, &ha, + &channel_mask, &rank, &area_type, msg); + } + if (rc < 0) goto err_parsing; new_mci = get_mci_for_node_id(socket); @@ -2359,10 +3245,11 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) /* allocate a new MC control structure */ layers[0].type = EDAC_MC_LAYER_CHANNEL; - layers[0].size = NUM_CHANNELS; + layers[0].size = type == KNIGHTS_LANDING ? + KNL_MAX_CHANNELS : NUM_CHANNELS; layers[0].is_virt_csrow = false; layers[1].type = EDAC_MC_LAYER_SLOT; - layers[1].size = MAX_DIMMS; + layers[1].size = type == KNIGHTS_LANDING ? 1 : MAX_DIMMS; layers[1].is_virt_csrow = true; mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, sizeof(*pvt)); @@ -2380,7 +3267,8 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->sbridge_dev = sbridge_dev; sbridge_dev->mci = mci; - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = type == KNIGHTS_LANDING ? + MEM_FLAG_DDR4 : MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "sbridge_edac.c"; @@ -2401,6 +3289,10 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; pvt->info.rir_limit = rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.show_interleave_mode = show_interleave_mode; + pvt->info.dram_attr = dram_attr; pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); @@ -2421,6 +3313,10 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; pvt->info.rir_limit = rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.show_interleave_mode = show_interleave_mode; + pvt->info.dram_attr = dram_attr; pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); pvt->info.interleave_list = sbridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); @@ -2441,6 +3337,10 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; pvt->info.rir_limit = haswell_rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.show_interleave_mode = show_interleave_mode; + pvt->info.dram_attr = dram_attr; pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); @@ -2461,6 +3361,10 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; pvt->info.rir_limit = haswell_rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.show_interleave_mode = show_interleave_mode; + pvt->info.dram_attr = dram_attr; pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); @@ -2473,6 +3377,30 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) if (unlikely(rc < 0)) goto fail0; break; + case KNIGHTS_LANDING: + /* pvt->info.rankcfgr == ??? */ + pvt->info.get_tolm = knl_get_tolm; + pvt->info.get_tohm = knl_get_tohm; + pvt->info.dram_rule = knl_dram_rule; + pvt->info.get_memory_type = knl_get_memory_type; + pvt->info.get_node_id = knl_get_node_id; + pvt->info.rir_limit = NULL; + pvt->info.sad_limit = knl_sad_limit; + pvt->info.interleave_mode = knl_interleave_mode; + pvt->info.show_interleave_mode = knl_show_interleave_mode; + pvt->info.dram_attr = dram_attr_knl; + pvt->info.max_sad = ARRAY_SIZE(knl_dram_rule); + pvt->info.interleave_list = knl_interleave_list; + pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list); + pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = knl_get_width; + mci->ctl_name = kasprintf(GFP_KERNEL, + "Knights Landing Socket#%d", mci->mc_idx); + + rc = knl_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + break; } /* Get dimm basic config and the memory layout */ @@ -2527,20 +3455,29 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (pdev->device) { case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: - rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table); + rc = sbridge_get_all_devices(&num_mc, + pci_dev_descr_ibridge_table); type = IVY_BRIDGE; break; case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table); + rc = sbridge_get_all_devices(&num_mc, + pci_dev_descr_sbridge_table); type = SANDY_BRIDGE; break; case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_haswell_table); + rc = sbridge_get_all_devices(&num_mc, + pci_dev_descr_haswell_table); type = HASWELL; break; case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_broadwell_table); + rc = sbridge_get_all_devices(&num_mc, + pci_dev_descr_broadwell_table); type = BROADWELL; + break; + case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0: + rc = sbridge_get_all_devices_knl(&num_mc, + pci_dev_descr_knl_table); + type = KNIGHTS_LANDING; break; } if (unlikely(rc < 0)) { diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c new file mode 100644 index 000000000000..1b8c07e44fd8 --- /dev/null +++ b/drivers/edac/wq.c @@ -0,0 +1,42 @@ +#include "edac_module.h" + +static struct workqueue_struct *wq; + +bool edac_queue_work(struct delayed_work *work, unsigned long delay) +{ + return queue_delayed_work(wq, work, delay); +} +EXPORT_SYMBOL_GPL(edac_queue_work); + +bool edac_mod_work(struct delayed_work *work, unsigned long delay) +{ + return mod_delayed_work(wq, work, delay); +} +EXPORT_SYMBOL_GPL(edac_mod_work); + +bool edac_stop_work(struct delayed_work *work) +{ + bool ret; + + ret = cancel_delayed_work_sync(work); + flush_workqueue(wq); + + return ret; +} +EXPORT_SYMBOL_GPL(edac_stop_work); + +int edac_workqueue_setup(void) +{ + wq = create_singlethread_workqueue("edac-poller"); + if (!wq) + return -ENODEV; + else + return 0; +} + +void edac_workqueue_teardown(void) +{ + flush_workqueue(wq); + destroy_workqueue(wq); + wq = NULL; +} diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 41f876414a18..bf19b6e3bd12 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -61,6 +61,7 @@ struct xgene_edac { struct regmap *mcba_map; struct regmap *mcbb_map; struct regmap *efuse_map; + struct regmap *rb_map; void __iomem *pcp_csr; spinlock_t lock; struct dentry *dfs; @@ -1057,7 +1058,7 @@ static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr) case 0x041: return true; } - } else if (L3C_ELR_ERRSYN(l3celr) == 9) + } else if (L3C_ELR_ERRWAY(l3celr) == 9) return true; return false; @@ -1353,6 +1354,17 @@ static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3) #define GLBL_MDED_ERRH 0x0848 #define GLBL_MDED_ERRHMASK 0x084c +/* IO Bus Registers */ +#define RBCSR 0x0000 +#define STICKYERR_MASK BIT(0) +#define RBEIR 0x0008 +#define AGENT_OFFLINE_ERR_MASK BIT(30) +#define UNIMPL_RBPAGE_ERR_MASK BIT(29) +#define WORD_ALIGNED_ERR_MASK BIT(28) +#define PAGE_ACCESS_ERR_MASK BIT(27) +#define WRITE_ACCESS_MASK BIT(26) +#define RBERRADDR_RD(src) ((src) & 0x03FFFFFF) + static const char * const soc_mem_err_v1[] = { "10GbE0", "10GbE1", @@ -1470,6 +1482,51 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) u32 err_addr_hi; u32 reg; + /* If the register bus resource isn't available, just skip it */ + if (!ctx->edac->rb_map) + goto rb_skip; + + /* + * Check RB access errors + * 1. Out of range + * 2. Un-implemented page + * 3. Un-aligned access + * 4. Offline slave IP + */ + if (regmap_read(ctx->edac->rb_map, RBCSR, ®)) + return; + if (reg & STICKYERR_MASK) { + bool write; + u32 address; + + dev_err(edac_dev->dev, "IOB bus access error(s)\n"); + if (regmap_read(ctx->edac->rb_map, RBEIR, ®)) + return; + write = reg & WRITE_ACCESS_MASK ? 1 : 0; + address = RBERRADDR_RD(reg); + if (reg & AGENT_OFFLINE_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s access to offline agent error\n", + write ? "write" : "read"); + if (reg & UNIMPL_RBPAGE_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s access to unimplemented page error\n", + write ? "write" : "read"); + if (reg & WORD_ALIGNED_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s word aligned access error\n", + write ? "write" : "read"); + if (reg & PAGE_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s to page out of range access error\n", + write ? "write" : "read"); + if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) + return; + if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) + return; + } +rb_skip: + /* IOB Bridge agent transaction error interrupt */ reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS); if (!reg) @@ -1852,6 +1909,17 @@ static int xgene_edac_probe(struct platform_device *pdev) goto out_err; } + /* + * NOTE: The register bus resource is optional for compatibility + * reason. + */ + edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-rb"); + if (IS_ERR(edac->rb_map)) { + dev_warn(edac->dev, "missing syscon regmap rb\n"); + edac->rb_map = NULL; + } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(edac->pcp_csr)) { |