diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats | 18 | ||||
-rw-r--r-- | drivers/firmware/efi/cper.c | 4 | ||||
-rw-r--r-- | drivers/pci/pci.c | 28 | ||||
-rw-r--r-- | drivers/pci/pci.h | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 20 | ||||
-rw-r--r-- | drivers/pci/pcie/dpc.c | 14 | ||||
-rw-r--r-- | drivers/pci/pcie/err.c | 20 | ||||
-rw-r--r-- | include/linux/aer.h | 11 | ||||
-rw-r--r-- | include/ras/ras_event.h | 10 |
9 files changed, 80 insertions, 47 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats index 860db53037a5..d1f67bb81d5d 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats @@ -11,7 +11,7 @@ saw any problems). What: /sys/bus/pci/devices/<dev>/aer_dev_correctable Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: List of correctable errors seen and reported by this PCI device using ERR_COR. Note that since multiple errors may @@ -32,7 +32,7 @@ Description: List of correctable errors seen and reported by this What: /sys/bus/pci/devices/<dev>/aer_dev_fatal Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: List of uncorrectable fatal errors seen and reported by this PCI device using ERR_FATAL. Note that since multiple errors may @@ -62,7 +62,7 @@ Description: List of uncorrectable fatal errors seen and reported by this What: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: List of uncorrectable nonfatal errors seen and reported by this PCI device using ERR_NONFATAL. Note that since multiple errors @@ -100,20 +100,20 @@ collectors) that are AER capable. These indicate the number of error messages as device, so these counters include them and are thus cumulative of all the error messages on the PCI hierarchy originating at that root port. -What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor +What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_cor Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: Total number of ERR_COR messages reported to rootport. -What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal +What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_fatal Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: Total number of ERR_FATAL messages reported to rootport. -What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal +What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_nonfatal Date: July 2018 -KernelVersion: 4.19.0 +KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: Total number of ERR_NONFATAL messages reported to rootport. diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 35c37f667781..d3f98161171e 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -445,8 +445,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, printk("%saer_uncor_severity: 0x%08x\n", pfx, aer->uncor_severity); printk("%sTLP Header: %08x %08x %08x %08x\n", pfx, - aer->header_log.dw0, aer->header_log.dw1, - aer->header_log.dw2, aer->header_log.dw3); + aer->header_log.dw[0], aer->header_log.dw[1], + aer->header_log.dw[2], aer->header_log.dw[3]); } } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5a4b501a3f41..3125bf29506c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1068,6 +1068,34 @@ disable_acs_redir: } /** + * pcie_read_tlp_log - read TLP Header Log + * @dev: PCIe device + * @where: PCI Config offset of TLP Header Log + * @tlp_log: TLP Log structure to fill + * + * Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC. + * + * Return: 0 on success and filled TLP Log structure, <0 on error. + */ +int pcie_read_tlp_log(struct pci_dev *dev, int where, + struct pcie_tlp_log *tlp_log) +{ + int i, ret; + + memset(tlp_log, 0, sizeof(*tlp_log)); + + for (i = 0; i < 4; i++) { + ret = pci_read_config_dword(dev, where + i * 4, + &tlp_log->dw[i]); + if (ret) + return pcibios_err_to_errno(ret); + } + + return 0; +} +EXPORT_SYMBOL_GPL(pcie_read_tlp_log); + +/** * pci_restore_bars - restore a device's BAR values (e.g. after wake-up) * @dev: PCI device to have its BARs restored * diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index eca5938deb07..ba7543cc9de0 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -408,7 +408,7 @@ struct aer_err_info { unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ - struct aer_header_log_regs tlp; /* TLP Header */ + struct pcie_tlp_log tlp; /* TLP Header */ }; int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 05fc30bb5134..ac6293c24976 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -664,11 +664,10 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, } } -static void __print_tlp_header(struct pci_dev *dev, - struct aer_header_log_regs *t) +static void __print_tlp_header(struct pci_dev *dev, struct pcie_tlp_log *t) { pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", - t->dw0, t->dw1, t->dw2, t->dw3); + t->dw[0], t->dw[1], t->dw[2], t->dw[3]); } static void __aer_print_error(struct pci_dev *dev, @@ -1210,7 +1209,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { int type = pci_pcie_type(dev); int aer = dev->aer_cap; - int temp; + u32 aercc; /* Must reset in this function */ info->status = 0; @@ -1241,19 +1240,12 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) return 0; /* Get First Error Pointer */ - pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); - info->first_error = PCI_ERR_CAP_FEP(temp); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc); + info->first_error = PCI_ERR_CAP_FEP(aercc); if (info->status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, &info->tlp); } } diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 94111e438241..c197bc7f7f2c 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -190,7 +190,8 @@ out: static void dpc_process_rp_pio_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, dpc_status, first_error; - u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; + u32 status, mask, sev, syserr, exc, log, prefix; + struct pcie_tlp_log tlp_log; int i; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status); @@ -216,16 +217,9 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) if (pdev->dpc_rp_log_size < 4) goto clear_status; - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, - &dw0); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4, - &dw1); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8, - &dw2); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12, - &dw3); + pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log); pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", - dw0, dw1, dw2, dw3); + tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]); if (pdev->dpc_rp_log_size < 5) goto clear_status; diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 59c90d04a609..705893b5f7b0 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include <linux/pci.h> +#include <linux/pm_runtime.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ diff --git a/include/linux/aer.h b/include/linux/aer.h index ae0fae70d4bd..4b97f38f3fcf 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -18,11 +18,8 @@ struct pci_dev; -struct aer_header_log_regs { - u32 dw0; - u32 dw1; - u32 dw2; - u32 dw3; +struct pcie_tlp_log { + u32 dw[4]; }; struct aer_capability_regs { @@ -33,13 +30,15 @@ struct aer_capability_regs { u32 cor_status; u32 cor_mask; u32 cap_control; - struct aer_header_log_regs header_log; + struct pcie_tlp_log header_log; u32 root_command; u32 root_status; u16 cor_err_source; u16 uncor_err_source; }; +int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); + #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index cbd3ddd7c33d..c011ea236e9b 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -300,7 +300,7 @@ TRACE_EVENT(aer_event, const u32 status, const u8 severity, const u8 tlp_header_valid, - struct aer_header_log_regs *tlp), + struct pcie_tlp_log *tlp), TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), @@ -318,10 +318,10 @@ TRACE_EVENT(aer_event, __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; if (tlp_header_valid) { - __entry->tlp_header[0] = tlp->dw0; - __entry->tlp_header[1] = tlp->dw1; - __entry->tlp_header[2] = tlp->dw2; - __entry->tlp_header[3] = tlp->dw3; + __entry->tlp_header[0] = tlp->dw[0]; + __entry->tlp_header[1] = tlp->dw[1]; + __entry->tlp_header[2] = tlp->dw[2]; + __entry->tlp_header[3] = tlp->dw[3]; } ), |