From 9249dee611d6624bc9044fdf3877ace67d6143ab Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 2 Jan 2018 11:25:32 +0000 Subject: dt-bindings: Document devicetree binding for ARM DSU PMU This patch documents the devicetree bindings for ARM DSU PMU. Cc: Mark Rutland Cc: Will Deacon Cc: devicetree@vger.kernel.org Cc: frowand.list@gmail.com Acked-by: Rob Herring Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- .../devicetree/bindings/arm/arm-dsu-pmu.txt | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt new file mode 100644 index 000000000000..6efabba530f1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt @@ -0,0 +1,27 @@ +* ARM DynamIQ Shared Unit (DSU) Performance Monitor Unit (PMU) + +ARM DyanmIQ Shared Unit (DSU) integrates one or more CPU cores +with a shared L3 memory system, control logic and external interfaces to +form a multicore cluster. The PMU enables to gather various statistics on +the operations of the DSU. The PMU provides independent 32bit counters that +can count any of the supported events, along with a 64bit cycle counter. +The PMU is accessed via CPU system registers and has no MMIO component. + +** DSU PMU required properties: + +- compatible : should be one of : + + "arm,dsu-pmu" + +- interrupts : Exactly 1 SPI must be listed. + +- cpus : List of phandles for the CPUs connected to this DSU instance. + + +** Example: + +dsu-pmu-0 { + compatible = "arm,dsu-pmu"; + interrupts = ; + cpus = <&cpu_0>, <&cpu_1>; +}; -- cgit v1.2.3-58-ga151 From 7520fa99246dade7ab6dde1573a146beed632abd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 2 Jan 2018 11:25:33 +0000 Subject: perf: ARM DynamIQ Shared Unit PMU support Add support for the Cluster PMU part of the ARM DynamIQ Shared Unit (DSU). The DSU integrates one or more cores with an L3 memory system, control logic, and external interfaces to form a multicore cluster. The PMU allows counting the various events related to L3, SCU etc, along with providing a cycle counter. The PMU can be accessed via system registers, which are common to the cores in the same cluster. The PMU registers follow the semantics of the ARMv8 PMU, mostly, with the exception that the counters record the cluster wide events. This driver is mostly based on the ARMv8 and CCI PMU drivers. The driver only supports ARM64 at the moment. It can be extended to support ARM32 by providing register accessors like we do in arch/arm64/include/arm_dsu_pmu.h. Cc: Mark Rutland Cc: Will Deacon Reviewed-by: Jonathan Cameron Reviewed-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- Documentation/perf/arm_dsu_pmu.txt | 28 ++ arch/arm64/include/asm/arm_dsu_pmu.h | 129 ++++++ drivers/perf/Kconfig | 9 + drivers/perf/Makefile | 1 + drivers/perf/arm_dsu_pmu.c | 843 +++++++++++++++++++++++++++++++++++ 5 files changed, 1010 insertions(+) create mode 100644 Documentation/perf/arm_dsu_pmu.txt create mode 100644 arch/arm64/include/asm/arm_dsu_pmu.h create mode 100644 drivers/perf/arm_dsu_pmu.c (limited to 'Documentation') diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/perf/arm_dsu_pmu.txt new file mode 100644 index 000000000000..d611e15f5add --- /dev/null +++ b/Documentation/perf/arm_dsu_pmu.txt @@ -0,0 +1,28 @@ +ARM DynamIQ Shared Unit (DSU) PMU +================================== + +ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system, +control logic and external interfaces to form a multicore cluster. The PMU +allows counting the various events related to the L3 cache, Snoop Control Unit +etc, using 32bit independent counters. It also provides a 64bit cycle counter. + +The PMU can only be accessed via CPU system registers and are common to the +cores connected to the same DSU. Like most of the other uncore PMUs, DSU +PMU doesn't support process specific events and cannot be used in sampling mode. + +The DSU provides a bitmap for a subset of implemented events via hardware +registers. There is no way for the driver to determine if the other events +are available or not. Hence the driver exposes only those events advertised +by the DSU, in "events" directory under : + + /sys/bus/event_sources/devices/arm_dsu_/ + +The user should refer to the TRM of the product to figure out the supported events +and use the raw event code for the unlisted events. + +The driver also exposes the CPUs connected to the DSU instance in "associated_cpus". + + +e.g usage : + + perf stat -a -e arm_dsu_0/cycles/ diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h b/arch/arm64/include/asm/arm_dsu_pmu.h new file mode 100644 index 000000000000..82e5cc3356bf --- /dev/null +++ b/arch/arm64/include/asm/arm_dsu_pmu.h @@ -0,0 +1,129 @@ +/* + * ARM DynamIQ Shared Unit (DSU) PMU Low level register access routines. + * + * Copyright (C) ARM Limited, 2017. + * + * Author: Suzuki K Poulose + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2, as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + + +#define CLUSTERPMCR_EL1 sys_reg(3, 0, 15, 5, 0) +#define CLUSTERPMCNTENSET_EL1 sys_reg(3, 0, 15, 5, 1) +#define CLUSTERPMCNTENCLR_EL1 sys_reg(3, 0, 15, 5, 2) +#define CLUSTERPMOVSSET_EL1 sys_reg(3, 0, 15, 5, 3) +#define CLUSTERPMOVSCLR_EL1 sys_reg(3, 0, 15, 5, 4) +#define CLUSTERPMSELR_EL1 sys_reg(3, 0, 15, 5, 5) +#define CLUSTERPMINTENSET_EL1 sys_reg(3, 0, 15, 5, 6) +#define CLUSTERPMINTENCLR_EL1 sys_reg(3, 0, 15, 5, 7) +#define CLUSTERPMCCNTR_EL1 sys_reg(3, 0, 15, 6, 0) +#define CLUSTERPMXEVTYPER_EL1 sys_reg(3, 0, 15, 6, 1) +#define CLUSTERPMXEVCNTR_EL1 sys_reg(3, 0, 15, 6, 2) +#define CLUSTERPMMDCR_EL1 sys_reg(3, 0, 15, 6, 3) +#define CLUSTERPMCEID0_EL1 sys_reg(3, 0, 15, 6, 4) +#define CLUSTERPMCEID1_EL1 sys_reg(3, 0, 15, 6, 5) + +static inline u32 __dsu_pmu_read_pmcr(void) +{ + return read_sysreg_s(CLUSTERPMCR_EL1); +} + +static inline void __dsu_pmu_write_pmcr(u32 val) +{ + write_sysreg_s(val, CLUSTERPMCR_EL1); + isb(); +} + +static inline u32 __dsu_pmu_get_reset_overflow(void) +{ + u32 val = read_sysreg_s(CLUSTERPMOVSCLR_EL1); + /* Clear the bit */ + write_sysreg_s(val, CLUSTERPMOVSCLR_EL1); + isb(); + return val; +} + +static inline void __dsu_pmu_select_counter(int counter) +{ + write_sysreg_s(counter, CLUSTERPMSELR_EL1); + isb(); +} + +static inline u64 __dsu_pmu_read_counter(int counter) +{ + __dsu_pmu_select_counter(counter); + return read_sysreg_s(CLUSTERPMXEVCNTR_EL1); +} + +static inline void __dsu_pmu_write_counter(int counter, u64 val) +{ + __dsu_pmu_select_counter(counter); + write_sysreg_s(val, CLUSTERPMXEVCNTR_EL1); + isb(); +} + +static inline void __dsu_pmu_set_event(int counter, u32 event) +{ + __dsu_pmu_select_counter(counter); + write_sysreg_s(event, CLUSTERPMXEVTYPER_EL1); + isb(); +} + +static inline u64 __dsu_pmu_read_pmccntr(void) +{ + return read_sysreg_s(CLUSTERPMCCNTR_EL1); +} + +static inline void __dsu_pmu_write_pmccntr(u64 val) +{ + write_sysreg_s(val, CLUSTERPMCCNTR_EL1); + isb(); +} + +static inline void __dsu_pmu_disable_counter(int counter) +{ + write_sysreg_s(BIT(counter), CLUSTERPMCNTENCLR_EL1); + isb(); +} + +static inline void __dsu_pmu_enable_counter(int counter) +{ + write_sysreg_s(BIT(counter), CLUSTERPMCNTENSET_EL1); + isb(); +} + +static inline void __dsu_pmu_counter_interrupt_enable(int counter) +{ + write_sysreg_s(BIT(counter), CLUSTERPMINTENSET_EL1); + isb(); +} + +static inline void __dsu_pmu_counter_interrupt_disable(int counter) +{ + write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1); + isb(); +} + + +static inline u32 __dsu_pmu_read_pmceid(int n) +{ + switch (n) { + case 0: + return read_sysreg_s(CLUSTERPMCEID0_EL1); + case 1: + return read_sysreg_s(CLUSTERPMCEID1_EL1); + default: + BUILD_BUG(); + return 0; + } +} diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index b8f44b068fc6..da5724cd89cf 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -17,6 +17,15 @@ config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y +config ARM_DSU_PMU + tristate "ARM DynamIQ Shared Unit (DSU) PMU" + depends on ARM64 + help + Provides support for performance monitor unit in ARM DynamIQ Shared + Unit (DSU). The DSU integrates one or more cores with an L3 memory + system, control logic. The PMU allows counting various events related + to DSU. + config HISI_PMU bool "HiSilicon SoC PMU" depends on ARM64 && ACPI diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 710a0135bd61..c2f27419bdf0 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_HISI_PMU) += hisilicon/ diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c new file mode 100644 index 000000000000..37c0526c93d5 --- /dev/null +++ b/drivers/perf/arm_dsu_pmu.c @@ -0,0 +1,843 @@ +/* + * ARM DynamIQ Shared Unit (DSU) PMU driver + * + * Copyright (C) ARM Limited, 2017. + * + * Based on ARM CCI-PMU, ARMv8 PMU-v3 drivers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#define PMUNAME "arm_dsu" +#define DRVNAME PMUNAME "_pmu" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* PMU event codes */ +#define DSU_PMU_EVT_CYCLES 0x11 +#define DSU_PMU_EVT_CHAIN 0x1e + +#define DSU_PMU_MAX_COMMON_EVENTS 0x40 + +#define DSU_PMU_MAX_HW_CNTRS 32 +#define DSU_PMU_HW_COUNTER_MASK (DSU_PMU_MAX_HW_CNTRS - 1) + +#define CLUSTERPMCR_E BIT(0) +#define CLUSTERPMCR_P BIT(1) +#define CLUSTERPMCR_C BIT(2) +#define CLUSTERPMCR_N_SHIFT 11 +#define CLUSTERPMCR_N_MASK 0x1f +#define CLUSTERPMCR_IDCODE_SHIFT 16 +#define CLUSTERPMCR_IDCODE_MASK 0xff +#define CLUSTERPMCR_IMP_SHIFT 24 +#define CLUSTERPMCR_IMP_MASK 0xff +#define CLUSTERPMCR_RES_MASK 0x7e8 +#define CLUSTERPMCR_RES_VAL 0x40 + +#define DSU_ACTIVE_CPU_MASK 0x0 +#define DSU_ASSOCIATED_CPU_MASK 0x1 + +/* + * We use the index of the counters as they appear in the counter + * bit maps in the PMU registers (e.g CLUSTERPMSELR). + * i.e, + * counter 0 - Bit 0 + * counter 1 - Bit 1 + * ... + * Cycle counter - Bit 31 + */ +#define DSU_PMU_IDX_CYCLE_COUNTER 31 + +/* All event counters are 32bit, with a 64bit Cycle counter */ +#define DSU_PMU_COUNTER_WIDTH(idx) \ + (((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32) + +#define DSU_PMU_COUNTER_MASK(idx) \ + GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0) + +#define DSU_EXT_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { \ + .attr = __ATTR(_name, 0444, _func, NULL), \ + .var = (void *)_config \ + } \ + })[0].attr.attr) + +#define DSU_EVENT_ATTR(_name, _config) \ + DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config) + +#define DSU_FORMAT_ATTR(_name, _config) \ + DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config) + +#define DSU_CPUMASK_ATTR(_name, _config) \ + DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config) + +struct dsu_hw_events { + DECLARE_BITMAP(used_mask, DSU_PMU_MAX_HW_CNTRS); + struct perf_event *events[DSU_PMU_MAX_HW_CNTRS]; +}; + +/* + * struct dsu_pmu - DSU PMU descriptor + * + * @pmu_lock : Protects accesses to DSU PMU register from normal vs + * interrupt handler contexts. + * @hw_events : Holds the event counter state. + * @associated_cpus : CPUs attached to the DSU. + * @active_cpu : CPU to which the PMU is bound for accesses. + * @cpuhp_node : Node for CPU hotplug notifier link. + * @num_counters : Number of event counters implemented by the PMU, + * excluding the cycle counter. + * @irq : Interrupt line for counter overflow. + * @cpmceid_bitmap : Bitmap for the availability of architected common + * events (event_code < 0x40). + */ +struct dsu_pmu { + struct pmu pmu; + struct device *dev; + raw_spinlock_t pmu_lock; + struct dsu_hw_events hw_events; + cpumask_t associated_cpus; + cpumask_t active_cpu; + struct hlist_node cpuhp_node; + u8 num_counters; + int irq; + DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS); +}; + +static unsigned long dsu_pmu_cpuhp_state; + +static inline struct dsu_pmu *to_dsu_pmu(struct pmu *pmu) +{ + return container_of(pmu, struct dsu_pmu, pmu); +} + +static ssize_t dsu_pmu_sysfs_event_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + return snprintf(buf, PAGE_SIZE, "event=0x%lx\n", + (unsigned long)eattr->var); +} + +static ssize_t dsu_pmu_sysfs_format_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var); +} + +static ssize_t dsu_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu); + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + unsigned long mask_id = (unsigned long)eattr->var; + const cpumask_t *cpumask; + + switch (mask_id) { + case DSU_ACTIVE_CPU_MASK: + cpumask = &dsu_pmu->active_cpu; + break; + case DSU_ASSOCIATED_CPU_MASK: + cpumask = &dsu_pmu->associated_cpus; + break; + default: + return 0; + } + return cpumap_print_to_pagebuf(true, buf, cpumask); +} + +static struct attribute *dsu_pmu_format_attrs[] = { + DSU_FORMAT_ATTR(event, "config:0-31"), + NULL, +}; + +static const struct attribute_group dsu_pmu_format_attr_group = { + .name = "format", + .attrs = dsu_pmu_format_attrs, +}; + +static struct attribute *dsu_pmu_event_attrs[] = { + DSU_EVENT_ATTR(cycles, 0x11), + DSU_EVENT_ATTR(bus_access, 0x19), + DSU_EVENT_ATTR(memory_error, 0x1a), + DSU_EVENT_ATTR(bus_cycles, 0x1d), + DSU_EVENT_ATTR(l3d_cache_allocate, 0x29), + DSU_EVENT_ATTR(l3d_cache_refill, 0x2a), + DSU_EVENT_ATTR(l3d_cache, 0x2b), + DSU_EVENT_ATTR(l3d_cache_wb, 0x2c), + NULL, +}; + +static umode_t +dsu_pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr, + int unused) +{ + struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj)); + struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu); + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr.attr); + unsigned long evt = (unsigned long)eattr->var; + + return test_bit(evt, dsu_pmu->cpmceid_bitmap) ? attr->mode : 0; +} + +static const struct attribute_group dsu_pmu_events_attr_group = { + .name = "events", + .attrs = dsu_pmu_event_attrs, + .is_visible = dsu_pmu_event_attr_is_visible, +}; + +static struct attribute *dsu_pmu_cpumask_attrs[] = { + DSU_CPUMASK_ATTR(cpumask, DSU_ACTIVE_CPU_MASK), + DSU_CPUMASK_ATTR(associated_cpus, DSU_ASSOCIATED_CPU_MASK), + NULL, +}; + +static const struct attribute_group dsu_pmu_cpumask_attr_group = { + .attrs = dsu_pmu_cpumask_attrs, +}; + +static const struct attribute_group *dsu_pmu_attr_groups[] = { + &dsu_pmu_cpumask_attr_group, + &dsu_pmu_events_attr_group, + &dsu_pmu_format_attr_group, + NULL, +}; + +static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu) +{ + struct cpumask online_supported; + + cpumask_and(&online_supported, + &dsu_pmu->associated_cpus, cpu_online_mask); + return cpumask_any_but(&online_supported, cpu); +} + +static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx) +{ + return (idx < dsu_pmu->num_counters) || + (idx == DSU_PMU_IDX_CYCLE_COUNTER); +} + +static inline u64 dsu_pmu_read_counter(struct perf_event *event) +{ + u64 val; + unsigned long flags; + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + int idx = event->hw.idx; + + if (WARN_ON(!cpumask_test_cpu(smp_processor_id(), + &dsu_pmu->associated_cpus))) + return 0; + + if (!dsu_pmu_counter_valid(dsu_pmu, idx)) { + dev_err(event->pmu->dev, + "Trying reading invalid counter %d\n", idx); + return 0; + } + + raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags); + if (idx == DSU_PMU_IDX_CYCLE_COUNTER) + val = __dsu_pmu_read_pmccntr(); + else + val = __dsu_pmu_read_counter(idx); + raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); + + return val; +} + +static void dsu_pmu_write_counter(struct perf_event *event, u64 val) +{ + unsigned long flags; + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + int idx = event->hw.idx; + + if (WARN_ON(!cpumask_test_cpu(smp_processor_id(), + &dsu_pmu->associated_cpus))) + return; + + if (!dsu_pmu_counter_valid(dsu_pmu, idx)) { + dev_err(event->pmu->dev, + "writing to invalid counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags); + if (idx == DSU_PMU_IDX_CYCLE_COUNTER) + __dsu_pmu_write_pmccntr(val); + else + __dsu_pmu_write_counter(idx, val); + raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); +} + +static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events, + struct perf_event *event) +{ + int idx; + unsigned long evtype = event->attr.config; + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + unsigned long *used_mask = hw_events->used_mask; + + if (evtype == DSU_PMU_EVT_CYCLES) { + if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask)) + return -EAGAIN; + return DSU_PMU_IDX_CYCLE_COUNTER; + } + + idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters); + if (idx >= dsu_pmu->num_counters) + return -EAGAIN; + set_bit(idx, hw_events->used_mask); + return idx; +} + +static void dsu_pmu_enable_counter(struct dsu_pmu *dsu_pmu, int idx) +{ + __dsu_pmu_counter_interrupt_enable(idx); + __dsu_pmu_enable_counter(idx); +} + +static void dsu_pmu_disable_counter(struct dsu_pmu *dsu_pmu, int idx) +{ + __dsu_pmu_disable_counter(idx); + __dsu_pmu_counter_interrupt_disable(idx); +} + +static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu, + struct perf_event *event) +{ + int idx = event->hw.idx; + unsigned long flags; + + if (!dsu_pmu_counter_valid(dsu_pmu, idx)) { + dev_err(event->pmu->dev, + "Trying to set invalid counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags); + __dsu_pmu_set_event(idx, event->hw.config_base); + raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); +} + +static void dsu_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_count, new_count; + + do { + /* We may also be called from the irq handler */ + prev_count = local64_read(&hwc->prev_count); + new_count = dsu_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) != + prev_count); + delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx); + local64_add(delta, &event->count); +} + +static void dsu_pmu_read(struct perf_event *event) +{ + dsu_pmu_event_update(event); +} + +static inline u32 dsu_pmu_get_reset_overflow(void) +{ + return __dsu_pmu_get_reset_overflow(); +} + +/** + * dsu_pmu_set_event_period: Set the period for the counter. + * + * All DSU PMU event counters, except the cycle counter are 32bit + * counters. To handle cases of extreme interrupt latency, we program + * the counter with half of the max count for the counters. + */ +static void dsu_pmu_set_event_period(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1; + + local64_set(&event->hw.prev_count, val); + dsu_pmu_write_counter(event, val); +} + +static irqreturn_t dsu_pmu_handle_irq(int irq_num, void *dev) +{ + int i; + bool handled = false; + struct dsu_pmu *dsu_pmu = dev; + struct dsu_hw_events *hw_events = &dsu_pmu->hw_events; + unsigned long overflow; + + overflow = dsu_pmu_get_reset_overflow(); + if (!overflow) + return IRQ_NONE; + + for_each_set_bit(i, &overflow, DSU_PMU_MAX_HW_CNTRS) { + struct perf_event *event = hw_events->events[i]; + + if (!event) + continue; + dsu_pmu_event_update(event); + dsu_pmu_set_event_period(event); + handled = true; + } + + return IRQ_RETVAL(handled); +} + +static void dsu_pmu_start(struct perf_event *event, int pmu_flags) +{ + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + + /* We always reprogram the counter */ + if (pmu_flags & PERF_EF_RELOAD) + WARN_ON(!(event->hw.state & PERF_HES_UPTODATE)); + dsu_pmu_set_event_period(event); + if (event->hw.idx != DSU_PMU_IDX_CYCLE_COUNTER) + dsu_pmu_set_event(dsu_pmu, event); + event->hw.state = 0; + dsu_pmu_enable_counter(dsu_pmu, event->hw.idx); +} + +static void dsu_pmu_stop(struct perf_event *event, int pmu_flags) +{ + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + + if (event->hw.state & PERF_HES_STOPPED) + return; + dsu_pmu_disable_counter(dsu_pmu, event->hw.idx); + dsu_pmu_event_update(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int dsu_pmu_add(struct perf_event *event, int flags) +{ + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + struct dsu_hw_events *hw_events = &dsu_pmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx; + + if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), + &dsu_pmu->associated_cpus))) + return -ENOENT; + + idx = dsu_pmu_get_event_idx(hw_events, event); + if (idx < 0) + return idx; + + hwc->idx = idx; + hw_events->events[idx] = event; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + dsu_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + return 0; +} + +static void dsu_pmu_del(struct perf_event *event, int flags) +{ + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + struct dsu_hw_events *hw_events = &dsu_pmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + dsu_pmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + perf_event_update_userpage(event); +} + +static void dsu_pmu_enable(struct pmu *pmu) +{ + u32 pmcr; + unsigned long flags; + struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu); + + /* If no counters are added, skip enabling the PMU */ + if (bitmap_empty(dsu_pmu->hw_events.used_mask, DSU_PMU_MAX_HW_CNTRS)) + return; + + raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags); + pmcr = __dsu_pmu_read_pmcr(); + pmcr |= CLUSTERPMCR_E; + __dsu_pmu_write_pmcr(pmcr); + raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); +} + +static void dsu_pmu_disable(struct pmu *pmu) +{ + u32 pmcr; + unsigned long flags; + struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu); + + raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags); + pmcr = __dsu_pmu_read_pmcr(); + pmcr &= ~CLUSTERPMCR_E; + __dsu_pmu_write_pmcr(pmcr); + raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); +} + +static bool dsu_pmu_validate_event(struct pmu *pmu, + struct dsu_hw_events *hw_events, + struct perf_event *event) +{ + if (is_software_event(event)) + return true; + /* Reject groups spanning multiple HW PMUs. */ + if (event->pmu != pmu) + return false; + return dsu_pmu_get_event_idx(hw_events, event) >= 0; +} + +/* + * Make sure the group of events can be scheduled at once + * on the PMU. + */ +static bool dsu_pmu_validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct dsu_hw_events fake_hw; + + if (event->group_leader == event) + return true; + + memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask)); + if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader)) + return false; + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling)) + return false; + } + return dsu_pmu_validate_event(event->pmu, &fake_hw, event); +} + +static int dsu_pmu_event_init(struct perf_event *event) +{ + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* We don't support sampling */ + if (is_sampling_event(event)) { + dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n"); + return -EOPNOTSUPP; + } + + /* We cannot support task bound events */ + if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) { + dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n"); + return -EINVAL; + } + + if (has_branch_stack(event) || + event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) { + dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n"); + return -EINVAL; + } + + if (!cpumask_test_cpu(event->cpu, &dsu_pmu->associated_cpus)) { + dev_dbg(dsu_pmu->pmu.dev, + "Requested cpu is not associated with the DSU\n"); + return -EINVAL; + } + /* + * Choose the current active CPU to read the events. We don't want + * to migrate the event contexts, irq handling etc to the requested + * CPU. As long as the requested CPU is within the same DSU, we + * are fine. + */ + event->cpu = cpumask_first(&dsu_pmu->active_cpu); + if (event->cpu >= nr_cpu_ids) + return -EINVAL; + if (!dsu_pmu_validate_group(event)) + return -EINVAL; + + event->hw.config_base = event->attr.config; + return 0; +} + +static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev) +{ + struct dsu_pmu *dsu_pmu; + + dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL); + if (!dsu_pmu) + return ERR_PTR(-ENOMEM); + + raw_spin_lock_init(&dsu_pmu->pmu_lock); + /* + * Initialise the number of counters to -1, until we probe + * the real number on a connected CPU. + */ + dsu_pmu->num_counters = -1; + return dsu_pmu; +} + +/** + * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster. + */ +static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) +{ + int i = 0, n, cpu; + struct device_node *cpu_node; + + n = of_count_phandle_with_args(dev, "cpus", NULL); + if (n <= 0) + return -ENODEV; + for (; i < n; i++) { + cpu_node = of_parse_phandle(dev, "cpus", i); + if (!cpu_node) + break; + cpu = of_cpu_node_to_id(cpu_node); + of_node_put(cpu_node); + /* + * We have to ignore the failures here and continue scanning + * the list to handle cases where the nr_cpus could be capped + * in the running kernel. + */ + if (cpu < 0) + continue; + cpumask_set_cpu(cpu, mask); + } + return 0; +} + +/* + * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster. + */ +static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu) +{ + u64 num_counters; + u32 cpmceid[2]; + + num_counters = (__dsu_pmu_read_pmcr() >> CLUSTERPMCR_N_SHIFT) & + CLUSTERPMCR_N_MASK; + /* We can only support up to 31 independent counters */ + if (WARN_ON(num_counters > 31)) + num_counters = 31; + dsu_pmu->num_counters = num_counters; + if (!dsu_pmu->num_counters) + return; + cpmceid[0] = __dsu_pmu_read_pmceid(0); + cpmceid[1] = __dsu_pmu_read_pmceid(1); + bitmap_from_u32array(dsu_pmu->cpmceid_bitmap, + DSU_PMU_MAX_COMMON_EVENTS, + cpmceid, + ARRAY_SIZE(cpmceid)); +} + +static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu) +{ + cpumask_set_cpu(cpu, &dsu_pmu->active_cpu); + if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu)) + pr_warn("Failed to set irq affinity to %d\n", cpu); +} + +/* + * dsu_pmu_init_pmu: Initialise the DSU PMU configurations if + * we haven't done it already. + */ +static void dsu_pmu_init_pmu(struct dsu_pmu *dsu_pmu) +{ + if (dsu_pmu->num_counters == -1) + dsu_pmu_probe_pmu(dsu_pmu); + /* Reset the interrupt overflow mask */ + dsu_pmu_get_reset_overflow(); +} + +static int dsu_pmu_device_probe(struct platform_device *pdev) +{ + int irq, rc; + struct dsu_pmu *dsu_pmu; + char *name; + static atomic_t pmu_idx = ATOMIC_INIT(-1); + + dsu_pmu = dsu_pmu_alloc(pdev); + if (IS_ERR(dsu_pmu)) + return PTR_ERR(dsu_pmu); + + rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus); + if (rc) { + dev_warn(&pdev->dev, "Failed to parse the CPUs\n"); + return rc; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_warn(&pdev->dev, "Failed to find IRQ\n"); + return -EINVAL; + } + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d", + PMUNAME, atomic_inc_return(&pmu_idx)); + if (!name) + return -ENOMEM; + rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq, + IRQF_NOBALANCING, name, dsu_pmu); + if (rc) { + dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq); + return rc; + } + + dsu_pmu->irq = irq; + platform_set_drvdata(pdev, dsu_pmu); + rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state, + &dsu_pmu->cpuhp_node); + if (rc) + return rc; + + dsu_pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .module = THIS_MODULE, + .pmu_enable = dsu_pmu_enable, + .pmu_disable = dsu_pmu_disable, + .event_init = dsu_pmu_event_init, + .add = dsu_pmu_add, + .del = dsu_pmu_del, + .start = dsu_pmu_start, + .stop = dsu_pmu_stop, + .read = dsu_pmu_read, + + .attr_groups = dsu_pmu_attr_groups, + }; + + rc = perf_pmu_register(&dsu_pmu->pmu, name, -1); + if (rc) { + cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, + &dsu_pmu->cpuhp_node); + irq_set_affinity_hint(dsu_pmu->irq, NULL); + } + + return rc; +} + +static int dsu_pmu_device_remove(struct platform_device *pdev) +{ + struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&dsu_pmu->pmu); + cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node); + irq_set_affinity_hint(dsu_pmu->irq, NULL); + + return 0; +} + +static const struct of_device_id dsu_pmu_of_match[] = { + { .compatible = "arm,dsu-pmu", }, + {}, +}; + +static struct platform_driver dsu_pmu_driver = { + .driver = { + .name = DRVNAME, + .of_match_table = of_match_ptr(dsu_pmu_of_match), + }, + .probe = dsu_pmu_device_probe, + .remove = dsu_pmu_device_remove, +}; + +static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, + cpuhp_node); + + if (!cpumask_test_cpu(cpu, &dsu_pmu->associated_cpus)) + return 0; + + /* If the PMU is already managed, there is nothing to do */ + if (!cpumask_empty(&dsu_pmu->active_cpu)) + return 0; + + dsu_pmu_init_pmu(dsu_pmu); + dsu_pmu_set_active_cpu(cpu, dsu_pmu); + + return 0; +} + +static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) +{ + int dst; + struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, + cpuhp_node); + + if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu)) + return 0; + + dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu); + /* If there are no active CPUs in the DSU, leave IRQ disabled */ + if (dst >= nr_cpu_ids) { + irq_set_affinity_hint(dsu_pmu->irq, NULL); + return 0; + } + + perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst); + dsu_pmu_set_active_cpu(dst, dsu_pmu); + + return 0; +} + +static int __init dsu_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + DRVNAME, + dsu_pmu_cpu_online, + dsu_pmu_cpu_teardown); + if (ret < 0) + return ret; + dsu_pmu_cpuhp_state = ret; + return platform_driver_register(&dsu_pmu_driver); +} + +static void __exit dsu_pmu_exit(void) +{ + platform_driver_unregister(&dsu_pmu_driver); + cpuhp_remove_multi_state(dsu_pmu_cpuhp_state); +} + +module_init(dsu_pmu_init); +module_exit(dsu_pmu_exit); + +MODULE_DEVICE_TABLE(of, dsu_pmu_of_match); +MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit"); +MODULE_AUTHOR("Suzuki K Poulose "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-58-ga151 From 3b3b681097fae73b7f5dcdd42db6cfdf32943d4c Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Wed, 13 Dec 2017 18:13:56 +0800 Subject: arm64: v8.4: Support for new floating point multiplication instructions ARM v8.4 extensions add new neon instructions for performing a multiplication of each FP16 element of one vector with the corresponding FP16 element of a second vector, and to add or subtract this without an intermediate rounding to the corresponding FP32 element in a third vector. This patch detects this feature and let the userspace know about it via a HWCAP bit and MRS emulation. Cc: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Dongjiu Geng Reviewed-by: Dave Martin Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.txt | 4 +++- Documentation/arm64/elf_hwcaps.txt | 4 ++++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + 6 files changed, 12 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index bd9b3faab2c4..a70090b28b07 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,7 +110,9 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-48] | n | + | RES0 | [63-52] | n | + |--------------------------------------------------| + | FHM | [51-48] | y | |--------------------------------------------------| | DP | [47-44] | y | |--------------------------------------------------| diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 89edba12a9e0..57324ee55ecc 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -158,3 +158,7 @@ HWCAP_SHA512 HWCAP_SVE Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. + +HWCAP_ASIMDFHM + + Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ec144f480b39..ab637886a6f9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -419,6 +419,7 @@ #define SCTLR_EL1_CP15BEN (1 << 5) /* id_aa64isar0 */ +#define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_SM4_SHIFT 40 #define ID_AA64ISAR0_SM3_SHIFT 36 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index cda76fa8b9b2..f018c3deea3b 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -43,5 +43,6 @@ #define HWCAP_ASIMDDP (1 << 20) #define HWCAP_SHA512 (1 << 21) #define HWCAP_SVE (1 << 22) +#define HWCAP_ASIMDFHM (1 << 23) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f0545dfe497..1f024049eb30 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), @@ -1032,6 +1033,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 1e2554543506..7f94623df8a5 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -76,6 +76,7 @@ static const char *const hwcap_str[] = { "asimddp", "sha512", "sve", + "asimdfhm", NULL }; -- cgit v1.2.3-58-ga151 From 86f04f640058143388f56c048f86e66ea5204ae2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 8 Jan 2018 15:38:08 +0000 Subject: Docs: dt: add devicetree binding for describing arm64 SDEI firmware The Software Delegated Exception Interface (SDEI) is an ARM standard for registering callbacks from the platform firmware into the OS. This is typically used to implement RAS notifications, or from an IRQ that has been promoted to a firmware-assisted NMI. Add a new devicetree binding to describe the SDE firmware interface. Signed-off-by: James Morse Acked-by: Rob Herring Signed-off-by: Catalin Marinas --- .../devicetree/bindings/arm/firmware/sdei.txt | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/firmware/sdei.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt new file mode 100644 index 000000000000..ee3f0ff49889 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt @@ -0,0 +1,42 @@ +* Software Delegated Exception Interface (SDEI) + +Firmware implementing the SDEI functions described in ARM document number +ARM DEN 0054A ("Software Delegated Exception Interface") can be used by +Linux to receive notification of events such as those generated by +firmware-first error handling, or from an IRQ that has been promoted to +a firmware-assisted NMI. + +The interface provides a number of API functions for registering callbacks +and enabling/disabling events. Functions are invoked by trapping to the +privilege level of the SDEI firmware (specified as part of the binding +below) and passing arguments in a manner specified by the "SMC Calling +Convention (ARM DEN 0028B): + + r0 => 32-bit Function ID / return value + {r1 - r3} => Parameters + +Note that the immediate field of the trapping instruction must be set +to #0. + +The SDEI_EVENT_REGISTER function registers a callback in the kernel +text to handle the specified event number. + +The sdei node should be a child node of '/firmware' and have required +properties: + + - compatible : should contain: + * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x. + + - method : The method of calling the SDEI firmware. Permitted + values are: + * "smc" : SMC #0, with the register assignments specified in this + binding. + * "hvc" : HVC #0, with the register assignments specified in this + binding. +Example: + firmware { + sdei { + compatible = "arm,sdei-1.0"; + method = "smc"; + }; + }; -- cgit v1.2.3-58-ga151 From bb48711800e6d7aedbf4dfd3367e0ab1270a6bea Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Dec 2017 14:19:37 -0800 Subject: arm64: cpu_errata: Add Kryo to Falkor 1003 errata The Kryo CPUs are also affected by the Falkor 1003 errata, so we need to do the same workaround on Kryo CPUs. The MIDR is slightly more complicated here, where the PART number is not always the same when looking at all the bits from 15 to 4. Drop the lower 8 bits and just look at the top 4 to see if it's '2' and then consider those as Kryo CPUs. This covers all the combinations without having to list them all out. Fixes: 38fd94b0275c ("arm64: Work around Falkor erratum 1003") Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.txt | 2 +- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 304bf22bb83c..b9d93e981a05 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -72,6 +72,6 @@ stable kernels. | Hisilicon | Hip0{6,7} | #161010701 | N/A | | Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 | | | | | | -| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cce5735a677c..2f8d39ed9c2e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_KRYO 0x200 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -106,6 +107,7 @@ #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 70e5f1826fd9..90a9e465339c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,20 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) entry->midr_range_max); } +static bool __maybe_unused +is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) +{ + u32 model; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + model = read_cpuid_id(); + model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | + MIDR_ARCHITECTURE_MASK; + + return model == entry->midr_model; +} + static bool has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, int scope) @@ -290,6 +304,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(0, 0)), }, + { + .desc = "Qualcomm Technologies Kryo erratum 1003", + .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + .def_scope = SCOPE_LOCAL_CPU, + .midr_model = MIDR_QCOM_KRYO, + .matches = is_kryo_midr, + }, #endif #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 { -- cgit v1.2.3-58-ga151