summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 17:06:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 17:06:19 -0700
commitc89d780cc195a63dcd9c3d2fc239308b3920a9a1 (patch)
tree21022c5d078e67aba9cca442d05727b898ba27ef /drivers
parentbbb3556c014dc8ed1645b725ad84477603553743 (diff)
parent4f3a6c4de7d932be94cde2c52ae58feeec9c9dbf (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: "The biggest part is the virtual CPU hotplug that touches ACPI, irqchip. We also have some GICv3 optimisation for pseudo-NMIs that has been queued via the arm64 tree. Otherwise the usual perf updates, kselftest, various small cleanups. Core: - Virtual CPU hotplug support for arm64 ACPI systems - cpufeature infrastructure cleanups and making the FEAT_ECBHB ID bits visible to guests - CPU errata: expand the speculative SSBS workaround to more CPUs - GICv3, use compile-time PMR values: optimise the way regular IRQs are masked/unmasked when GICv3 pseudo-NMIs are used, removing the need for a static key in fast paths by using a priority value chosen dynamically at boot time ACPI: - 'acpi=nospcr' option to disable SPCR as default console for arm64 - Move some ACPI code (cpuidle, FFH) to drivers/acpi/arm64/ Perf updates: - Rework of the IMX PMU driver to enable support for I.MX95 - Enable support for tertiary match groups in the CMN PMU driver - Initial refactoring of the CPU PMU code to prepare for the fixed instruction counter introduced by Arm v9.4 - Add missing PMU driver MODULE_DESCRIPTION() strings - Hook up DT compatibles for recent CPU PMUs Kselftest updates: - Kernel mode NEON fp-stress - Cleanups, spelling mistakes Miscellaneous: - arm64 Documentation update with a minor clarification on TBI - Fix missing IPI statistics - Implement raw_smp_processor_id() using thread_info rather than a per-CPU variable (better code generation) - Make MTE checking of in-kernel asynchronous tag faults conditional on KASAN being enabled - Minor cleanups, typos" * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (69 commits) selftests: arm64: tags: remove the result script selftests: arm64: tags_test: conform test to TAP output perf: add missing MODULE_DESCRIPTION() macros arm64: smp: Fix missing IPI statistics irqchip/gic-v3: Fix 'broken_rdists' unused warning when !SMP and !ACPI ACPI: Add acpi=nospcr to disable ACPI SPCR as default console on ARM64 Documentation: arm64: Update memory.rst for TBI arm64/cpufeature: Replace custom macros with fields from ID_AA64PFR0_EL1 KVM: arm64: Replace custom macros with fields from ID_AA64PFR0_EL1 perf: arm_pmuv3: Include asm/arm_pmuv3.h from linux/perf/arm_pmuv3.h perf: arm_v6/7_pmu: Drop non-DT probe support perf/arm: Move 32-bit PMU drivers to drivers/perf/ perf: arm_pmuv3: Drop unnecessary IS_ENABLED(CONFIG_ARM64) check perf: arm_pmuv3: Avoid assigning fixed cycle counter with threshold arm64: Kconfig: Fix dependencies to enable ACPI_HOTPLUG_CPU perf: imx_perf: add support for i.MX95 platform perf: imx_perf: fix counter start and config sequence perf: imx_perf: refactor driver for imx93 perf: imx_perf: let the driver manage the counter usage rather the user perf: imx_perf: add macro definitions for parsing config attr ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/acpi_processor.c141
-rw-r--r--drivers/acpi/arm64/Makefile6
-rw-r--r--drivers/acpi/arm64/amba.c6
-rw-r--r--drivers/acpi/arm64/cpuidle.c70
-rw-r--r--drivers/acpi/arm64/ffh.c107
-rw-r--r--drivers/acpi/processor_core.c3
-rw-r--r--drivers/acpi/processor_driver.c43
-rw-r--r--drivers/acpi/scan.c47
-rw-r--r--drivers/base/cpu.c12
-rw-r--r--drivers/irqchip/irq-gic-common.c22
-rw-r--r--drivers/irqchip/irq-gic-common.h7
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c11
-rw-r--r--drivers/irqchip/irq-gic-v3.c282
-rw-r--r--drivers/irqchip/irq-gic.c10
-rw-r--r--drivers/irqchip/irq-hip04.c6
-rw-r--r--drivers/perf/Kconfig12
-rw-r--r--drivers/perf/Makefile3
-rw-r--r--drivers/perf/arm-ccn.c1
-rw-r--r--drivers/perf/arm-cmn.c116
-rw-r--r--drivers/perf/arm_cspmu/ampere_cspmu.c1
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c1
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.c1
-rw-r--r--drivers/perf/arm_pmuv3.c26
-rw-r--r--drivers/perf/arm_v6_pmu.c430
-rw-r--r--drivers/perf/arm_v7_pmu.c1994
-rw-r--r--drivers/perf/arm_xscale_pmu.c745
-rw-r--r--drivers/perf/cxl_pmu.c1
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c1
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c352
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c1
-rw-r--r--drivers/perf/marvell_cn10k_ddr_pmu.c1
31 files changed, 4061 insertions, 398 deletions
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 7a0dd35d62c9..64e10bad8072 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -35,6 +35,17 @@ EXPORT_PER_CPU_SYMBOL(processors);
struct acpi_processor_errata errata __read_mostly;
EXPORT_SYMBOL_GPL(errata);
+acpi_handle acpi_get_processor_handle(int cpu)
+{
+ struct acpi_processor *pr;
+
+ pr = per_cpu(processors, cpu);
+ if (pr)
+ return pr->handle;
+
+ return NULL;
+}
+
static int acpi_processor_errata_piix4(struct pci_dev *dev)
{
u8 value1 = 0;
@@ -183,20 +194,44 @@ static void __init acpi_pcc_cpufreq_init(void) {}
#endif /* CONFIG_X86 */
/* Initialization */
+static DEFINE_PER_CPU(void *, processor_device_array);
+
+static int acpi_processor_set_per_cpu(struct acpi_processor *pr,
+ struct acpi_device *device)
+{
+ BUG_ON(pr->id >= nr_cpu_ids);
+
+ /*
+ * Buggy BIOS check.
+ * ACPI id of processors can be reported wrongly by the BIOS.
+ * Don't trust it blindly
+ */
+ if (per_cpu(processor_device_array, pr->id) != NULL &&
+ per_cpu(processor_device_array, pr->id) != device) {
+ dev_warn(&device->dev,
+ "BIOS reported wrong ACPI id %d for the processor\n",
+ pr->id);
+ return -EINVAL;
+ }
+ /*
+ * processor_device_array is not cleared on errors to allow buggy BIOS
+ * checks.
+ */
+ per_cpu(processor_device_array, pr->id) = device;
+ per_cpu(processors, pr->id) = pr;
+
+ return 0;
+}
+
#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static int acpi_processor_hotadd_init(struct acpi_processor *pr)
+static int acpi_processor_hotadd_init(struct acpi_processor *pr,
+ struct acpi_device *device)
{
- unsigned long long sta;
- acpi_status status;
int ret;
if (invalid_phys_cpuid(pr->phys_id))
return -ENODEV;
- status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
- if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT))
- return -ENODEV;
-
cpu_maps_update_begin();
cpus_write_lock();
@@ -204,19 +239,26 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
if (ret)
goto out;
+ ret = acpi_processor_set_per_cpu(pr, device);
+ if (ret) {
+ acpi_unmap_cpu(pr->id);
+ goto out;
+ }
+
ret = arch_register_cpu(pr->id);
if (ret) {
+ /* Leave the processor device array in place to detect buggy bios */
+ per_cpu(processors, pr->id) = NULL;
acpi_unmap_cpu(pr->id);
goto out;
}
/*
- * CPU got hot-added, but cpu_data is not initialized yet. Set a flag
- * to delay cpu_idle/throttling initialization and do it when the CPU
- * gets online for the first time.
+ * CPU got hot-added, but cpu_data is not initialized yet. Do
+ * cpu_idle/throttling initialization when the CPU gets online for
+ * the first time.
*/
pr_info("CPU%d has been hot-added\n", pr->id);
- pr->flags.need_hotplug_init = 1;
out:
cpus_write_unlock();
@@ -224,7 +266,8 @@ out:
return ret;
}
#else
-static inline int acpi_processor_hotadd_init(struct acpi_processor *pr)
+static inline int acpi_processor_hotadd_init(struct acpi_processor *pr,
+ struct acpi_device *device)
{
return -ENODEV;
}
@@ -239,6 +282,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
acpi_status status = AE_OK;
static int cpu0_initialized;
unsigned long long value;
+ int ret;
acpi_processor_errata();
@@ -315,19 +359,19 @@ static int acpi_processor_get_info(struct acpi_device *device)
}
/*
- * Extra Processor objects may be enumerated on MP systems with
- * less than the max # of CPUs. They should be ignored _iff
- * they are physically not present.
- *
- * NOTE: Even if the processor has a cpuid, it may not be present
- * because cpuid <-> apicid mapping is persistent now.
+ * This code is not called unless we know the CPU is present and
+ * enabled. The two paths are:
+ * a) Initially present CPUs on architectures that do not defer
+ * their arch_register_cpu() calls until this point.
+ * b) Hotplugged CPUs (enabled bit in _STA has transitioned from not
+ * enabled to enabled)
*/
- if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) {
- int ret = acpi_processor_hotadd_init(pr);
-
- if (ret)
- return ret;
- }
+ if (!get_cpu_device(pr->id))
+ ret = acpi_processor_hotadd_init(pr, device);
+ else
+ ret = acpi_processor_set_per_cpu(pr, device);
+ if (ret)
+ return ret;
/*
* On some boxes several processors use the same processor bus id.
@@ -372,8 +416,6 @@ static int acpi_processor_get_info(struct acpi_device *device)
* (cpu_data(cpu)) values, like CPU feature flags, family, model, etc.
* Such things have to be put in and set up by the processor driver's .probe().
*/
-static DEFINE_PER_CPU(void *, processor_device_array);
-
static int acpi_processor_add(struct acpi_device *device,
const struct acpi_device_id *id)
{
@@ -400,39 +442,17 @@ static int acpi_processor_add(struct acpi_device *device,
result = acpi_processor_get_info(device);
if (result) /* Processor is not physically present or unavailable */
- return 0;
-
- BUG_ON(pr->id >= nr_cpu_ids);
-
- /*
- * Buggy BIOS check.
- * ACPI id of processors can be reported wrongly by the BIOS.
- * Don't trust it blindly
- */
- if (per_cpu(processor_device_array, pr->id) != NULL &&
- per_cpu(processor_device_array, pr->id) != device) {
- dev_warn(&device->dev,
- "BIOS reported wrong ACPI id %d for the processor\n",
- pr->id);
- /* Give up, but do not abort the namespace scan. */
- goto err;
- }
- /*
- * processor_device_array is not cleared on errors to allow buggy BIOS
- * checks.
- */
- per_cpu(processor_device_array, pr->id) = device;
- per_cpu(processors, pr->id) = pr;
+ goto err_clear_driver_data;
dev = get_cpu_device(pr->id);
if (!dev) {
result = -ENODEV;
- goto err;
+ goto err_clear_per_cpu;
}
result = acpi_bind_one(dev, device);
if (result)
- goto err;
+ goto err_clear_per_cpu;
pr->dev = dev;
@@ -443,10 +463,11 @@ static int acpi_processor_add(struct acpi_device *device,
dev_err(dev, "Processor driver could not be attached\n");
acpi_unbind_one(dev);
- err:
- free_cpumask_var(pr->throttling.shared_cpu_map);
- device->driver_data = NULL;
+ err_clear_per_cpu:
per_cpu(processors, pr->id) = NULL;
+ err_clear_driver_data:
+ device->driver_data = NULL;
+ free_cpumask_var(pr->throttling.shared_cpu_map);
err_free_pr:
kfree(pr);
return result;
@@ -454,7 +475,7 @@ static int acpi_processor_add(struct acpi_device *device,
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/* Removal */
-static void acpi_processor_remove(struct acpi_device *device)
+static void acpi_processor_post_eject(struct acpi_device *device)
{
struct acpi_processor *pr;
@@ -476,10 +497,6 @@ static void acpi_processor_remove(struct acpi_device *device)
device_release_driver(pr->dev);
acpi_unbind_one(pr->dev);
- /* Clean up. */
- per_cpu(processor_device_array, pr->id) = NULL;
- per_cpu(processors, pr->id) = NULL;
-
cpu_maps_update_begin();
cpus_write_lock();
@@ -487,6 +504,10 @@ static void acpi_processor_remove(struct acpi_device *device)
arch_unregister_cpu(pr->id);
acpi_unmap_cpu(pr->id);
+ /* Clean up. */
+ per_cpu(processor_device_array, pr->id) = NULL;
+ per_cpu(processors, pr->id) = NULL;
+
cpus_write_unlock();
cpu_maps_update_done();
@@ -622,7 +643,7 @@ static struct acpi_scan_handler processor_handler = {
.ids = processor_device_ids,
.attach = acpi_processor_add,
#ifdef CONFIG_ACPI_HOTPLUG_CPU
- .detach = acpi_processor_remove,
+ .post_eject = acpi_processor_post_eject,
#endif
.hotplug = {
.enabled = true,
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 726944648c9b..05ecde9eaabe 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ACPI_AGDI) += agdi.o
-obj-$(CONFIG_ACPI_IORT) += iort.o
-obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-$(CONFIG_ACPI_APMT) += apmt.o
+obj-$(CONFIG_ACPI_FFH) += ffh.o
+obj-$(CONFIG_ACPI_GTDT) += gtdt.o
+obj-$(CONFIG_ACPI_IORT) += iort.o
+obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o
obj-$(CONFIG_ARM_AMBA) += amba.o
obj-y += dma.o init.o
obj-y += thermal_cpufreq.o
diff --git a/drivers/acpi/arm64/amba.c b/drivers/acpi/arm64/amba.c
index e1f0bbb8f393..1350083bce5f 100644
--- a/drivers/acpi/arm64/amba.c
+++ b/drivers/acpi/arm64/amba.c
@@ -27,11 +27,7 @@ static const struct acpi_device_id amba_id_list[] = {
static void amba_register_dummy_clk(void)
{
- static struct clk *amba_dummy_clk;
-
- /* If clock already registered */
- if (amba_dummy_clk)
- return;
+ struct clk *amba_dummy_clk;
amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
diff --git a/drivers/acpi/arm64/cpuidle.c b/drivers/acpi/arm64/cpuidle.c
new file mode 100644
index 000000000000..801f9c450142
--- /dev/null
+++ b/drivers/acpi/arm64/cpuidle.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARM64 CPU idle arch support
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/psci.h>
+#include <acpi/processor.h>
+
+#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
+
+static int psci_acpi_cpu_init_idle(unsigned int cpu)
+{
+ int i, count;
+ struct acpi_lpi_state *lpi;
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+
+ if (unlikely(!pr || !pr->flags.has_lpi))
+ return -EINVAL;
+
+ /*
+ * If the PSCI cpu_suspend function hook has not been initialized
+ * idle states must not be enabled, so bail out
+ */
+ if (!psci_ops.cpu_suspend)
+ return -EOPNOTSUPP;
+
+ count = pr->power.count - 1;
+ if (count <= 0)
+ return -ENODEV;
+
+ for (i = 0; i < count; i++) {
+ u32 state;
+
+ lpi = &pr->power.lpi_states[i + 1];
+ /*
+ * Only bits[31:0] represent a PSCI power_state while
+ * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
+ */
+ state = lpi->address;
+ if (!psci_power_state_is_valid(state)) {
+ pr_warn("Invalid PSCI power state %#x\n", state);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int acpi_processor_ffh_lpi_probe(unsigned int cpu)
+{
+ return psci_acpi_cpu_init_idle(cpu);
+}
+
+__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
+{
+ u32 state = lpi->address;
+
+ if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
+ return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
+ lpi->index, state);
+ else
+ return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
+ lpi->index, state);
+}
diff --git a/drivers/acpi/arm64/ffh.c b/drivers/acpi/arm64/ffh.c
new file mode 100644
index 000000000000..877edc6557e9
--- /dev/null
+++ b/drivers/acpi/arm64/ffh.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/slab.h>
+
+/*
+ * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
+ * specified in https://developer.arm.com/docs/den0048/latest
+ */
+struct acpi_ffh_data {
+ struct acpi_ffh_info info;
+ void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ unsigned long a4, unsigned long a5,
+ unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *args,
+ struct arm_smccc_quirk *res);
+ void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
+ struct arm_smccc_1_2_regs *res);
+};
+
+int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+{
+ enum arm_smccc_conduit conduit;
+ struct acpi_ffh_data *ffh_ctxt;
+
+ if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
+ return -EOPNOTSUPP;
+
+ conduit = arm_smccc_1_1_get_conduit();
+ if (conduit == SMCCC_CONDUIT_NONE) {
+ pr_err("%s: invalid SMCCC conduit\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
+ if (!ffh_ctxt)
+ return -ENOMEM;
+
+ if (conduit == SMCCC_CONDUIT_SMC) {
+ ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
+ ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
+ } else {
+ ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
+ ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
+ }
+
+ memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
+
+ *region_ctxt = ffh_ctxt;
+ return AE_OK;
+}
+
+static bool acpi_ffh_smccc_owner_allowed(u32 fid)
+{
+ int owner = ARM_SMCCC_OWNER_NUM(fid);
+
+ if (owner == ARM_SMCCC_OWNER_STANDARD ||
+ owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
+ return true;
+
+ return false;
+}
+
+int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+{
+ int ret = 0;
+ struct acpi_ffh_data *ffh_ctxt = region_context;
+
+ if (ffh_ctxt->info.offset == 0) {
+ /* SMC/HVC 32bit call */
+ struct arm_smccc_res res;
+ u32 a[8] = { 0 }, *ptr = (u32 *)value;
+
+ if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
+ !acpi_ffh_smccc_owner_allowed(*ptr) ||
+ ffh_ctxt->info.length > 32) {
+ ret = AE_ERROR;
+ } else {
+ int idx, len = ffh_ctxt->info.length >> 2;
+
+ for (idx = 0; idx < len; idx++)
+ a[idx] = *(ptr + idx);
+
+ ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
+ a[5], a[6], a[7], &res, NULL);
+ memcpy(value, &res, sizeof(res));
+ }
+
+ } else if (ffh_ctxt->info.offset == 1) {
+ /* SMC/HVC 64bit call */
+ struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
+
+ if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
+ !acpi_ffh_smccc_owner_allowed(r->a0) ||
+ ffh_ctxt->info.length > sizeof(*r)) {
+ ret = AE_ERROR;
+ } else {
+ ffh_ctxt->invoke_ffh64_fn(r, r);
+ memcpy(value, r, ffh_ctxt->info.length);
+ }
+ } else {
+ ret = AE_ERROR;
+ }
+
+ return ret;
+}
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index b203cfe28550..b04b684f3190 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -90,7 +90,8 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
- if (!acpi_gicc_is_usable(gicc))
+ if (!(gicc->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE)))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 67db60eda370..cb52dd000b95 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -33,7 +33,6 @@ MODULE_AUTHOR("Paul Diefenbaugh");
MODULE_DESCRIPTION("ACPI Processor Driver");
MODULE_LICENSE("GPL");
-static int acpi_processor_start(struct device *dev);
static int acpi_processor_stop(struct device *dev);
static const struct acpi_device_id processor_device_ids[] = {
@@ -47,7 +46,6 @@ static struct device_driver acpi_processor_driver = {
.name = "processor",
.bus = &cpu_subsys,
.acpi_match_table = processor_device_ids,
- .probe = acpi_processor_start,
.remove = acpi_processor_stop,
};
@@ -115,12 +113,9 @@ static int acpi_soft_cpu_online(unsigned int cpu)
* CPU got physically hotplugged and onlined for the first time:
* Initialize missing things.
*/
- if (pr->flags.need_hotplug_init) {
+ if (!pr->flags.previously_online) {
int ret;
- pr_info("Will online and init hotplugged CPU: %d\n",
- pr->id);
- pr->flags.need_hotplug_init = 0;
ret = __acpi_processor_start(device);
WARN(ret, "Failed to start CPU: %d\n", pr->id);
} else {
@@ -167,9 +162,6 @@ static int __acpi_processor_start(struct acpi_device *device)
if (!pr)
return -ENODEV;
- if (pr->flags.need_hotplug_init)
- return 0;
-
result = acpi_cppc_processor_probe(pr);
if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS))
dev_dbg(&device->dev, "CPPC data invalid or not present\n");
@@ -185,32 +177,21 @@ static int __acpi_processor_start(struct acpi_device *device)
status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
acpi_processor_notify, device);
- if (ACPI_SUCCESS(status))
- return 0;
+ if (!ACPI_SUCCESS(status)) {
+ result = -ENODEV;
+ goto err_thermal_exit;
+ }
+ pr->flags.previously_online = 1;
- result = -ENODEV;
- acpi_processor_thermal_exit(pr, device);
+ return 0;
+err_thermal_exit:
+ acpi_processor_thermal_exit(pr, device);
err_power_exit:
acpi_processor_power_exit(pr);
return result;
}
-static int acpi_processor_start(struct device *dev)
-{
- struct acpi_device *device = ACPI_COMPANION(dev);
- int ret;
-
- if (!device)
- return -ENODEV;
-
- /* Protect against concurrent CPU hotplug operations */
- cpu_hotplug_disable();
- ret = __acpi_processor_start(device);
- cpu_hotplug_enable();
- return ret;
-}
-
static int acpi_processor_stop(struct device *dev)
{
struct acpi_device *device = ACPI_COMPANION(dev);
@@ -279,9 +260,9 @@ static int __init acpi_processor_driver_init(void)
if (result < 0)
return result;
- result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "acpi/cpu-drv:online",
- acpi_soft_cpu_online, NULL);
+ result = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "acpi/cpu-drv:online",
+ acpi_soft_cpu_online, NULL);
if (result < 0)
goto err;
hp_online = result;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 503773707e01..0aa20623525a 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -243,13 +243,17 @@ static int acpi_scan_try_to_offline(struct acpi_device *device)
return 0;
}
-static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check)
+#define ACPI_SCAN_CHECK_FLAG_STATUS BIT(0)
+#define ACPI_SCAN_CHECK_FLAG_EJECT BIT(1)
+
+static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
{
struct acpi_scan_handler *handler = adev->handler;
+ uintptr_t flags = (uintptr_t)p;
- acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, check);
+ acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
- if (check) {
+ if (flags & ACPI_SCAN_CHECK_FLAG_STATUS) {
acpi_bus_get_status(adev);
/*
* Skip devices that are still there and take the enabled
@@ -269,8 +273,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check)
if (handler) {
if (handler->detach)
handler->detach(adev);
-
- adev->handler = NULL;
} else {
device_release_driver(&adev->dev);
}
@@ -280,6 +282,28 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check)
*/
acpi_device_set_power(adev, ACPI_STATE_D3_COLD);
adev->flags.initialized = false;
+
+ /* For eject this is deferred to acpi_bus_post_eject() */
+ if (!(flags & ACPI_SCAN_CHECK_FLAG_EJECT)) {
+ adev->handler = NULL;
+ acpi_device_clear_enumerated(adev);
+ }
+ return 0;
+}
+
+static int acpi_bus_post_eject(struct acpi_device *adev, void *not_used)
+{
+ struct acpi_scan_handler *handler = adev->handler;
+
+ acpi_dev_for_each_child_reverse(adev, acpi_bus_post_eject, NULL);
+
+ if (handler) {
+ if (handler->post_eject)
+ handler->post_eject(adev);
+
+ adev->handler = NULL;
+ }
+
acpi_device_clear_enumerated(adev);
return 0;
@@ -287,7 +311,9 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check)
static void acpi_scan_check_subtree(struct acpi_device *adev)
{
- acpi_scan_check_and_detach(adev, (void *)true);
+ uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
+
+ acpi_scan_check_and_detach(adev, (void *)flags);
}
static int acpi_scan_hot_remove(struct acpi_device *device)
@@ -295,6 +321,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
acpi_handle handle = device->handle;
unsigned long long sta;
acpi_status status;
+ uintptr_t flags = ACPI_SCAN_CHECK_FLAG_EJECT;
if (device->handler && device->handler->hotplug.demand_offline) {
if (!acpi_scan_is_offline(device, true))
@@ -307,7 +334,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
acpi_handle_debug(handle, "Ejecting\n");
- acpi_bus_trim(device);
+ acpi_scan_check_and_detach(device, (void *)flags);
acpi_evaluate_lck(handle, 0);
/*
@@ -330,6 +357,8 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
} else if (sta & ACPI_STA_DEVICE_ENABLED) {
acpi_handle_warn(handle,
"Eject incomplete - status 0x%llx\n", sta);
+ } else {
+ acpi_bus_post_eject(device, NULL);
}
return 0;
@@ -2596,7 +2625,9 @@ EXPORT_SYMBOL(acpi_bus_scan);
*/
void acpi_bus_trim(struct acpi_device *adev)
{
- acpi_scan_check_and_detach(adev, NULL);
+ uintptr_t flags = 0;
+
+ acpi_scan_check_and_detach(adev, (void *)flags);
}
EXPORT_SYMBOL_GPL(acpi_bus_trim);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index c61ecb0c2ae2..b57326fd48d4 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -95,6 +95,7 @@ void unregister_cpu(struct cpu *cpu)
{
int logical_cpu = cpu->dev.id;
+ set_cpu_enabled(logical_cpu, false);
unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));
device_unregister(&cpu->dev);
@@ -273,6 +274,13 @@ static ssize_t print_cpus_offline(struct device *dev,
}
static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL);
+static ssize_t print_cpus_enabled(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask));
+}
+static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL);
+
static ssize_t print_cpus_isolated(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -413,6 +421,7 @@ int register_cpu(struct cpu *cpu, int num)
register_cpu_under_node(num, cpu_to_node(num));
dev_pm_qos_expose_latency_limit(&cpu->dev,
PM_QOS_RESUME_LATENCY_NO_CONSTRAINT);
+ set_cpu_enabled(num, true);
return 0;
}
@@ -494,6 +503,7 @@ static struct attribute *cpu_root_attrs[] = {
&cpu_attrs[2].attr.attr,
&dev_attr_kernel_max.attr,
&dev_attr_offline.attr,
+ &dev_attr_enabled.attr,
&dev_attr_isolated.attr,
#ifdef CONFIG_NO_HZ_FULL
&dev_attr_nohz_full.attr,
@@ -558,7 +568,7 @@ static void __init cpu_dev_register_generic(void)
for_each_present_cpu(i) {
ret = arch_register_cpu(i);
- if (ret)
+ if (ret && ret != -EPROBE_DEFER)
pr_warn("register_cpu %d failed (%d)\n", i, ret);
}
}
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index afd6a1841715..c776f9142610 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -7,6 +7,7 @@
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqchip/arm-gic.h>
+#include <linux/kernel.h>
#include "irq-gic-common.h"
@@ -45,7 +46,7 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
}
int gic_configure_irq(unsigned int irq, unsigned int type,
- void __iomem *base, void (*sync_access)(void))
+ void __iomem *base)
{
u32 confmask = 0x2 << ((irq % 16) * 2);
u32 confoff = (irq / 16) * 4;
@@ -84,14 +85,10 @@ int gic_configure_irq(unsigned int irq, unsigned int type,
raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
- if (sync_access)
- sync_access();
-
return ret;
}
-void gic_dist_config(void __iomem *base, int gic_irqs,
- void (*sync_access)(void))
+void gic_dist_config(void __iomem *base, int gic_irqs, u8 priority)
{
unsigned int i;
@@ -106,7 +103,8 @@ void gic_dist_config(void __iomem *base, int gic_irqs,
* Set priority on all global interrupts.
*/
for (i = 32; i < gic_irqs; i += 4)
- writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i);
+ writel_relaxed(REPEAT_BYTE_U32(priority),
+ base + GIC_DIST_PRI + i);
/*
* Deactivate and disable all SPIs. Leave the PPI and SGIs
@@ -118,12 +116,9 @@ void gic_dist_config(void __iomem *base, int gic_irqs,
writel_relaxed(GICD_INT_EN_CLR_X32,
base + GIC_DIST_ENABLE_CLEAR + i / 8);
}
-
- if (sync_access)
- sync_access();
}
-void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void))
+void gic_cpu_config(void __iomem *base, int nr, u8 priority)
{
int i;
@@ -142,9 +137,6 @@ void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void))
* Set priority on PPI and SGI interrupts
*/
for (i = 0; i < nr; i += 4)
- writel_relaxed(GICD_INT_DEF_PRI_X4,
+ writel_relaxed(REPEAT_BYTE_U32(priority),
base + GIC_DIST_PRI + i * 4 / 4);
-
- if (sync_access)
- sync_access();
}
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index f407cce9ecaa..e8eab72ef195 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -20,10 +20,9 @@ struct gic_quirk {
};
int gic_configure_irq(unsigned int irq, unsigned int type,
- void __iomem *base, void (*sync_access)(void));
-void gic_dist_config(void __iomem *base, int gic_irqs,
- void (*sync_access)(void));
-void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void));
+ void __iomem *base);
+void gic_dist_config(void __iomem *base, int gic_irqs, u8 priority);
+void gic_cpu_config(void __iomem *base, int nr, u8 priority);
void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data);
void gic_enable_of_quirks(const struct device_node *np,
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 3c755d5dad6e..42e63272154e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -59,7 +59,7 @@ static u32 lpi_id_bits;
#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K)
#define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
-#define LPI_PROP_DEFAULT_PRIO GICD_INT_DEF_PRI
+static u8 __ro_after_init lpi_prop_prio;
/*
* Collection structure - just an ID, and a redistributor address to
@@ -1926,7 +1926,7 @@ static int its_vlpi_unmap(struct irq_data *d)
/* and restore the physical one */
irqd_clr_forwarded_to_vcpu(d);
its_send_mapti(its_dev, d->hwirq, event);
- lpi_update_config(d, 0xff, (LPI_PROP_DEFAULT_PRIO |
+ lpi_update_config(d, 0xff, (lpi_prop_prio |
LPI_PROP_ENABLED |
LPI_PROP_GROUP1));
@@ -2181,8 +2181,8 @@ static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids)
static void gic_reset_prop_table(void *va)
{
- /* Priority 0xa0, Group-1, disabled */
- memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ);
+ /* Regular IRQ priority, Group-1, disabled */
+ memset(va, lpi_prop_prio | LPI_PROP_GROUP1, LPI_PROPBASE_SZ);
/* Make sure the GIC will observe the written configuration */
gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ);
@@ -5650,7 +5650,7 @@ int __init its_lpi_memreserve_init(void)
}
int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
- struct irq_domain *parent_domain)
+ struct irq_domain *parent_domain, u8 irq_prio)
{
struct device_node *of_node;
struct its_node *its;
@@ -5660,6 +5660,7 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
gic_rdists = rdists;
+ lpi_prop_prio = irq_prio;
its_parent = parent_domain;
of_node = to_of_node(handle);
if (of_node)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6fb276504bcc..6393f3d780e9 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -12,6 +12,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
+#include <linux/kernel.h>
#include <linux/kstrtox.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -24,6 +25,7 @@
#include <linux/irqchip.h>
#include <linux/irqchip/arm-gic-common.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-v3-prio.h>
#include <linux/irqchip/irq-partition-percpu.h>
#include <linux/bitfield.h>
#include <linux/bits.h>
@@ -36,7 +38,8 @@
#include "irq-gic-common.h"
-#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80)
+static u8 dist_prio_irq __ro_after_init = GICV3_PRIO_IRQ;
+static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI;
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
@@ -44,6 +47,8 @@
#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
+static struct cpumask broken_rdists __read_mostly __maybe_unused;
+
struct redist_region {
void __iomem *redist_base;
phys_addr_t phys_base;
@@ -108,29 +113,96 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
*/
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
-DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
-EXPORT_SYMBOL(gic_nonsecure_priorities);
+static u32 gic_get_pribits(void)
+{
+ u32 pribits;
-/*
- * When the Non-secure world has access to group 0 interrupts (as a
- * consequence of SCR_EL3.FIQ == 0), reading the ICC_RPR_EL1 register will
- * return the Distributor's view of the interrupt priority.
- *
- * When GIC security is enabled (GICD_CTLR.DS == 0), the interrupt priority
- * written by software is moved to the Non-secure range by the Distributor.
- *
- * If both are true (which is when gic_nonsecure_priorities gets enabled),
- * we need to shift down the priority programmed by software to match it
- * against the value returned by ICC_RPR_EL1.
- */
-#define GICD_INT_RPR_PRI(priority) \
- ({ \
- u32 __priority = (priority); \
- if (static_branch_unlikely(&gic_nonsecure_priorities)) \
- __priority = 0x80 | (__priority >> 1); \
- \
- __priority; \
- })
+ pribits = gic_read_ctlr();
+ pribits &= ICC_CTLR_EL1_PRI_BITS_MASK;
+ pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT;
+ pribits++;
+
+ return pribits;
+}
+
+static bool gic_has_group0(void)
+{
+ u32 val;
+ u32 old_pmr;
+
+ old_pmr = gic_read_pmr();
+
+ /*
+ * Let's find out if Group0 is under control of EL3 or not by
+ * setting the highest possible, non-zero priority in PMR.
+ *
+ * If SCR_EL3.FIQ is set, the priority gets shifted down in
+ * order for the CPU interface to set bit 7, and keep the
+ * actual priority in the non-secure range. In the process, it
+ * looses the least significant bit and the actual priority
+ * becomes 0x80. Reading it back returns 0, indicating that
+ * we're don't have access to Group0.
+ */
+ gic_write_pmr(BIT(8 - gic_get_pribits()));
+ val = gic_read_pmr();
+
+ gic_write_pmr(old_pmr);
+
+ return val != 0;
+}
+
+static inline bool gic_dist_security_disabled(void)
+{
+ return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS;
+}
+
+static bool cpus_have_security_disabled __ro_after_init;
+static bool cpus_have_group0 __ro_after_init;
+
+static void __init gic_prio_init(void)
+{
+ cpus_have_security_disabled = gic_dist_security_disabled();
+ cpus_have_group0 = gic_has_group0();
+
+ /*
+ * How priority values are used by the GIC depends on two things:
+ * the security state of the GIC (controlled by the GICD_CTRL.DS bit)
+ * and if Group 0 interrupts can be delivered to Linux in the non-secure
+ * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the
+ * way priorities are presented in ICC_PMR_EL1 and in the distributor:
+ *
+ * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Distributor
+ * -------------------------------------------------------
+ * 1 | - | unchanged | unchanged
+ * -------------------------------------------------------
+ * 0 | 1 | non-secure | non-secure
+ * -------------------------------------------------------
+ * 0 | 0 | unchanged | non-secure
+ *
+ * In the non-secure view reads and writes are modified:
+ *
+ * - A value written is right-shifted by one and the MSB is set,
+ * forcing the priority into the non-secure range.
+ *
+ * - A value read is left-shifted by one.
+ *
+ * In the first two cases, where ICC_PMR_EL1 and the interrupt priority
+ * are both either modified or unchanged, we can use the same set of
+ * priorities.
+ *
+ * In the last case, where only the interrupt priorities are modified to
+ * be in the non-secure range, we program the non-secure values into
+ * the distributor to match the PMR values we want.
+ */
+ if (cpus_have_group0 & !cpus_have_security_disabled) {
+ dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq);
+ dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi);
+ }
+
+ pr_info("GICD_CTRL.DS=%d, SCR_EL3.FIQ=%d\n",
+ cpus_have_security_disabled,
+ !cpus_have_group0);
+}
/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */
static refcount_t *rdist_nmi_refs;
@@ -556,7 +628,7 @@ static int gic_irq_nmi_setup(struct irq_data *d)
desc->handle_irq = handle_fasteoi_nmi;
}
- gic_irq_set_prio(d, GICD_INT_NMI_PRI);
+ gic_irq_set_prio(d, dist_prio_nmi);
return 0;
}
@@ -591,7 +663,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
desc->handle_irq = handle_fasteoi_irq;
}
- gic_irq_set_prio(d, GICD_INT_DEF_PRI);
+ gic_irq_set_prio(d, dist_prio_irq);
}
static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
@@ -670,7 +742,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
offset = convert_offset_index(d, GICD_ICFGR, &index);
- ret = gic_configure_irq(index, type, base + offset, NULL);
+ ret = gic_configure_irq(index, type, base + offset);
if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) {
/* Misconfigured PPIs are usually not fatal */
pr_warn("GIC: PPI INTID%ld is secure or misconfigured\n", irq);
@@ -753,7 +825,7 @@ static bool gic_rpr_is_nmi_prio(void)
if (!gic_supports_nmi())
return false;
- return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI));
+ return unlikely(gic_read_rpr() == GICV3_PRIO_NMI);
}
static bool gic_irqnr_is_special(u32 irqnr)
@@ -866,44 +938,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
__gic_handle_irq_from_irqson(regs);
}
-static u32 gic_get_pribits(void)
-{
- u32 pribits;
-
- pribits = gic_read_ctlr();
- pribits &= ICC_CTLR_EL1_PRI_BITS_MASK;
- pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT;
- pribits++;
-
- return pribits;
-}
-
-static bool gic_has_group0(void)
-{
- u32 val;
- u32 old_pmr;
-
- old_pmr = gic_read_pmr();
-
- /*
- * Let's find out if Group0 is under control of EL3 or not by
- * setting the highest possible, non-zero priority in PMR.
- *
- * If SCR_EL3.FIQ is set, the priority gets shifted down in
- * order for the CPU interface to set bit 7, and keep the
- * actual priority in the non-secure range. In the process, it
- * looses the least significant bit and the actual priority
- * becomes 0x80. Reading it back returns 0, indicating that
- * we're don't have access to Group0.
- */
- gic_write_pmr(BIT(8 - gic_get_pribits()));
- val = gic_read_pmr();
-
- gic_write_pmr(old_pmr);
-
- return val != 0;
-}
-
static void __init gic_dist_init(void)
{
unsigned int i;
@@ -937,10 +971,11 @@ static void __init gic_dist_init(void)
writel_relaxed(0, base + GICD_ICFGRnE + i / 4);
for (i = 0; i < GIC_ESPI_NR; i += 4)
- writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i);
+ writel_relaxed(REPEAT_BYTE_U32(dist_prio_irq),
+ base + GICD_IPRIORITYRnE + i);
/* Now do the common stuff */
- gic_dist_config(base, GIC_LINE_NR, NULL);
+ gic_dist_config(base, GIC_LINE_NR, dist_prio_irq);
val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1;
if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) {
@@ -1119,12 +1154,6 @@ static void gic_update_rdist_properties(void)
gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : "");
}
-/* Check whether it's single security state view */
-static inline bool gic_dist_security_disabled(void)
-{
- return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS;
-}
-
static void gic_cpu_sys_reg_init(void)
{
int i, cpu = smp_processor_id();
@@ -1152,18 +1181,14 @@ static void gic_cpu_sys_reg_init(void)
write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1);
} else if (gic_supports_nmi()) {
/*
- * Mismatch configuration with boot CPU, the system is likely
- * to die as interrupt masking will not work properly on all
- * CPUs
+ * Check that all CPUs use the same priority space.
*
- * The boot CPU calls this function before enabling NMI support,
- * and as a result we'll never see this warning in the boot path
- * for that CPU.
+ * If there's a mismatch with the boot CPU, the system is
+ * likely to die as interrupt masking will not work properly on
+ * all CPUs.
*/
- if (static_branch_unlikely(&gic_nonsecure_priorities))
- WARN_ON(!group0 || gic_dist_security_disabled());
- else
- WARN_ON(group0 && !gic_dist_security_disabled());
+ WARN_ON(group0 != cpus_have_group0);
+ WARN_ON(gic_dist_security_disabled() != cpus_have_security_disabled);
}
/*
@@ -1282,7 +1307,8 @@ static void gic_cpu_init(void)
for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32)
writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8);
- gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp);
+ gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, dist_prio_irq);
+ gic_redist_wait_for_rwp();
/* initialise system registers */
gic_cpu_sys_reg_init();
@@ -1293,6 +1319,18 @@ static void gic_cpu_init(void)
#define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT)
#define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL)
+/*
+ * gic_starting_cpu() is called after the last point where cpuhp is allowed
+ * to fail. So pre check for problems earlier.
+ */
+static int gic_check_rdist(unsigned int cpu)
+{
+ if (cpumask_test_cpu(cpu, &broken_rdists))
+ return -EINVAL;
+
+ return 0;
+}
+
static int gic_starting_cpu(unsigned int cpu)
{
gic_cpu_init();
@@ -1384,6 +1422,10 @@ static void __init gic_smp_init(void)
};
int base_sgi;
+ cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
+ "irqchip/arm/gicv3:checkrdist",
+ gic_check_rdist, NULL);
+
cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
"irqchip/arm/gicv3:starting",
gic_starting_cpu, NULL);
@@ -1948,36 +1990,6 @@ static void gic_enable_nmi_support(void)
pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
- /*
- * How priority values are used by the GIC depends on two things:
- * the security state of the GIC (controlled by the GICD_CTRL.DS bit)
- * and if Group 0 interrupts can be delivered to Linux in the non-secure
- * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the
- * ICC_PMR_EL1 register and the priority that software assigns to
- * interrupts:
- *
- * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority
- * -----------------------------------------------------------
- * 1 | - | unchanged | unchanged
- * -----------------------------------------------------------
- * 0 | 1 | non-secure | non-secure
- * -----------------------------------------------------------
- * 0 | 0 | unchanged | non-secure
- *
- * where non-secure means that the value is right-shifted by one and the
- * MSB bit set, to make it fit in the non-secure priority range.
- *
- * In the first two cases, where ICC_PMR_EL1 and the interrupt priority
- * are both either modified or unchanged, we can use the same set of
- * priorities.
- *
- * In the last case, where only the interrupt priorities are modified to
- * be in the non-secure range, we use a different PMR value to mask IRQs
- * and the rest of the values that we use remain unchanged.
- */
- if (gic_has_group0() && !gic_dist_security_disabled())
- static_branch_enable(&gic_nonsecure_priorities);
-
static_branch_enable(&supports_pseudo_nmis);
if (static_branch_likely(&supports_deactivate_key))
@@ -2058,6 +2070,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_update_rdist_properties();
+ gic_prio_init();
gic_dist_init();
gic_cpu_init();
gic_enable_nmi_support();
@@ -2065,7 +2078,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_cpu_pm_init();
if (gic_dist_supports_lpis()) {
- its_init(handle, &gic_data.rdists, gic_data.domain);
+ its_init(handle, &gic_data.rdists, gic_data.domain, dist_prio_irq);
its_cpu_init();
its_lpi_memreserve_init();
} else {
@@ -2365,8 +2378,24 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
- if (!acpi_gicc_is_usable(gicc))
+ /* Neither enabled or online capable means it doesn't exist, skip it */
+ if (!(gicc->flags & (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE)))
+ return 0;
+
+ /*
+ * Capable but disabled CPUs can be brought online later. What about
+ * the redistributor? ACPI doesn't want to say!
+ * Virtual hotplug systems can use the MADT's "always-on" GICR entries.
+ * Otherwise, prevent such CPUs from being brought online.
+ */
+ if (!(gicc->flags & ACPI_MADT_ENABLED)) {
+ int cpu = get_cpu_for_acpi_id(gicc->uid);
+
+ pr_warn("CPU %u's redistributor is inaccessible: this CPU can't be brought online\n", cpu);
+ if (cpu >= 0)
+ cpumask_set_cpu(cpu, &broken_rdists);
return 0;
+ }
redist_base = ioremap(gicc->gicr_base_address, size);
if (!redist_base)
@@ -2413,21 +2442,15 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
/*
* If GICC is enabled and has valid gicr base address, then it means
- * GICR base is presented via GICC
+ * GICR base is presented via GICC. The redistributor is only known to
+ * be accessible if the GICC is marked as enabled. If this bit is not
+ * set, we'd need to add the redistributor at runtime, which isn't
+ * supported.
*/
- if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
+ if (gicc->flags & ACPI_MADT_ENABLED && gicc->gicr_base_address)
acpi_data.enabled_rdists++;
- return 0;
- }
- /*
- * It's perfectly valid firmware can pass disabled GICC entry, driver
- * should not treat as errors, skip the entry instead of probe fail.
- */
- if (!acpi_gicc_is_usable(gicc))
- return 0;
-
- return -ENODEV;
+ return 0;
}
static int __init gic_acpi_count_gicr_regions(void)
@@ -2483,7 +2506,8 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea
int maint_irq_mode;
static int first_madt = true;
- if (!acpi_gicc_is_usable(gicc))
+ if (!(gicc->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE)))
return 0;
maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 98aa383e39db..3be7bd8cd8cd 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -303,7 +303,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
type != IRQ_TYPE_EDGE_RISING)
return -EINVAL;
- ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG, NULL);
+ ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG);
if (ret && gicirq < 32) {
/* Misconfigured PPIs are usually not fatal */
pr_warn("GIC: PPI%ld is secure or misconfigured\n", gicirq - 16);
@@ -479,7 +479,7 @@ static void gic_dist_init(struct gic_chip_data *gic)
for (i = 32; i < gic_irqs; i += 4)
writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
- gic_dist_config(base, gic_irqs, NULL);
+ gic_dist_config(base, gic_irqs, GICD_INT_DEF_PRI);
writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL);
}
@@ -516,7 +516,7 @@ static int gic_cpu_init(struct gic_chip_data *gic)
gic_cpu_map[i] &= ~cpu_mask;
}
- gic_cpu_config(dist_base, 32, NULL);
+ gic_cpu_config(dist_base, 32, GICD_INT_DEF_PRI);
writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK);
gic_cpu_if_up(gic);
@@ -608,7 +608,7 @@ void gic_dist_restore(struct gic_chip_data *gic)
dist_base + GIC_DIST_CONFIG + i * 4);
for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
- writel_relaxed(GICD_INT_DEF_PRI_X4,
+ writel_relaxed(REPEAT_BYTE_U32(GICD_INT_DEF_PRI),
dist_base + GIC_DIST_PRI + i * 4);
for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
@@ -697,7 +697,7 @@ void gic_cpu_restore(struct gic_chip_data *gic)
writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
- writel_relaxed(GICD_INT_DEF_PRI_X4,
+ writel_relaxed(REPEAT_BYTE_U32(GICD_INT_DEF_PRI),
dist_base + GIC_DIST_PRI + i * 4);
writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);
diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c
index 46161f6ff289..31c3f70a5d5e 100644
--- a/drivers/irqchip/irq-hip04.c
+++ b/drivers/irqchip/irq-hip04.c
@@ -130,7 +130,7 @@ static int hip04_irq_set_type(struct irq_data *d, unsigned int type)
raw_spin_lock(&irq_controller_lock);
- ret = gic_configure_irq(irq, type, base + GIC_DIST_CONFIG, NULL);
+ ret = gic_configure_irq(irq, type, base + GIC_DIST_CONFIG);
if (ret && irq < 32) {
/* Misconfigured PPIs are usually not fatal */
pr_warn("GIC: PPI%d is secure or misconfigured\n", irq - 16);
@@ -260,7 +260,7 @@ static void __init hip04_irq_dist_init(struct hip04_irq_data *intc)
for (i = 32; i < nr_irqs; i += 2)
writel_relaxed(cpumask, base + GIC_DIST_TARGET + ((i * 2) & ~3));
- gic_dist_config(base, nr_irqs, NULL);
+ gic_dist_config(base, nr_irqs, GICD_INT_DEF_PRI);
writel_relaxed(1, base + GIC_DIST_CTRL);
}
@@ -287,7 +287,7 @@ static void hip04_irq_cpu_init(struct hip04_irq_data *intc)
if (i != cpu)
hip04_cpu_map[i] &= ~cpu_mask;
- gic_cpu_config(dist_base, 32, NULL);
+ gic_cpu_config(dist_base, 32, GICD_INT_DEF_PRI);
writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
writel_relaxed(1, base + GIC_CPU_CTRL);
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 7526a9e714fa..aa9530b4064f 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -56,6 +56,18 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.
+config ARM_V6_PMU
+ depends on ARM_PMU && (CPU_V6 || CPU_V6K)
+ def_bool y
+
+config ARM_V7_PMU
+ depends on ARM_PMU && CPU_V7
+ def_bool y
+
+config ARM_XSCALE_PMU
+ depends on ARM_PMU && CPU_XSCALE
+ def_bool y
+
config RISCV_PMU
depends on RISCV
bool "RISC-V PMU framework"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 29b1c28203ef..d43df81d52f7 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -6,6 +6,9 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o
+obj-$(CONFIG_ARM_V6_PMU) += arm_v6_pmu.o
+obj-$(CONFIG_ARM_V7_PMU) += arm_v7_pmu.o
+obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 86ef31ac7503..5c66b9278862 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1561,4 +1561,5 @@ module_init(arm_ccn_init);
module_exit(arm_ccn_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_DESCRIPTION("ARM CCN (Cache Coherent Network) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index e26ad1d3ed0b..c932d9d355cf 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -174,9 +174,8 @@
#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27)
#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
-/* Note that we don't yet support the tertiary match group on newer IPs */
-#define CMN_CONFIG_WP_GRP BIT_ULL(56)
-#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
+#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
+#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
@@ -590,6 +589,13 @@ struct arm_cmn_hw_event {
s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
+
+ /*
+ * WP config registers are divided to UP and DOWN events. We need to
+ * keep to track only one of them.
+ */
+ DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
+
bool wide_sel;
enum cmn_filter_select filter_sel;
};
@@ -617,6 +623,17 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
}
+static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val)
+{
+ if (val)
+ set_bit(pos, wp_idx);
+}
+
+static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos)
+{
+ return test_bit(pos, wp_idx);
+}
+
struct arm_cmn_event_attr {
struct device_attribute attr;
enum cmn_model model;
@@ -1336,12 +1353,37 @@ static const struct attribute_group *arm_cmn_attr_groups[] = {
NULL
};
-static int arm_cmn_wp_idx(struct perf_event *event)
+static int arm_cmn_find_free_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event)
+{
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (dtm->wp_event[wp_idx] >= 0)
+ if (dtm->wp_event[++wp_idx] >= 0)
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
+static int arm_cmn_get_assigned_wp_idx(struct perf_event *event,
+ struct arm_cmn_hw_event *hw,
+ unsigned int pos)
+{
+ return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos);
+}
+
+static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event,
+ unsigned int dtc, int wp_idx,
+ unsigned int pos)
{
- return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+ struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+
+ dtm->wp_event[wp_idx] = hw->dtc_idx[dtc];
+ arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event));
}
-static u32 arm_cmn_wp_config(struct perf_event *event)
+static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
{
u32 config;
u32 dev = CMN_EVENT_WP_DEV_SEL(event);
@@ -1351,6 +1393,10 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
u32 combine = CMN_EVENT_WP_COMBINE(event);
bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600;
+ /* CMN-600 supports only primary and secondary matching groups */
+ if (is_cmn600)
+ grp &= 1;
+
config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
@@ -1358,7 +1404,9 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
if (exc)
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
- if (combine && !grp)
+
+ /* wp_combine is available only on WP0 and WP2 */
+ if (combine && !(wp_idx & 0x1))
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
CMN_DTM_WPn_CONFIG_WP_COMBINE;
return config;
@@ -1520,12 +1568,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
cmn->dtc[i].cc_active = true;
} else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
u64 val = CMN_EVENT_WP_VAL(event);
u64 mask = CMN_EVENT_WP_MASK(event);
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx));
writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
@@ -1550,10 +1598,9 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
i = hw->dtc_idx[0];
cmn->dtc[i].cc_active = false;
} else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
-
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx));
writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
@@ -1571,10 +1618,23 @@ struct arm_cmn_val {
u8 dtm_count[CMN_MAX_DTMS];
u8 occupid[CMN_MAX_DTMS][SEL_MAX];
u8 wp[CMN_MAX_DTMS][4];
+ u8 wp_combine[CMN_MAX_DTMS][2];
int dtc_count[CMN_MAX_DTCS];
bool cycles;
};
+static int arm_cmn_val_find_free_wp_config(struct perf_event *event,
+ struct arm_cmn_val *val, int dtm)
+{
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (val->wp[dtm][wp_idx])
+ if (val->wp[dtm][++wp_idx])
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
struct perf_event *event)
{
@@ -1606,8 +1666,9 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ val->wp[dtm][wp_idx] = 1;
+ val->wp_combine[dtm][wp_idx >> 1] += !!CMN_EVENT_WP_COMBINE(event);
}
}
@@ -1631,6 +1692,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
return -ENOMEM;
arm_cmn_val_add_event(cmn, val, leader);
+
for_each_sibling_event(sibling, leader)
arm_cmn_val_add_event(cmn, val, sibling);
@@ -1645,7 +1707,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
goto done;
for_each_hw_dn(hw, dn, i) {
- int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
+ int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
goto done;
@@ -1657,12 +1719,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- if (val->wp[dtm][wp_idx])
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ if (wp_idx < 0)
goto done;
- wp_cmb = val->wp[dtm][wp_idx ^ 1];
- if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
+ if (wp_idx & 1 &&
+ val->wp_combine[dtm][wp_idx >> 1] != !!CMN_EVENT_WP_COMBINE(event))
goto done;
}
@@ -1773,8 +1835,11 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
- if (type == CMN_TYPE_WP)
- dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
+ if (type == CMN_TYPE_WP) {
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
+
+ dtm->wp_event[wp_idx] = -1;
+ }
if (hw->filter_sel > SEL_NONE)
hw->dn[i].occupid[hw->filter_sel].count--;
@@ -1783,6 +1848,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
+ memset(hw->wp_idx, 0, sizeof(hw->wp_idx));
for_each_hw_dtc_idx(hw, j, idx)
cmn->dtc[j].counters[idx] = NULL;
@@ -1836,19 +1902,23 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
if (type == CMN_TYPE_XP) {
input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
} else if (type == CMN_TYPE_WP) {
- int tmp, wp_idx = arm_cmn_wp_idx(event);
- u32 cfg = arm_cmn_wp_config(event);
+ int tmp, wp_idx;
+ u32 cfg;
- if (dtm->wp_event[wp_idx] >= 0)
+ wp_idx = arm_cmn_find_free_wp_idx(dtm, event);
+ if (wp_idx < 0)
goto free_dtms;
+ cfg = arm_cmn_wp_config(event, wp_idx);
+
tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
- dtm->wp_event[wp_idx] = hw->dtc_idx[d];
+
+ arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
index f146a455e838..f72f5689923c 100644
--- a/drivers/perf/arm_cspmu/ampere_cspmu.c
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -269,4 +269,5 @@ static void __exit ampere_cspmu_exit(void)
module_init(ampere_cspmu_init);
module_exit(ampere_cspmu_exit);
+MODULE_DESCRIPTION("Ampere SoC Performance Monitor Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index c318dc909767..2158a5975c90 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -1427,4 +1427,5 @@ EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);
+MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 5b84b701ad62..d0ef611240aa 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -417,4 +417,5 @@ static void __exit nvidia_cspmu_exit(void)
module_init(nvidia_cspmu_init);
module_exit(nvidia_cspmu_exit);
+MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 23fa6c5da82c..cf0430c266a6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -25,8 +25,6 @@
#include <linux/smp.h>
#include <linux/nmi.h>
-#include <asm/arm_pmuv3.h>
-
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -338,6 +336,11 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event)
return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
}
+static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr)
+{
+ return ATTR_CFG_GET_FLD(attr, threshold);
+}
+
static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
{
u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
@@ -941,7 +944,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
- if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
+ if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
+ !armv8pmu_event_get_threshold(&event->attr)) {
if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
return ARMV8_IDX_CYCLE_COUNTER;
else if (armv8pmu_event_is_64bit(event) &&
@@ -1033,13 +1037,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
* If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
* be 0 and will also trigger this check, preventing it from being used.
*/
- th = ATTR_CFG_GET_FLD(attr, threshold);
+ th = armv8pmu_event_get_threshold(attr);
if (th > threshold_max(cpu_pmu)) {
pr_debug("PMU event threshold exceeds max value\n");
return -EINVAL;
}
- if (IS_ENABLED(CONFIG_ARM64) && th) {
+ if (th) {
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
armv8pmu_event_threshold_control(attr));
@@ -1340,14 +1344,20 @@ PMUV3_INIT_SIMPLE(armv9_cortex_a520)
PMUV3_INIT_SIMPLE(armv9_cortex_a710)
PMUV3_INIT_SIMPLE(armv9_cortex_a715)
PMUV3_INIT_SIMPLE(armv9_cortex_a720)
+PMUV3_INIT_SIMPLE(armv9_cortex_a725)
PMUV3_INIT_SIMPLE(armv8_cortex_x1)
PMUV3_INIT_SIMPLE(armv9_cortex_x2)
PMUV3_INIT_SIMPLE(armv9_cortex_x3)
PMUV3_INIT_SIMPLE(armv9_cortex_x4)
+PMUV3_INIT_SIMPLE(armv9_cortex_x925)
PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
+PMUV3_INIT_SIMPLE(armv9_neoverse_n3)
PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v2)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae)
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
@@ -1379,14 +1389,20 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init},
{.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init},
{.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init},
+ {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init},
{.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init},
{.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init},
{.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init},
{.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init},
+ {.compatible = "arm,cortex-x925-pmu", .data = armv9_cortex_x925_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
{.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
+ {.compatible = "arm,neoverse-n3-pmu", .data = armv9_neoverse_n3_pmu_init},
{.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init},
+ {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init},
+ {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init},
+ {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c
new file mode 100644
index 000000000000..0bb685b4bac5
--- /dev/null
+++ b/drivers/perf/arm_v6_pmu.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ * effectively stops the counter from counting.
+ * - disable the counter's interrupt generation (each counter has it's
+ * own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ * - enable the counter's interrupt generation.
+ * - set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum armv6_perf_types {
+ ARMV6_PERFCTR_ICACHE_MISS = 0x0,
+ ARMV6_PERFCTR_IBUF_STALL = 0x1,
+ ARMV6_PERFCTR_DDEP_STALL = 0x2,
+ ARMV6_PERFCTR_ITLB_MISS = 0x3,
+ ARMV6_PERFCTR_DTLB_MISS = 0x4,
+ ARMV6_PERFCTR_BR_EXEC = 0x5,
+ ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
+ ARMV6_PERFCTR_INSTR_EXEC = 0x7,
+ ARMV6_PERFCTR_DCACHE_HIT = 0x9,
+ ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
+ ARMV6_PERFCTR_DCACHE_MISS = 0xB,
+ ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
+ ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
+ ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
+ ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
+ ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
+ ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
+ ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
+ ARMV6_PERFCTR_NOP = 0x20,
+};
+
+enum armv6_counters {
+ ARMV6_CYCLE_COUNTER = 0,
+ ARMV6_COUNTER0,
+ ARMV6_COUNTER1,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE (1 << 0)
+#define ARMV6_PMCR_CTR01_RESET (1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
+#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
+#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+ (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+ ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+ return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+ enum armv6_counters counter)
+{
+ int ret = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+ else if (ARMV6_COUNTER0 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+ else if (ARMV6_COUNTER1 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return ret;
+}
+
+static inline u64 armv6pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ unsigned long value = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return value;
+}
+
+static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+static void armv6pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = 0;
+ evt = ARMV6_PMCR_CCOUNT_IEN;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+ ARMV6_PMCR_COUNT0_IEN;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+ ARMV6_PMCR_COUNT1_IEN;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the event
+ * that we're interested in.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmcr = armv6_pmcr_read();
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ if (!armv6_pmcr_has_overflowed(pmcr))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ /*
+ * The interrupts are cleared by writing the overflow flags back to
+ * the control register. All of the other bits don't have any effect
+ * if they are rewritten, so write the whole value back.
+ */
+ armv6_pmcr_write(pmcr);
+
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv6pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val |= ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val &= ~ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static int
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ /* Always place a cycle counter into the cycle counter. */
+ if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
+ if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV6_CYCLE_COUNTER;
+ } else {
+ /*
+ * For anything other than a cycle counter, try and use
+ * counter0 and counter1.
+ */
+ if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+ return ARMV6_COUNTER1;
+
+ if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+ return ARMV6_COUNTER0;
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+ }
+}
+
+static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void armv6pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = ARMV6_PMCR_CCOUNT_IEN;
+ evt = 0;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the number
+ * of ETM bus signal assertion cycles. The external reporting should
+ * be disabled and so this should never increment.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv6_perf_map,
+ &armv6_perf_cache_map, 0xFF);
+}
+
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv6pmu_handle_irq;
+ cpu_pmu->enable = armv6pmu_enable_event;
+ cpu_pmu->disable = armv6pmu_disable_event;
+ cpu_pmu->read_counter = armv6pmu_read_counter;
+ cpu_pmu->write_counter = armv6pmu_write_counter;
+ cpu_pmu->get_event_idx = armv6pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
+ cpu_pmu->start = armv6pmu_start;
+ cpu_pmu->stop = armv6pmu_stop;
+ cpu_pmu->map_event = armv6_map_event;
+ cpu_pmu->num_events = 3;
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1136";
+ return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1176";
+ return 0;
+}
+
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
+ {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
+ {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
+ { /* sentinel value */ }
+};
+
+static int armv6_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv6_pmu_driver = {
+ .driver = {
+ .name = "armv6-pmu",
+ .of_match_table = armv6_pmu_of_device_ids,
+ },
+ .probe = armv6_pmu_device_probe,
+};
+
+builtin_platform_driver(armv6_pmu_driver);
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
new file mode 100644
index 000000000000..928ac3d626ed
--- /dev/null
+++ b/drivers/perf/arm_v7_pmu.c
@@ -0,0 +1,1994 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ * by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ * a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ * a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ * counter and all 4 performance counters together can be reset separately.
+ */
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/vfp.h>
+#include "../vfp/vfpinstr.h"
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+/*
+ * Common ARMv7 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ */
+#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00
+#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01
+#define ARMV7_PERFCTR_ITLB_REFILL 0x02
+#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03
+#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04
+#define ARMV7_PERFCTR_DTLB_REFILL 0x05
+#define ARMV7_PERFCTR_MEM_READ 0x06
+#define ARMV7_PERFCTR_MEM_WRITE 0x07
+#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08
+#define ARMV7_PERFCTR_EXC_TAKEN 0x09
+#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A
+#define ARMV7_PERFCTR_CID_WRITE 0x0B
+
+/*
+ * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+ * It counts:
+ * - all (taken) branch instructions,
+ * - instructions that explicitly write the PC,
+ * - exception generating instructions.
+ */
+#define ARMV7_PERFCTR_PC_WRITE 0x0C
+#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D
+#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E
+#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F
+#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10
+#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11
+#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12
+
+/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
+#define ARMV7_PERFCTR_MEM_ACCESS 0x13
+#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14
+#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15
+#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16
+#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17
+#define ARMV7_PERFCTR_L2_CACHE_WB 0x18
+#define ARMV7_PERFCTR_BUS_ACCESS 0x19
+#define ARMV7_PERFCTR_MEM_ERROR 0x1A
+#define ARMV7_PERFCTR_INSTR_SPEC 0x1B
+#define ARMV7_PERFCTR_TTBR_WRITE 0x1C
+#define ARMV7_PERFCTR_BUS_CYCLES 0x1D
+
+#define ARMV7_PERFCTR_CPU_CYCLES 0xFF
+
+/* ARMv7 Cortex-A8 specific event types */
+#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43
+#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44
+#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50
+#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56
+
+/* ARMv7 Cortex-A9 specific event types */
+#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68
+#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60
+#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66
+
+/* ARMv7 Cortex-A5 specific event types */
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3
+
+/* ARMv7 Cortex-A15 specific event types */
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43
+
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D
+
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53
+
+#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76
+
+/* ARMv7 Cortex-A12 specific event types */
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+
+#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76
+
+#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7
+
+/* ARMv7 Krait specific event types */
+#define KRAIT_PMRESR0_GROUP0 0xcc
+#define KRAIT_PMRESR1_GROUP0 0xd0
+#define KRAIT_PMRESR2_GROUP0 0xd4
+#define KRAIT_VPMRESR0_GROUP0 0xd8
+
+#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011
+#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010
+
+#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222
+#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210
+
+/* ARMv7 Scorpion specific event types */
+#define SCORPION_LPM0_GROUP0 0x4c
+#define SCORPION_LPM1_GROUP0 0x50
+#define SCORPION_LPM2_GROUP0 0x54
+#define SCORPION_L2LPM_GROUP0 0x58
+#define SCORPION_VLPM_GROUP0 0x5c
+
+#define SCORPION_ICACHE_ACCESS 0x10053
+#define SCORPION_ICACHE_MISS 0x10052
+
+#define SCORPION_DTLB_ACCESS 0x12013
+#define SCORPION_DTLB_MISS 0x12012
+
+#define SCORPION_ITLB_MISS 0x12021
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A5 HW events mapping
+ */
+static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+ /*
+ * The prefetch counters don't differentiate between the I side and the
+ * D side.
+ */
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A15 HW events mapping
+ */
+static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A7 HW events mapping
+ */
+static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A12 HW events mapping
+ */
+static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Krait HW events mapping
+ */
+static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+ /*
+ * Only ITLB misses and DTLB refills are supported. If users want the
+ * DTLB refills misses a raw counter must be used.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *armv7_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = armv7_pmu_format_attrs,
+};
+
+#define ARMV7_EVENT_ATTR_RESOLVE(m) #m
+#define ARMV7_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \
+ "event=" ARMV7_EVENT_ATTR_RESOLVE(config))
+
+ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR);
+ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL);
+ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL);
+ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ);
+ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE);
+ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED);
+ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN);
+ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED);
+ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE);
+ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE);
+ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH);
+ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN);
+ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS);
+ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED);
+ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES);
+ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED);
+
+static struct attribute *armv7_pmuv1_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv1_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv1_event_attrs,
+};
+
+ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
+ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
+ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS);
+ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL);
+ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB);
+ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS);
+ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR);
+ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC);
+ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE);
+ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES);
+
+static struct attribute *armv7_pmuv2_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ &armv7_event_attr_mem_access.attr.attr,
+ &armv7_event_attr_l1i_cache.attr.attr,
+ &armv7_event_attr_l1d_cache_wb.attr.attr,
+ &armv7_event_attr_l2d_cache.attr.attr,
+ &armv7_event_attr_l2d_cache_refill.attr.attr,
+ &armv7_event_attr_l2d_cache_wb.attr.attr,
+ &armv7_event_attr_bus_access.attr.attr,
+ &armv7_event_attr_memory_error.attr.attr,
+ &armv7_event_attr_inst_spec.attr.attr,
+ &armv7_event_attr_ttbr_write_retired.attr.attr,
+ &armv7_event_attr_bus_cycles.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv2_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv2_event_attrs,
+};
+
+/*
+ * Perf Events' indices
+ */
+#define ARMV7_IDX_CYCLE_COUNTER 0
+#define ARMV7_IDX_COUNTER0 1
+#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \
+ (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define ARMV7_MAX_COUNTERS 32
+#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1)
+
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Perf Event to low level counters mapping
+ */
+#define ARMV7_IDX_TO_COUNTER(x) \
+ (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
+#define ARMV7_PMNC_N_MASK 0x1f
+#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
+#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */
+#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv2
+ */
+#define ARMV7_EXCLUDE_PL1 BIT(31)
+#define ARMV7_EXCLUDE_USER BIT(30)
+#define ARMV7_INCLUDE_HYP BIT(27)
+
+/*
+ * Secure debug enable reg
+ */
+#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */
+
+static inline u32 armv7_pmnc_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void armv7_pmnc_write(u32 val)
+{
+ val &= ARMV7_PMNC_MASK;
+ isb();
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
+{
+ return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
+{
+ return idx >= ARMV7_IDX_CYCLE_COUNTER &&
+ idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu);
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
+{
+ return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
+}
+
+static inline void armv7_pmnc_select_counter(int idx)
+{
+ u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
+ isb();
+}
+
+static inline u64 armv7pmu_read_counter(struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u32 value = 0;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u reading wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+ }
+
+ return value;
+}
+
+static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u writing wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
+ }
+}
+
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+{
+ armv7_pmnc_select_counter(idx);
+ val &= ARMV7_EVTYPE_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+}
+
+static inline void armv7_pmnc_enable_counter(int idx)
+{
+ u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_disable_counter(int idx)
+{
+ u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_enable_intens(int idx)
+{
+ u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_disable_intens(int idx)
+{
+ u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+ asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+ isb();
+ /* Clear the overflow flag in case an interrupt is pending. */
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
+ isb();
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+ u32 val;
+
+ /* Read */
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+ /* Write to clear flags */
+ val &= ARMV7_FLAG_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+ return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+{
+ u32 val;
+ unsigned int cnt;
+
+ pr_info("PMNC registers dump:\n");
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ pr_info("PMNC =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+ pr_info("CNTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+ pr_info("INTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+ pr_info("FLAGS =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+ pr_info("SELECT=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+ pr_info("CCNT =0x%08x\n", val);
+
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ pr_info("CNT[%d] count =0x%08x\n",
+ ARMV7_IDX_TO_COUNTER(cnt), val);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+ pr_info("CNT[%d] evtsel=0x%08x\n",
+ ARMV7_IDX_TO_COUNTER(cnt), val);
+ }
+}
+#endif
+
+static void armv7pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /*
+ * Disable counter
+ */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We only need to set the event for the cycle counter if we
+ * have the ability to perform event filtering.
+ */
+ if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /*
+ * Enable interrupt for this counter
+ */
+ armv7_pmnc_enable_intens(idx);
+
+ /*
+ * Enable counter
+ */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void armv7pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ /*
+ * Disable counter and interrupt
+ */
+
+ /*
+ * Disable counter
+ */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Disable interrupt for this counter
+ */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ u32 pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * Get and reset the IRQ flags
+ */
+ pmnc = armv7_pmnc_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!armv7_pmnc_has_overflowed(pmnc))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+{
+ /* Enable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+}
+
+static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ /* Disable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+}
+
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
+
+ /* Always place a cycle counter into the cycle counter. */
+ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+ if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV7_IDX_CYCLE_COUNTER;
+ }
+
+ /*
+ * For anything other than a cycle counter, try and use
+ * the events counters
+ */
+ for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+}
+
+static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+ if (attr->exclude_user)
+ config_base |= ARMV7_EXCLUDE_USER;
+ if (attr->exclude_kernel)
+ config_base |= ARMV7_EXCLUDE_PL1;
+ if (!attr->exclude_hv)
+ config_base |= ARMV7_INCLUDE_HYP;
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base = config_base;
+
+ return 0;
+}
+
+static void armv7pmu_reset(void *info)
+{
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+ u32 idx, nb_cnt = cpu_pmu->num_events, val;
+
+ if (cpu_pmu->secure_access) {
+ asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
+ val |= ARMV7_SDER_SUNIDEN;
+ asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
+ }
+
+ /* The counter and interrupt enable registers are unknown at reset. */
+ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+ armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_intens(idx);
+ }
+
+ /* Initialize & Reset PMNC: C and P bits */
+ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+}
+
+static int armv7_a8_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a8_perf_map,
+ &armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a9_perf_map,
+ &armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a5_perf_map,
+ &armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a15_perf_map,
+ &armv7_a15_perf_cache_map, 0xFF);
+}
+
+static int armv7_a7_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a7_perf_map,
+ &armv7_a7_perf_cache_map, 0xFF);
+}
+
+static int armv7_a12_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a12_perf_map,
+ &armv7_a12_perf_cache_map, 0xFF);
+}
+
+static int krait_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int krait_map_event_no_branch(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map_no_branch,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int scorpion_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &scorpion_perf_map,
+ &scorpion_perf_cache_map, 0xFFFFF);
+}
+
+static void armv7pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv7pmu_handle_irq;
+ cpu_pmu->enable = armv7pmu_enable_event;
+ cpu_pmu->disable = armv7pmu_disable_event;
+ cpu_pmu->read_counter = armv7pmu_read_counter;
+ cpu_pmu->write_counter = armv7pmu_write_counter;
+ cpu_pmu->get_event_idx = armv7pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx;
+ cpu_pmu->start = armv7pmu_start;
+ cpu_pmu->stop = armv7pmu_stop;
+ cpu_pmu->reset = armv7pmu_reset;
+};
+
+static void armv7_read_num_pmnc_events(void *info)
+{
+ int *nb_cnt = info;
+
+ /* Read the nb of CNTx counters supported from PMNC */
+ *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+
+ /* Add the CPU cycles counter */
+ *nb_cnt += 1;
+}
+
+static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
+{
+ return smp_call_function_any(&arm_pmu->supported_cpus,
+ armv7_read_num_pmnc_events,
+ &arm_pmu->num_events, 1);
+}
+
+static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a8";
+ cpu_pmu->map_event = armv7_a8_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a9";
+ cpu_pmu->map_event = armv7_a9_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a5";
+ cpu_pmu->map_event = armv7_a5_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a15";
+ cpu_pmu->map_event = armv7_a15_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a7";
+ cpu_pmu->map_event = armv7_a7_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a12";
+ cpu_pmu->map_event = armv7_a12_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ int ret = armv7_a12_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a17";
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return ret;
+}
+
+/*
+ * Krait Performance Monitor Region Event Selection Register (PMRESRn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+#define KRAIT_EVENT (1 << 16)
+#define VENUM_EVENT (2 << 16)
+#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT)
+#define PMRESRn_EN BIT(31)
+
+#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */
+#define EVENT_GROUP(event) ((event) & 0xf) /* G */
+#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */
+#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */
+#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */
+
+static u32 krait_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void krait_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+}
+
+static u32 venum_read_pmresr(void)
+{
+ u32 val;
+ asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void venum_write_pmresr(u32 val)
+{
+ asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
+{
+ u32 venum_new_val;
+ u32 fp_new_val;
+
+ BUG_ON(preemptible());
+ /* CPACR Enable CP10 and CP11 access */
+ *venum_orig_val = get_copro_access();
+ venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
+ set_copro_access(venum_new_val);
+
+ /* Enable FPEXC */
+ *fp_orig_val = fmrx(FPEXC);
+ fp_new_val = *fp_orig_val | FPEXC_EN;
+ fmxr(FPEXC, fp_new_val);
+}
+
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
+{
+ BUG_ON(preemptible());
+ /* Restore FPEXC */
+ fmxr(FPEXC, fp_orig_val);
+ isb();
+ /* Restore CPACR */
+ set_copro_access(venum_orig_val);
+}
+
+static u32 krait_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
+ KRAIT_PMRESR1_GROUP0,
+ KRAIT_PMRESR2_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void krait_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = KRAIT_VPMRESR0_GROUP0;
+ else
+ val = krait_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static u32 clear_pmresrn_group(u32 val, int group)
+{
+ u32 mask;
+ int group_shift;
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+ val &= ~mask;
+
+ /* Don't clear enable bit if entire region isn't disabled */
+ if (val & ~PMRESRn_EN)
+ return val |= PMRESRn_EN;
+
+ return 0;
+}
+
+static void krait_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static void krait_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void krait_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We set the event for the cycle counter because we
+ * have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_evt_setup(idx, hwc->config_base);
+ else
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /* Enable interrupt for this counter */
+ armv7_pmnc_enable_intens(idx);
+
+ /* Enable counter */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void krait_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx, nb_cnt = cpu_pmu->num_events;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ krait_write_pmresrn(0, 0);
+ krait_write_pmresrn(1, 0);
+ krait_write_pmresrn(2, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+
+}
+
+static int krait_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = KRAIT_VPMRESR0_GROUP0;
+ else
+ bit = krait_get_pmresrn_event(region);
+ bit -= krait_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int code = EVENT_CODE(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || krait_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 2)
+ return -EINVAL;
+ if (venum_event && (code & 0xe0))
+ return -EINVAL;
+
+ bit = krait_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || krait_event) {
+ bit = krait_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_krait";
+ /* Some early versions of Krait don't support PC write events */
+ if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
+ "qcom,no-pc-write"))
+ cpu_pmu->map_event = krait_map_event_no_branch;
+ else
+ cpu_pmu->map_event = krait_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->reset = krait_pmu_reset;
+ cpu_pmu->enable = krait_pmu_enable_event;
+ cpu_pmu->disable = krait_pmu_disable_event;
+ cpu_pmu->get_event_idx = krait_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3
+ * +--------------------------------+
+ * VLPM | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 3:
+ asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 3:
+ asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+ SCORPION_LPM1_GROUP0,
+ SCORPION_LPM2_GROUP0,
+ SCORPION_L2LPM_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = SCORPION_VLPM_GROUP0;
+ else
+ val = scorpion_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We don't set the event for the cycle counter because we
+ * don't have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_evt_setup(idx, hwc->config_base);
+ else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /* Enable interrupt for this counter */
+ armv7_pmnc_enable_intens(idx);
+
+ /* Enable counter */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx, nb_cnt = cpu_pmu->num_events;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ scorpion_write_pmresrn(0, 0);
+ scorpion_write_pmresrn(1, 0);
+ scorpion_write_pmresrn(2, 0);
+ scorpion_write_pmresrn(3, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = SCORPION_VLPM_GROUP0;
+ else
+ bit = scorpion_get_pmresrn_event(region);
+ bit -= scorpion_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || scorpion_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 3)
+ return -EINVAL;
+
+ bit = scorpion_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || scorpion_event) {
+ bit = scorpion_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion_mp";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+ {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
+ {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
+ {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
+ {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
+ {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
+ {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
+ {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
+ {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
+ {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
+ {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
+ {},
+};
+
+static int armv7_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv7_pmu_driver = {
+ .driver = {
+ .name = "armv7-pmu",
+ .of_match_table = armv7_pmu_of_device_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = armv7_pmu_device_probe,
+};
+
+builtin_platform_driver(armv7_pmu_driver);
diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c
new file mode 100644
index 000000000000..3d8b72d6b37f
--- /dev/null
+++ b/drivers/perf/arm_xscale_pmu.c
@@ -0,0 +1,745 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * - xscale1pmu: 2 event counters and a cycle counter
+ * - xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum xscale_perf_types {
+ XSCALE_PERFCTR_ICACHE_MISS = 0x00,
+ XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
+ XSCALE_PERFCTR_DATA_STALL = 0x02,
+ XSCALE_PERFCTR_ITLB_MISS = 0x03,
+ XSCALE_PERFCTR_DTLB_MISS = 0x04,
+ XSCALE_PERFCTR_BRANCH = 0x05,
+ XSCALE_PERFCTR_BRANCH_MISS = 0x06,
+ XSCALE_PERFCTR_INSTRUCTION = 0x07,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
+ XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
+ XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
+ XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
+ XSCALE_PERFCTR_PC_CHANGED = 0x0D,
+ XSCALE_PERFCTR_BCU_REQUEST = 0x10,
+ XSCALE_PERFCTR_BCU_FULL = 0x11,
+ XSCALE_PERFCTR_BCU_DRAIN = 0x12,
+ XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
+ XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
+ XSCALE_PERFCTR_RMW = 0x16,
+ /* XSCALE_PERFCTR_CCNT is not hardware defined */
+ XSCALE_PERFCTR_CCNT = 0xFE,
+ XSCALE_PERFCTR_UNUSED = 0xFF,
+};
+
+enum xscale_counters {
+ XSCALE_CYCLE_COUNTER = 0,
+ XSCALE_COUNTER0,
+ XSCALE_COUNTER1,
+ XSCALE_COUNTER2,
+ XSCALE_COUNTER3,
+};
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
+ [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
+ [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+};
+
+#define XSCALE_PMU_ENABLE 0x001
+#define XSCALE_PMN_RESET 0x002
+#define XSCALE_CCNT_RESET 0x004
+#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
+#define XSCALE_PMU_CNT64 0x008
+
+#define XSCALE1_OVERFLOWED_MASK 0x700
+#define XSCALE1_CCOUNT_OVERFLOW 0x400
+#define XSCALE1_COUNT0_OVERFLOW 0x100
+#define XSCALE1_COUNT1_OVERFLOW 0x200
+#define XSCALE1_CCOUNT_INT_EN 0x040
+#define XSCALE1_COUNT0_INT_EN 0x010
+#define XSCALE1_COUNT1_INT_EN 0x020
+#define XSCALE1_COUNT0_EVT_SHFT 12
+#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT 20
+#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+ /* upper 4bits and 7, 11 are write-as-0 */
+ val &= 0xffff77f;
+ asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * NOTE: there's an A stepping erratum that states if an overflow
+ * bit already exists and another occurs, the previous
+ * Overflow bit gets cleared. There's no workaround.
+ * Fixed in B stepping or later.
+ */
+ pmnc = xscale1pmu_read_pmnc();
+
+ /*
+ * Write the value back to clear the overflow flags. Overflow
+ * flags remain in pmnc for use below. We also disable the PMU
+ * while we process the interrupt.
+ */
+ xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale1pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = 0;
+ evt = XSCALE1_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+ XSCALE1_COUNT0_INT_EN;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+ XSCALE1_COUNT1_INT_EN;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = XSCALE1_CCOUNT_INT_EN;
+ evt = 0;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static int
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
+ if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return XSCALE_CYCLE_COUNTER;
+ } else {
+ if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+ return XSCALE_COUNTER1;
+
+ if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+ return XSCALE_COUNTER0;
+
+ return -EAGAIN;
+ }
+}
+
+static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val |= XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &xscale_perf_map,
+ &xscale_perf_cache_map, 0xFF);
+}
+
+static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale1";
+ cpu_pmu->handle_irq = xscale1pmu_handle_irq;
+ cpu_pmu->enable = xscale1pmu_enable_event;
+ cpu_pmu->disable = xscale1pmu_disable_event;
+ cpu_pmu->read_counter = xscale1pmu_read_counter;
+ cpu_pmu->write_counter = xscale1pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale1pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale1pmu_start;
+ cpu_pmu->stop = xscale1pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+ cpu_pmu->num_events = 3;
+
+ return 0;
+}
+
+#define XSCALE2_OVERFLOWED_MASK 0x01f
+#define XSCALE2_CCOUNT_OVERFLOW 0x001
+#define XSCALE2_COUNT0_OVERFLOW 0x002
+#define XSCALE2_COUNT1_OVERFLOW 0x004
+#define XSCALE2_COUNT2_OVERFLOW 0x008
+#define XSCALE2_COUNT3_OVERFLOW 0x010
+#define XSCALE2_CCOUNT_INT_EN 0x001
+#define XSCALE2_COUNT0_INT_EN 0x002
+#define XSCALE2_COUNT1_INT_EN 0x004
+#define XSCALE2_COUNT2_INT_EN 0x008
+#define XSCALE2_COUNT3_INT_EN 0x010
+#define XSCALE2_COUNT0_EVT_SHFT 0
+#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT 8
+#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT 16
+#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT 24
+#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+ /* bits 1-2 and 4-23 are read-unpredictable */
+ return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+ /* bits 4-23 are write-as-0, 24-31 are write ignored */
+ val &= 0xf;
+ asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+ return val;
+}
+
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc, of_flags;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /* Disable the PMU. */
+ pmnc = xscale2pmu_read_pmnc();
+ xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ /* Check the overflow flag register. */
+ of_flags = xscale2pmu_read_overflow_flags();
+ if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ /* Clear the overflow bits. */
+ xscale2pmu_write_overflow_flags(of_flags);
+
+ regs = get_irq_regs();
+
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale2pmu_enable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien |= XSCALE2_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ ien |= XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ ien |= XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER2:
+ ien |= XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER3:
+ ien |= XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+}
+
+static void xscale2pmu_disable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel, of_flags;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien &= ~XSCALE2_CCOUNT_INT_EN;
+ of_flags = XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ien &= ~XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+ of_flags = XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ien &= ~XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+ of_flags = XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ien &= ~XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+ of_flags = XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ien &= ~XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+ of_flags = XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+ xscale2pmu_write_overflow_flags(of_flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx = xscale1pmu_get_event_idx(cpuc, event);
+ if (idx >= 0)
+ goto out;
+
+ if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+ idx = XSCALE_COUNTER3;
+ else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+ idx = XSCALE_COUNTER2;
+out:
+ return idx;
+}
+
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+ val |= XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale2";
+ cpu_pmu->handle_irq = xscale2pmu_handle_irq;
+ cpu_pmu->enable = xscale2pmu_enable_event;
+ cpu_pmu->disable = xscale2pmu_disable_event;
+ cpu_pmu->read_counter = xscale2pmu_read_counter;
+ cpu_pmu->write_counter = xscale2pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale2pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale2pmu_start;
+ cpu_pmu->stop = xscale2pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+ cpu_pmu->num_events = 5;
+
+ return 0;
+}
+
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+ { /* sentinel value */ }
+};
+
+static int xscale_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
+}
+
+static struct platform_driver xscale_pmu_driver = {
+ .driver = {
+ .name = "xscale-pmu",
+ },
+ .probe = xscale_pmu_device_probe,
+};
+
+builtin_platform_driver(xscale_pmu_driver);
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
index 1f93a66eff5b..43d68b69e630 100644
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -972,6 +972,7 @@ static __exit void cxl_pmu_exit(void)
cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
}
+MODULE_DESCRIPTION("CXL Performance Monitor Driver");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS(CXL);
module_init(cxl_pmu_init);
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 1bbdb29743c4..746b92330ca7 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -850,4 +850,5 @@ static struct platform_driver imx_ddr_pmu_driver = {
};
module_platform_driver(imx_ddr_pmu_driver);
+MODULE_DESCRIPTION("Freescale i.MX8 DDR Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 72c2d3074cde..69f920b1caf2 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -11,14 +11,24 @@
#include <linux/perf_event.h>
/* Performance monitor configuration */
-#define PMCFG1 0x00
-#define PMCFG1_RD_TRANS_FILT_EN BIT(31)
-#define PMCFG1_WR_TRANS_FILT_EN BIT(30)
-#define PMCFG1_RD_BT_FILT_EN BIT(29)
-#define PMCFG1_ID_MASK GENMASK(17, 0)
+#define PMCFG1 0x00
+#define MX93_PMCFG1_RD_TRANS_FILT_EN BIT(31)
+#define MX93_PMCFG1_WR_TRANS_FILT_EN BIT(30)
+#define MX93_PMCFG1_RD_BT_FILT_EN BIT(29)
+#define MX93_PMCFG1_ID_MASK GENMASK(17, 0)
-#define PMCFG2 0x04
-#define PMCFG2_ID GENMASK(17, 0)
+#define MX95_PMCFG1_WR_BEAT_FILT_EN BIT(31)
+#define MX95_PMCFG1_RD_BEAT_FILT_EN BIT(30)
+
+#define PMCFG2 0x04
+#define MX93_PMCFG2_ID GENMASK(17, 0)
+
+#define PMCFG3 0x08
+#define PMCFG4 0x0C
+#define PMCFG5 0x10
+#define PMCFG6 0x14
+#define MX95_PMCFG_ID_MASK GENMASK(9, 0)
+#define MX95_PMCFG_ID GENMASK(25, 16)
/* Global control register affects all counters and takes priority over local control registers */
#define PMGC0 0x40
@@ -41,6 +51,10 @@
#define NUM_COUNTERS 11
#define CYCLES_COUNTER 0
+#define CYCLES_EVENT_ID 0
+
+#define CONFIG_EVENT_MASK GENMASK(7, 0)
+#define CONFIG_COUNTER_MASK GENMASK(23, 16)
#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
@@ -71,8 +85,23 @@ static const struct imx_ddr_devtype_data imx93_devtype_data = {
.identifier = "imx93",
};
+static const struct imx_ddr_devtype_data imx95_devtype_data = {
+ .identifier = "imx95",
+};
+
+static inline bool is_imx93(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data == &imx93_devtype_data;
+}
+
+static inline bool is_imx95(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data == &imx95_devtype_data;
+}
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
- {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data},
+ { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data },
+ { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -118,21 +147,40 @@ static const struct attribute_group ddr_perf_cpumask_attr_group = {
.attrs = ddr_perf_cpumask_attrs,
};
+struct imx9_pmu_events_attr {
+ struct device_attribute attr;
+ u64 id;
+ const void *devtype_data;
+};
+
static ssize_t ddr_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- struct perf_pmu_events_attr *pmu_attr;
+ struct imx9_pmu_events_attr *pmu_attr;
- pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ pmu_attr = container_of(attr, struct imx9_pmu_events_attr, attr);
return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}
-#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \
- (&((struct perf_pmu_events_attr[]) { \
+#define COUNTER_OFFSET_IN_EVENT 8
+#define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id)
+
+#define DDR_PMU_EVENT_ATTR_COMM(_name, _id, _data) \
+ (&((struct imx9_pmu_events_attr[]) { \
{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
- .id = _id, } \
+ .id = _id, \
+ .devtype_data = _data, } \
})[0].attr.attr)
+#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, NULL)
+
+#define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data)
+
+#define IMX95_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx95_devtype_data)
+
static struct attribute *ddr_perf_events_attrs[] = {
/* counter0 cycles event */
IMX9_DDR_PMU_EVENT_ATTR(cycles, 0),
@@ -159,90 +207,114 @@ static struct attribute *ddr_perf_events_attrs[] = {
IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63),
/* counter1 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, ID(1, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, ID(1, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, ID(1, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, ID(1, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, ID(1, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, ID(1, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, ID(1, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, ID(1, 71)),
/* counter2 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, ID(2, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, ID(2, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, ID(2, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, ID(2, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, ID(2, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, ID(2, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_beat_filt, ID(2, 73)), /* imx95 specific*/
/* counter3 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, ID(3, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, ID(3, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, ID(3, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, ID(3, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, ID(3, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt2, ID(3, 73)), /* imx95 specific*/
/* counter4 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, ID(4, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, ID(4, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, ID(4, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, ID(4, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, ID(4, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt1, ID(4, 73)), /* imx95 specific*/
/* counter5 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, ID(5, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, ID(5, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, ID(5, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, ID(5, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, ID(5, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)),
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt0, ID(5, 73)), /* imx95 specific*/
/* counter6 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, ID(6, 72)),
/* counter7 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, ID(7, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, ID(7, 65)),
/* counter8 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, ID(8, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, ID(8, 65)),
/* counter9 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, ID(9, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, ID(9, 66)),
/* counter10 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, ID(10, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, ID(10, 66)),
NULL,
};
+static umode_t
+ddr_perf_events_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+ struct imx9_pmu_events_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+ if (!eattr->devtype_data)
+ return attr->mode;
+
+ if (eattr->devtype_data != ddr_pmu->devtype_data)
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group ddr_perf_events_attr_group = {
.name = "events",
.attrs = ddr_perf_events_attrs,
+ .is_visible = ddr_perf_events_attrs_is_visible,
};
-PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-7,16-23");
PMU_FORMAT_ATTR(counter, "config:8-15");
PMU_FORMAT_ATTR(axi_id, "config1:0-17");
PMU_FORMAT_ATTR(axi_mask, "config2:0-17");
@@ -339,8 +411,10 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
int counter, bool enable)
{
u32 ctrl_a;
+ int event;
ctrl_a = readl_relaxed(pmu->base + PMLCA(counter));
+ event = FIELD_GET(CONFIG_EVENT_MASK, config);
if (enable) {
ctrl_a |= PMLCA_FC;
@@ -352,7 +426,7 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
ctrl_a &= ~PMLCA_FC;
ctrl_a |= PMLCA_CE;
ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F);
- ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF));
+ ctrl_a |= FIELD_PREP(PMLCA_EVENT, event);
writel(ctrl_a, pmu->base + PMLCA(counter));
} else {
/* Freeze counter. */
@@ -361,39 +435,79 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
}
}
-static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2)
+static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
{
u32 pmcfg1, pmcfg2;
- int event, counter;
-
- event = cfg & 0x000000FF;
- counter = (cfg & 0x0000FF00) >> 8;
+ u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN,
+ MX93_PMCFG1_WR_TRANS_FILT_EN,
+ MX93_PMCFG1_RD_BT_FILT_EN };
pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
- if (counter == 2 && event == 73)
- pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN;
- else if (counter == 2 && event != 73)
- pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN;
+ if (counter >= 2 && counter <= 4)
+ pmcfg1 = event == 73 ? pmcfg1 | mask[counter - 2] :
+ pmcfg1 & ~mask[counter - 2];
- if (counter == 3 && event == 73)
- pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN;
- else if (counter == 3 && event != 73)
- pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN;
+ pmcfg1 &= ~FIELD_PREP(MX93_PMCFG1_ID_MASK, 0x3FFFF);
+ pmcfg1 |= FIELD_PREP(MX93_PMCFG1_ID_MASK, axi_mask);
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
- if (counter == 4 && event == 73)
- pmcfg1 |= PMCFG1_RD_BT_FILT_EN;
- else if (counter == 4 && event != 73)
- pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN;
+ pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
+ pmcfg2 &= ~FIELD_PREP(MX93_PMCFG2_ID, 0x3FFFF);
+ pmcfg2 |= FIELD_PREP(MX93_PMCFG2_ID, axi_id);
+ writel_relaxed(pmcfg2, pmu->base + PMCFG2);
+}
- pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF);
- pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2);
- writel(pmcfg1, pmu->base + PMCFG1);
+static void imx95_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
+{
+ u32 pmcfg1, pmcfg, offset = 0;
- pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
- pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF);
- pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1);
- writel(pmcfg2, pmu->base + PMCFG2);
+ pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+ if (event == 73) {
+ switch (counter) {
+ case 2:
+ pmcfg1 |= MX95_PMCFG1_WR_BEAT_FILT_EN;
+ offset = PMCFG3;
+ break;
+ case 3:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG4;
+ break;
+ case 4:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG5;
+ break;
+ case 5:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG6;
+ break;
+ }
+ } else {
+ switch (counter) {
+ case 2:
+ pmcfg1 &= ~MX95_PMCFG1_WR_BEAT_FILT_EN;
+ break;
+ case 3:
+ case 4:
+ case 5:
+ pmcfg1 &= ~MX95_PMCFG1_RD_BEAT_FILT_EN;
+ break;
+ }
+ }
+
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
+
+ if (offset) {
+ pmcfg = readl_relaxed(pmu->base + offset);
+ pmcfg &= ~(FIELD_PREP(MX95_PMCFG_ID_MASK, 0x3FF) |
+ FIELD_PREP(MX95_PMCFG_ID, 0x3FF));
+ pmcfg |= (FIELD_PREP(MX95_PMCFG_ID_MASK, axi_mask) |
+ FIELD_PREP(MX95_PMCFG_ID, axi_id));
+ writel_relaxed(pmcfg, pmu->base + offset);
+ }
}
static void ddr_perf_event_update(struct perf_event *event)
@@ -460,6 +574,28 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
hwc->state = 0;
}
+static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event, int counter)
+{
+ int i;
+
+ if (event == CYCLES_EVENT_ID) {
+ // Cycles counter is dedicated for cycle event.
+ if (pmu->events[CYCLES_COUNTER] == NULL)
+ return CYCLES_COUNTER;
+ } else if (counter != 0) {
+ // Counter specific event use specific counter.
+ if (pmu->events[counter] == NULL)
+ return counter;
+ } else {
+ // Auto allocate counter for referene event.
+ for (i = 1; i < NUM_COUNTERS; i++)
+ if (pmu->events[i] == NULL)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
static int ddr_perf_event_add(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
@@ -467,21 +603,33 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
int cfg = event->attr.config;
int cfg1 = event->attr.config1;
int cfg2 = event->attr.config2;
- int counter;
+ int event_id, counter;
- counter = (cfg & 0x0000FF00) >> 8;
+ event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg);
+ counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg);
+
+ counter = ddr_perf_alloc_counter(pmu, event_id, counter);
+ if (counter < 0) {
+ dev_dbg(pmu->dev, "There are not enough counters\n");
+ return -EOPNOTSUPP;
+ }
pmu->events[counter] = event;
pmu->active_events++;
hwc->idx = counter;
hwc->state |= PERF_HES_STOPPED;
+ if (is_imx93(pmu))
+ /* read trans, write trans, read beat */
+ imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
+ if (is_imx95(pmu))
+ /* write beat, read beat2, read beat1, read beat */
+ imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
if (flags & PERF_EF_START)
ddr_perf_event_start(event, flags);
- /* read trans, write trans, read beat */
- ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2);
-
return 0;
}
@@ -501,9 +649,11 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
ddr_perf_event_stop(event, PERF_EF_UPDATE);
+ pmu->events[counter] = NULL;
pmu->active_events--;
hwc->idx = -1;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 6392cbedcd06..918cdc31de57 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -537,4 +537,5 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
}
EXPORT_SYMBOL_GPL(hisi_pmu_init);
+MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index e2abca188dbe..94f1ebcd2a27 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -763,4 +763,5 @@ module_init(cn10k_ddr_pmu_init);
module_exit(cn10k_ddr_pmu_exit);
MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");