summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-03 11:22:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-03 11:22:50 -0700
commit7cca308cfdc0725363ac5943dca9dcd49cc1d2d5 (patch)
tree39173c9b21c4a6e1f68e3b672afbe008b27198f9 /drivers
parent11d5576880aed34b8aa4e8049afdab92793b071f (diff)
parenta3314262eede9c909a0c797f16f25f941d12c78d (diff)
Merge tag 'powerpc-5.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Convert pseries & powernv to use MSI IRQ domains. - Rework the pseries CPU numbering so that CPUs that are removed, and later re-added, are given a CPU number on the same node as previously, when possible. - Add support for a new more flexible device-tree format for specifying NUMA distances. - Convert powerpc to GENERIC_PTDUMP. - Retire sbc8548 and sbc8641d board support. - Various other small features and fixes. Thanks to Alexey Kardashevskiy, Aneesh Kumar K.V, Anton Blanchard, Cédric Le Goater, Christophe Leroy, Emmanuel Gil Peyrot, Fabiano Rosas, Fangrui Song, Finn Thain, Gautham R. Shenoy, Hari Bathini, Joel Stanley, Jordan Niethe, Kajol Jain, Laurent Dufour, Leonardo Bras, Lukas Bulwahn, Marc Zyngier, Masahiro Yamada, Michal Suchanek, Nathan Chancellor, Nicholas Piggin, Parth Shah, Paul Gortmaker, Pratik R. Sampat, Randy Dunlap, Sebastian Andrzej Siewior, Srikar Dronamraju, Wan Jiabing, Xiongwei Song, and Zheng Yongjun. * tag 'powerpc-5.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (154 commits) powerpc/bug: Cast to unsigned long before passing to inline asm powerpc/ptdump: Fix generic ptdump for 64-bit KVM: PPC: Fix clearing never mapped TCEs in realmode powerpc/pseries/iommu: Rename "direct window" to "dma window" powerpc/pseries/iommu: Make use of DDW for indirect mapping powerpc/pseries/iommu: Find existing DDW with given property name powerpc/pseries/iommu: Update remove_dma_window() to accept property name powerpc/pseries/iommu: Reorganize iommu_table_setparms*() with new helper powerpc/pseries/iommu: Add ddw_property_create() and refactor enable_ddw() powerpc/pseries/iommu: Allow DDW windows starting at 0x00 powerpc/pseries/iommu: Add ddw_list_new_entry() helper powerpc/pseries/iommu: Add iommu_pseries_alloc_table() helper powerpc/kernel/iommu: Add new iommu_table_in_use() helper powerpc/pseries/iommu: Replace hard-coded page shift powerpc/numa: Update cpu_cpu_map on CPU 
online/offline powerpc/numa: Print debug statements only when required powerpc/numa: convert printk to pr_xxx powerpc/numa: Drop dbg in favour of pr_debug powerpc/smp: Enable CACHE domain for shared processor powerpc/smp: Update cpu_core_map on all PowerPc systems ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c16
-rw-r--r--drivers/cpuidle/cpuidle-pseries.c77
2 files changed, 60 insertions, 33 deletions
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 23a06cba392c..5a2cf5f91ccb 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -36,6 +36,7 @@
#define MAX_PSTATE_SHIFT 32
#define LPSTATE_SHIFT 48
#define GPSTATE_SHIFT 56
+#define MAX_NR_CHIPS 32
#define MAX_RAMP_DOWN_TIME 5120
/*
@@ -1046,12 +1047,20 @@ static int init_chip_info(void)
unsigned int *chip;
unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX;
+ cpumask_t *chip_cpu_mask;
int ret = 0;
chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
if (!chip)
return -ENOMEM;
+ /* Allocate a chip cpu mask large enough to fit mask for all chips */
+ chip_cpu_mask = kcalloc(MAX_NR_CHIPS, sizeof(cpumask_t), GFP_KERNEL);
+ if (!chip_cpu_mask) {
+ ret = -ENOMEM;
+ goto free_and_return;
+ }
+
for_each_possible_cpu(cpu) {
unsigned int id = cpu_to_chip_id(cpu);
@@ -1059,22 +1068,25 @@ static int init_chip_info(void)
prev_chip_id = id;
chip[nr_chips++] = id;
}
+ cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]);
}
chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
if (!chips) {
ret = -ENOMEM;
- goto free_and_return;
+ goto out_free_chip_cpu_mask;
}
for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i];
- cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
+ cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]);
INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
for_each_cpu(cpu, &chips[i].mask)
per_cpu(chip_info, cpu) = &chips[i];
}
+out_free_chip_cpu_mask:
+ kfree(chip_cpu_mask);
free_and_return:
kfree(chip);
return ret;
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a2b5c6f60cf0..7e7ab5597d7a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -346,11 +346,9 @@ static int pseries_cpuidle_driver_init(void)
static void __init fixup_cede0_latency(void)
{
struct xcede_latency_payload *payload;
- u64 min_latency_us;
+ u64 min_xcede_latency_us = UINT_MAX;
int i;
- min_latency_us = dedicated_states[1].exit_latency; // CEDE latency
-
if (parse_cede_parameters())
return;
@@ -358,42 +356,45 @@ static void __init fixup_cede0_latency(void)
nr_xcede_records);
payload = &xcede_latency_parameter.payload;
+
+ /*
+ * The CEDE idle state maps to CEDE(0). While the hypervisor
+ * does not advertise CEDE(0) exit latency values, it does
+ * advertise the latency values of the extended CEDE states.
+ * We use the lowest advertised exit latency value as a proxy
+ * for the exit latency of CEDE(0).
+ */
for (i = 0; i < nr_xcede_records; i++) {
struct xcede_latency_record *record = &payload->records[i];
+ u8 hint = record->hint;
u64 latency_tb = be64_to_cpu(record->latency_ticks);
u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
- if (latency_us == 0)
- pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i);
-
- if (latency_us < min_latency_us)
- min_latency_us = latency_us;
- }
-
- /*
- * By default, we assume that CEDE(0) has exit latency 10us,
- * since there is no way for us to query from the platform.
- *
- * However, if the wakeup latency of an Extended CEDE state is
- * smaller than 10us, then we can be sure that CEDE(0)
- * requires no more than that.
- *
- * Perform the fix-up.
- */
- if (min_latency_us < dedicated_states[1].exit_latency) {
/*
- * We set a minimum of 1us wakeup latency for cede0 to
- * distinguish it from snooze
+ * We expect the exit latency of an extended CEDE
+ * state to be non-zero, since it takes at least
+ * a few nanoseconds to wake up the idle CPU and
+ * dispatch the virtual processor into the Linux
+ * Guest.
+ *
+ * So we consider only non-zero values for performing
+ * the fixup of CEDE(0) latency.
*/
- u64 cede0_latency = 1;
+ if (latency_us == 0) {
+ pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n",
+ i, hint);
+ continue;
+ }
- if (min_latency_us > cede0_latency)
- cede0_latency = min_latency_us - 1;
+ if (latency_us < min_xcede_latency_us)
+ min_xcede_latency_us = latency_us;
+ }
- dedicated_states[1].exit_latency = cede0_latency;
- dedicated_states[1].target_residency = 10 * (cede0_latency);
+ if (min_xcede_latency_us != UINT_MAX) {
+ dedicated_states[1].exit_latency = min_xcede_latency_us;
+ dedicated_states[1].target_residency = 10 * (min_xcede_latency_us);
pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
- cede0_latency);
+ min_xcede_latency_us);
}
}
@@ -402,7 +403,7 @@ static void __init fixup_cede0_latency(void)
* pseries_idle_probe()
* Choose state table for shared versus dedicated partition
*/
-static int pseries_idle_probe(void)
+static int __init pseries_idle_probe(void)
{
if (cpuidle_disable != IDLE_NO_OVERRIDE)
@@ -419,7 +420,21 @@ static int pseries_idle_probe(void)
cpuidle_state_table = shared_states;
max_idle_state = ARRAY_SIZE(shared_states);
} else {
- fixup_cede0_latency();
+ /*
+ * Use firmware provided latency values
+ * starting with POWER10 platforms. In the
+ * case that we are running on a POWER10
+ * platform but in an earlier compat mode, we
+ * can still use the firmware provided values.
+ *
+ * However, on platforms prior to POWER10, we
+ * cannot rely on the accuracy of the firmware
+ * provided latency values. On such platforms,
+ * go with the conservative default estimate
+ * of 10us.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
+ fixup_cede0_latency();
cpuidle_state_table = dedicated_states;
max_idle_state = NR_DEDICATED_STATES;
}