From 8f36479d0eda3c078ee4e1ec79ea9a2650cd0996 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 17 Jul 2017 16:12:43 +0300 Subject: powerpc: allow compiling with GENERIC_MSI_IRQ_DOMAIN This allows building powerpc with the GENERIC_MSI_IRQ_DOMAIN Kconfig by enabling the asm-generic msi.h in Kbuild. Without this, there's a compilation error [1] because powerpc, as most arches, doesn't provide an asm/msi.h. [1] In file included from ./include/linux/kvm_host.h:20:0, from ./arch/powerpc/include/asm/kvm_ppc.h:30, from arch/powerpc/kernel/dbell.c:20: ./include/linux/msi.h:195:21: fatal error: asm/msi.h: No such file or directory Signed-off-by: Laurentiu Tudor Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 5c4fbc80dc6c..2542ea15d338 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h +generic-y += msi.h -- cgit v1.2.3-58-ga151 From 28a5db0061014c8afbbb98560cf420c29bc4d8e1 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 19 Jul 2017 03:06:32 +0530 Subject: powerpc/powernv: Add IMC OPAL APIs In-Memory Collection (IMC) counters are performance monitoring infrastructure. These counters need special sequence of SCOMs to init/start/stop which is handled by OPAL. And OPAL provides three APIs to init and control these IMC engines. OPAL API documentation: https://github.com/open-power/skiboot/blob/master/doc/opal-api/opal-imc-counters.rst Patch updates the kernel side powernv platform code to support the new OPAL APIs Signed-off-by: Hemant Kumar Signed-off-by: Anju T Sudhakar Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 12 +++++++++++- arch/powerpc/include/asm/opal.h | 5 +++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 3 +++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 3130a73652c7..7df005965634 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -190,7 +190,10 @@ #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 #define OPAL_NPU_MAP_LPAR 148 -#define OPAL_LAST 148 +#define OPAL_IMC_COUNTERS_INIT 149 +#define OPAL_IMC_COUNTERS_START 150 +#define OPAL_IMC_COUNTERS_STOP 151 +#define OPAL_LAST 151 /* Device tree flags */ @@ -1084,6 +1087,13 @@ enum { XIVE_DUMP_EMU_STATE = 5, }; +/* "type" argument options for OPAL_IMC_COUNTERS_* calls */ +enum { + OPAL_IMC_COUNTERS_NEST = 1, + OPAL_IMC_COUNTERS_CORE = 2, +}; + + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 588fb1c23af9..6b8513c3ad40 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -268,6 +268,11 @@ int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_imc_counters_init(uint32_t type, uint64_t address, + uint64_t cpu_pir); +int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir); +int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, 
const char *uname, int depth, void *data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4ca6c26a56d5..b77f52ee8263 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -310,3 +310,6 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); +OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); +OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); +OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); -- cgit v1.2.3-58-ga151 From 8f95faaac56c18b32d0e23ace55417a440abdb7e Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 19 Jul 2017 03:06:33 +0530 Subject: powerpc/powernv: Detect and create IMC device Code to create platform device for the In-Memory Collection (IMC) counters. Platform devices are created based on the IMC compatibility. New header file created to contain the data structures and macros needed for In-Memory Collection (IMC) counter pmu devices. The device tree for IMC counters starts at the node "imc-counters". This node contains all the IMC PMU nodes and event nodes for these IMC PMUs. Device probe() parses the device to locate three possible IMC device types (Nest/Core/Thread). Function then branch to parse each unit nodes to populate vital information such as device memory sizes, event nodes information, base address for reserve memory access (if any) and so on. Simple bare-minimum shutdown function added which only "stops" the engines. Signed-off-by: Anju T Sudhakar Signed-off-by: Hemant Kumar Signed-off-by: Madhavan Srinivasan [mpe: Fix build with CONFIG_PERF_EVENTS=n] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/imc-pmu.h | 128 +++++++++++++++++ arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-imc.c | 221 ++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 14 ++ 4 files changed, 364 insertions(+) create mode 100644 arch/powerpc/include/asm/imc-pmu.h create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h new file mode 100644 index 000000000000..7f74c282710f --- /dev/null +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -0,0 +1,128 @@ +#ifndef __ASM_POWERPC_IMC_PMU_H +#define __ASM_POWERPC_IMC_PMU_H + +/* + * IMC Nest Performance Monitor counter support. + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ + +#include +#include +#include +#include +#include + +/* + * For static allocation of some of the structures. + */ +#define IMC_MAX_PMUS 32 + +/* + * Compatibility macros for IMC devices + */ +#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters" +#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters" + + +/* + * LDBAR: Counter address and Enable/Disable macro. + * perf/imc-pmu.c has the LDBAR layout information. 
+ */ +#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL +#define THREAD_IMC_ENABLE 0x8000000000000000ULL + +/* + * Structure to hold memory address information for imc units. + */ +struct imc_mem_info { + u64 *vbase; + u32 id; +}; + +/* + * Place holder for nest pmu events and values. + */ +struct imc_events { + u32 value; + char *name; + char *unit; + char *scale; +}; + +/* Event attribute array index */ +#define IMC_FORMAT_ATTR 0 +#define IMC_EVENT_ATTR 1 +#define IMC_CPUMASK_ATTR 2 +#define IMC_NULL_ATTR 3 + +/* PMU Format attribute macros */ +#define IMC_EVENT_OFFSET_MASK 0xffffffffULL + +/* + * Device tree parser code detects IMC pmu support and + * registers new IMC pmus. This structure will hold the + * pmu functions, events, counter memory information + * and attrs for each imc pmu and will be referenced at + * the time of pmu registration. + */ +struct imc_pmu { + struct pmu pmu; + struct imc_mem_info *mem_info; + struct imc_events **events; + /* + * Attribute groups for the PMU. Slot 0 used for + * format attribute, slot 1 used for cpusmask attribute, + * slot 2 used for event attribute. Slot 3 keep as + * NULL. + */ + const struct attribute_group *attr_groups[4]; + u32 counter_mem_size; + int domain; + /* + * flag to notify whether the memory is mmaped + * or allocated by kernel. + */ + bool imc_counter_mmaped; +}; + +/* + * Structure to hold id, lock and reference count for the imc events which + * are inited. + */ +struct imc_pmu_ref { + struct mutex lock; + unsigned int id; + int refc; +}; + +/* + * In-Memory Collection Counters type. + * Data comes from Device tree. + * Three device type are supported. + */ + +enum { + IMC_TYPE_THREAD = 0x1, + IMC_TYPE_CORE = 0x4, + IMC_TYPE_CHIP = 0x10, +}; + +/* + * Domains for IMC PMUs + */ +#define IMC_DOMAIN_NEST 1 +#define IMC_DOMAIN_CORE 2 +#define IMC_DOMAIN_THREAD 3 + +extern int init_imc_pmu(struct device_node *parent, + struct imc_pmu *pmu_ptr, int pmu_id); +extern void thread_imc_disable(void); +#endif /* __ASM_POWERPC_IMC_PMU_H */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index b5d98cb3f482..a0d4353a17c9 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -12,3 +12,4 @@ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o +obj-$(CONFIG_PERF_EVENTS) += opal-imc.o diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c new file mode 100644 index 000000000000..f57a6fbd3f57 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -0,0 +1,221 @@ +/* + * OPAL IMC interface detection driver + * Supported on POWERNV platform + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * imc_get_mem_addr_nest: Function to get nest counter memory region + * for each chip + */ +static int imc_get_mem_addr_nest(struct device_node *node, + struct imc_pmu *pmu_ptr, + u32 offset) +{ + int nr_chips = 0, i; + u64 *base_addr_arr, baddr; + u32 *chipid_arr; + + nr_chips = of_property_count_u32_elems(node, "chip-id"); + if (nr_chips <= 0) + return -ENODEV; + + base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL); + if (!base_addr_arr) + return -ENOMEM; + + chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL); + if (!chipid_arr) + return -ENOMEM; + + if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips)) + goto error; + + if (of_property_read_u64_array(node, "base-addr", base_addr_arr, + nr_chips)) + goto error; + + pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info), + GFP_KERNEL); + if (!pmu_ptr->mem_info) + goto error; + + for (i = 0; i < nr_chips; i++) { + pmu_ptr->mem_info[i].id = chipid_arr[i]; + baddr = base_addr_arr[i] + offset; + pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr); + } + + pmu_ptr->imc_counter_mmaped = true; + kfree(base_addr_arr); + kfree(chipid_arr); + return 0; + +error: + kfree(pmu_ptr->mem_info); + kfree(base_addr_arr); + kfree(chipid_arr); + return -1; +} + +/* + * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index + * and domain as the inputs. + * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets + */ +static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) +{ + int ret = 0; + struct imc_pmu *pmu_ptr; + u32 offset; + + /* memory for pmu */ + pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL); + if (!pmu_ptr) + return -ENOMEM; + + /* Set the domain */ + pmu_ptr->domain = domain; + + ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size); + if (ret) { + ret = -EINVAL; + goto free_pmu; + } + + if (!of_property_read_u32(parent, "offset", &offset)) { + if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) { + ret = -EINVAL; + goto free_pmu; + } + } + + return 0; + +free_pmu: + kfree(pmu_ptr); + return ret; +} + +static void disable_nest_pmu_counters(void) +{ + int nid, cpu; + struct cpumask *l_cpumask; + + get_online_cpus(); + for_each_online_node(nid) { + l_cpumask = cpumask_of_node(nid); + cpu = cpumask_first(l_cpumask); + opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(cpu)); + } + put_online_cpus(); +} + +static void disable_core_pmu_counters(void) +{ + cpumask_t cores_map; + int cpu, rc; + + get_online_cpus(); + /* Disable the IMC Core functions */ + cores_map = cpu_online_cores_map(); + for_each_cpu(cpu, &cores_map) { + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(cpu)); + if (rc) + pr_err("%s: Failed to stop Core (cpu = %d)\n", + __FUNCTION__, cpu); + } + put_online_cpus(); +} + +static int opal_imc_counters_probe(struct platform_device *pdev) +{ + struct device_node *imc_dev = pdev->dev.of_node; + int pmu_count = 0, domain; + u32 type; + + /* + * Check whether this is kdump kernel. If yes, force the engines to + * stop and return. 
+ */ + if (is_kdump_kernel()) { + disable_nest_pmu_counters(); + disable_core_pmu_counters(); + return -ENODEV; + } + + for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(imc_dev, "type", &type)) { + pr_warn("IMC Device without type property\n"); + continue; + } + + switch (type) { + case IMC_TYPE_CHIP: + domain = IMC_DOMAIN_NEST; + break; + case IMC_TYPE_CORE: + domain =IMC_DOMAIN_CORE; + break; + case IMC_TYPE_THREAD: + domain = IMC_DOMAIN_THREAD; + break; + default: + pr_warn("IMC Unknown Device type \n"); + domain = -1; + break; + } + + if (!imc_pmu_create(imc_dev, pmu_count, domain)) + pmu_count++; + } + + return 0; +} + +static void opal_imc_counters_shutdown(struct platform_device *pdev) +{ + /* + * Function only stops the engines which is bare minimum. + * TODO: Need to handle proper memory cleanup and pmu + * unregister. + */ + disable_nest_pmu_counters(); + disable_core_pmu_counters(); +} + +static const struct of_device_id opal_imc_match[] = { + { .compatible = IMC_DTB_COMPAT }, + {}, +}; + +static struct platform_driver opal_imc_driver = { + .driver = { + .name = "opal-imc-counters", + .of_match_table = opal_imc_match, + }, + .probe = opal_imc_counters_probe, + .shutdown = opal_imc_counters_shutdown, +}; + +builtin_platform_driver(opal_imc_driver); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cad6b57ce494..946158e6fc1d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include "powernv.h" @@ -720,6 +722,15 @@ static void opal_pdev_init(const char *compatible) of_platform_device_create(np, NULL, NULL); } +static void __init opal_imc_init_dev(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); + if (np) + of_platform_device_create(np, NULL, NULL); +} + static int kopald(void *unused) { unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; @@ -793,6 +804,9 @@ static int __init opal_init(void) /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); + /* Detect In-Memory Collection counters and create devices*/ + opal_imc_init_dev(); + /* Create leds platform devices */ leds = of_find_node_by_path("/ibm,opal/leds"); if (leds) { -- cgit v1.2.3-58-ga151 From e1c1cfed54326fd2b17c78f0c85092167fc0783b Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 21 Jul 2017 16:11:37 +0530 Subject: powerpc/powernv: Save/Restore additional SPRs for stop4 cpuidle The stop4 idle state on POWER9 is a deep idle state which loses hypervisor resources, but whose latency is low enough that it can be exposed via cpuidle. Until now, the deep idle states which lose hypervisor resources (eg: winkle) were only exposed via CPU-Hotplug. Hence currently on wakeup from such states, barring a few SPRs which need to be restored to their older value, rest of the SPRS are reinitialized to their values corresponding to that at boot time. When stop4 is used in the context of cpuidle, we want these additional SPRs to be restored to their older value, to ensure that the context on the CPU coming back from idle is same as it was before going idle. 
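For illustration only (the patch itself does this in assembly, using a save area in the PACA as described next), the idea expressed in C is roughly the following; the helper names here are hypothetical, and the real code reaches the save area via r13:

	/* Needs <asm/reg.h> for mfspr()/mtspr() and <asm/cpuidle.h> for struct stop_sprs */
	static void save_additional_sprs(struct stop_sprs *s)
	{
		s->pid   = mfspr(SPRN_PID);
		s->ldbar = mfspr(SPRN_LDBAR);
		s->fscr  = mfspr(SPRN_FSCR);
		s->hfscr = mfspr(SPRN_HFSCR);
		s->mmcr1 = mfspr(SPRN_MMCR1);
		s->mmcr2 = mfspr(SPRN_MMCR2);
		s->mmcra = mfspr(SPRN_MMCRA);
	}

	static void restore_additional_sprs(struct stop_sprs *s)
	{
		mtspr(SPRN_PID,   s->pid);
		mtspr(SPRN_LDBAR, s->ldbar);
		mtspr(SPRN_FSCR,  s->fscr);
		mtspr(SPRN_HFSCR, s->hfscr);
		mtspr(SPRN_MMCR1, s->mmcr1);
		mtspr(SPRN_MMCR2, s->mmcr2);
		mtspr(SPRN_MMCRA, s->mmcra);
	}
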
In this patch, we define a SPR save area in PACA (since we have used up the volatile register space in the stack) and on POWER9, we restore SPRN_PID, SPRN_LDBAR, SPRN_FSCR, SPRN_HFSCR, SPRN_MMCRA, SPRN_MMCR1, SPRN_MMCR2 to the values they had before entering stop. Signed-off-by: Gautham R. Shenoy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 11 +++++++ arch/powerpc/include/asm/paca.h | 7 ++++ arch/powerpc/kernel/asm-offsets.c | 8 +++++ arch/powerpc/kernel/idle_book3s.S | 65 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 89 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 52586f9956bb..8a174cba5567 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -67,6 +67,17 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ +/* Additional SPRs that need to be saved/restored during stop */ +struct stop_sprs { + u64 pid; + u64 ldbar; + u64 fscr; + u64 hfscr; + u64 mmcr1; + u64 mmcr2; + u64 mmcra; +}; + extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index dc88a31cc79a..04b60af027ae 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -31,6 +31,7 @@ #endif #include #include +#include register struct paca_struct *local_paca asm("r13"); @@ -183,6 +184,12 @@ struct paca_struct { struct paca_struct **thread_sibling_pacas; /* The PSSCR value that the kernel requested before going to stop */ u64 requested_psscr; + + /* + * Save area for additional SPRs that need to be + * saved/restored during cpuidle stop. + */ + struct stop_sprs stop_sprs; #endif #ifdef CONFIG_PPC_STD_MMU_64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6e95c2c19a7e..8cfb20e38cfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -746,6 +746,14 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); +#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) + STOP_SPR(STOP_PID, pid); + STOP_SPR(STOP_LDBAR, ldbar); + STOP_SPR(STOP_FSCR, fscr); + STOP_SPR(STOP_HFSCR, hfscr); + STOP_SPR(STOP_MMCR1, mmcr1); + STOP_SPR(STOP_MMCR2, mmcr2); + STOP_SPR(STOP_MMCRA, mmcra); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..4621568277f2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) std r3,_WORT(r1) mfspr r3,SPRN_WORC std r3,_WORC(r1) +/* + * On POWER9, there are idle states such as stop4, invoked via cpuidle, + * that lose hypervisor resources. In such cases, we need to save + * additional SPRs before entering those idle states so that they can + * be restored to their older values on wakeup from the idle state. + * + * On POWER8, the only such deep idle state is winkle which is used + * only in the context of CPU-Hotplug, where these additional SPRs are + * reinitiazed to a sane value. Hence there is no need to save/restore + * these SPRs. 
+ */ +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + +power9_save_additional_sprs: + mfspr r3, SPRN_PID + mfspr r4, SPRN_LDBAR + std r3, STOP_PID(r13) + std r4, STOP_LDBAR(r13) + mfspr r3, SPRN_FSCR + mfspr r4, SPRN_HFSCR + std r3, STOP_FSCR(r13) + std r4, STOP_HFSCR(r13) + + mfspr r3, SPRN_MMCRA + mfspr r4, SPRN_MMCR1 + std r3, STOP_MMCRA(r13) + std r4, STOP_MMCR1(r13) + + mfspr r3, SPRN_MMCR2 + std r3, STOP_MMCR2(r13) + blr + +power9_restore_additional_sprs: + ld r3,_LPCR(r1) + ld r4, STOP_PID(r13) + mtspr SPRN_LPCR,r3 + mtspr SPRN_PID, r4 + + ld r3, STOP_LDBAR(r13) + ld r4, STOP_FSCR(r13) + mtspr SPRN_LDBAR, r3 + mtspr SPRN_FSCR, r4 + + ld r3, STOP_HFSCR(r13) + ld r4, STOP_MMCRA(r13) + mtspr SPRN_HFSCR, r3 + mtspr SPRN_MMCRA, r4 + /* We have already restored PACA_MMCR0 */ + ld r3, STOP_MMCR1(r13) + ld r4, STOP_MMCR2(r13) + mtspr SPRN_MMCR1, r3 + mtspr SPRN_MMCR2, r4 blr /* @@ -803,9 +857,16 @@ no_segments: mtctr r12 bctrl +/* + * On POWER9, we can come here on wakeup from a cpuidle stop state. + * Hence restore the additional SPRs to the saved value. + * + * On POWER8, we come here only on winkle. Since winkle is used + * only in the case of CPU-Hotplug, we don't need to restore + * the additional SPRs. + */ BEGIN_FTR_SECTION - ld r4,_LPCR(r1) - mtspr SPRN_LPCR,r4 + bl power9_restore_additional_sprs END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) hypervisor_state_restored: -- cgit v1.2.3-58-ga151 From a46cc7a90fd8d95bfbb2b27080efe872a1a51db4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:05 +1000 Subject: powerpc/mm/radix: Improve TLB/PWC flushes At the moment we have to rather sub-optimal flushing behaviours: - flush_tlb_mm() will flush the PWC which is unnecessary (for example when doing a fork) - A large unmap will call flush_tlb_pwc() multiple times causing us to perform that fairly expensive operation repeatedly. This happens often in batches of 3 on every new process. So we change flush_tlb_mm() to only flush the TLB, and we use the existing "need_flush_all" flag in struct mmu_gather to indicate that the PWC needs flushing. Unfortunately, flush_tlb_range() still needs to do a full flush for now as it's used by the THP collapsing. We will fix that later. 
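In code terms, the change amounts to roughly the following simplified sketch of the diff below (it omits the local vs. global tlbie choice and the page-size based range flush):

	/* Defer the PWC flush: just record it on the mmu_gather... */
	void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
	{
		tlb->need_flush_all = 1;
	}

	/* ...and honour it once, when the gather is finished */
	void radix__tlb_flush(struct mmu_gather *tlb)
	{
		if (tlb->need_flush_all) {
			tlb->need_flush_all = 0;
			radix__flush_all_mm(tlb->mm);	/* TLB + PWC */
		} else {
			radix__flush_tlb_mm(tlb->mm);	/* TLB only */
		}
	}

flush_tlb_mm() itself now uses RIC_FLUSH_TLB instead of RIC_FLUSH_ALL, so a fork or exit no longer pays for a PWC flush it does not need.
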
Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 4 +- arch/powerpc/mm/tlb-radix.c | 66 +++++++++------------- 2 files changed, 28 insertions(+), 42 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index cc7fbde4f53c..7196999cdc82 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) -#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 2f2967a2db93..28f339cdd836 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -68,17 +68,6 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } -static inline void tlbiel_pwc(unsigned long pid) -{ - asm volatile("ptesync": : :"memory"); - - /* For PWC flush, we don't look at set number */ - __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); -} - static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -149,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); -void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +#ifndef CONFIG_SMP +static void radix__local_flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. 
- */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - tlbiel_pwc(pid); - + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } -EXPORT_SYMBOL(radix__local_flush_tlb_pwc); +#endif /* CONFIG_SMP */ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) @@ -211,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_ALL); + _tlbie_pid(pid, RIC_FLUSH_TLB); else - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +static void radix__flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. - */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_PWC); + _tlbie_pid(pid, RIC_FLUSH_ALL); else - tlbiel_pwc(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } + +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + tlb->need_flush_all = 1; +} EXPORT_SYMBOL(radix__flush_tlb_pwc); void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, @@ -274,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); +#else /* CONFIG_SMP */ +#define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -291,7 +271,12 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; - radix__flush_tlb_mm(mm); + + /* + * This is currently used when collapsing THPs so we need to + * flush the PWC. We should fix this. + */ + radix__flush_all_mm(mm); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -322,7 +307,10 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); - else + else if (tlb->need_flush_all) { + tlb->need_flush_all = 0; + radix__flush_all_mm(mm); + } else radix__flush_tlb_mm(mm); } -- cgit v1.2.3-58-ga151 From 424de9c6e3f89399fc11afc1f53f89c5329132da Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:06 +1000 Subject: powerpc/mm/radix: Avoid flushing the PWC on every flush_tlb_range We do that because it's used by THP pmd collapsing, so use instead a dedicated flush function. 
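The dedicated flush added below boils down to the following simplified sketch (it ignores the local vs. global choice and the 4K base-page special case):

	/* Flush the PWC once, then invalidate each small page that backed
	 * the collapsed PMD range. */
	void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
	{
		unsigned long ap = mmu_get_ap(mmu_virtual_psize);
		unsigned long pid = mm->context.id;
		unsigned long end = addr + HPAGE_PMD_SIZE;

		_tlbie_pid(pid, RIC_FLUSH_PWC);
		for (; addr < end; addr += PAGE_SIZE)
			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
	}

Compared with going through flush_tlb_range(), this flushes the PWC exactly once per collapsed PMD instead of doing a full RIC_FLUSH_ALL.
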
Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 1 + arch/powerpc/mm/pgtable-radix.c | 5 ++- arch/powerpc/mm/tlb-radix.c | 43 +++++++++++++++++++--- 3 files changed, 43 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 7196999cdc82..9b433a624bf3 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -36,6 +36,7 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); +extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5cc50d47ce3f..5d05245208ee 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -804,9 +804,12 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre */ pmd = *pmdp; pmd_clear(pmdp); + /*FIXME!! Verify whether we need this kick below */ kick_all_cpus_sync(); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); + return pmd; } diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 28f339cdd836..18151e9ad694 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -272,11 +272,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; - /* - * This is currently used when collapsing THPs so we need to - * flush the PWC. We should fix this. - */ - radix__flush_all_mm(mm); + radix__flush_tlb_mm(mm); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -355,6 +351,43 @@ err_out: preempt_enable(); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) +{ + int local = mm_is_thread_local(mm); + unsigned long ap = mmu_get_ap(mmu_virtual_psize); + unsigned long pid, end; + + + pid = mm ? mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + /* 4k page size, just blow the world */ + if (PAGE_SIZE == 0x1000) { + radix__flush_all_mm(mm); + return; + } + + /* Otherwise first do the PWC */ + if (local) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + else + _tlbie_pid(pid, RIC_FLUSH_PWC); + + /* Then iterate the pages */ + end = addr + HPAGE_PMD_SIZE; + for (; addr < end; addr += PAGE_SIZE) { + if (local) + _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); + else + _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); + } +no_context: + preempt_enable(); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size) { -- cgit v1.2.3-58-ga151 From 870cfe77a91e91cac5937784d73b9d27b6a12296 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:26 +1000 Subject: powerpc/mm: Update definitions of DSISR bits This updates the definitions for the various DSISR bits to match both some historical stuff and to match new bits on POWER9. 
In addition, we define some masks corresponding to the "bad" faults on Book3S, and some masks corresponding to the bits that match between DSISR and SRR1 for a DSI and an ISI. This comes with a small code update to change the definition of DSISR_PGDIRFAULT which becomes DSISR_PRTABLE_FAULT to match architecture 3.0B Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 69 +++++++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 +- 2 files changed, 61 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a3b6575c7842..73be2f71dbbb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -272,16 +272,65 @@ #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ -#define DSISR_NOHPTE 0x40000000 /* no translation found */ -#define DSISR_PROTFAULT 0x08000000 /* protection fault */ -#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ -#define DSISR_ISSTORE 0x02000000 /* access was a store */ -#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ -#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ -#define DSISR_KEYFAULT 0x00200000 /* Key fault */ -#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */ -#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */ -#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */ +#define DSISR_BAD_DIRECT_ST 0x80000000 /* Obsolete: Direct store error */ +#define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_ATTR_CONFLICT 0x20000000 /* P9: Process vs. Partition attr */ +#define DSISR_NOEXEC_OR_G 0x10000000 /* Alias of SRR1 bit, see below */ +#define DSISR_PROTFAULT 0x08000000 /* protection fault */ +#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ +#define DSISR_ISSTORE 0x02000000 /* access was a store */ +#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ +#define DSISR_NOSEGMENT 0x00200000 /* STAB miss (unsupported) */ +#define DSISR_KEYFAULT 0x00200000 /* Storage Key fault */ +#define DSISR_BAD_EXT_CTRL 0x00100000 /* Obsolete: External ctrl error */ +#define DSISR_UNSUPP_MMU 0x00080000 /* P9: Unsupported MMU config */ +#define DSISR_SET_RC 0x00040000 /* P9: Failed setting of R/C bits */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* P9: Fault on process table */ +#define DSISR_ICSWX_NO_CT 0x00004000 /* P7: icswx unavailable cp type */ +#define DSISR_BAD_COPYPASTE 0x00000008 /* P9: Copy/Paste on wrong memtype */ +#define DSISR_BAD_AMO 0x00000004 /* P9: Incorrect AMO opcode */ +#define DSISR_BAD_CI_LDST 0x00000002 /* P8: Bad HV CI load/store */ + +/* + * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always + * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1 + * indicates an attempt at executing from a no-execute PTE + * or segment or from a guarded page. + * + * We add a definition here for completeness as we alias + * DSISR and SRR1 in do_page_fault. + */ + +/* + * DSISR bits that are treated as a fault. 
Any bit set + * here will skip hash_page, and cause do_page_fault to + * trigger a SIGBUS or SIGSEGV: + */ +#define DSISR_BAD_FAULT_32S (DSISR_BAD_DIRECT_ST | \ + DSISR_BADACCESS | \ + DSISR_BAD_EXT_CTRL) +#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ + DSISR_ATTR_CONFLICT | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_PRTABLE_FAULT | \ + DSISR_ICSWX_NO_CT | \ + DSISR_BAD_COPYPASTE | \ + DSISR_BAD_AMO | \ + DSISR_BAD_CI_LDST) +/* + * These bits are equivalent in SRR1 and DSISR for 0x400 + * instruction access interrupts on Book3S + */ +#define DSISR_SRR1_MATCH_32S (DSISR_NOHPTE | \ + DSISR_NOEXEC_OR_G | \ + DSISR_PROTFAULT) +#define DSISR_SRR1_MATCH_64S (DSISR_SRR1_MATCH_32S | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_SET_RC | \ + DSISR_PRTABLE_FAULT) + #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c5762..6d677c79eeb1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -322,13 +322,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa = vcpu->arch.fault_gpa & ~0xfffUL; gpa &= ~0xF000000000000000ul; gfn = gpa >> PAGE_SHIFT; - if (!(dsisr & DSISR_PGDIRFAULT)) + if (!(dsisr & DSISR_PRTABLE_FAULT)) gpa |= ea & 0xfff; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | + if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | DSISR_SET_RC)) { /* * Bad address in guest page table tree, or other -- cgit v1.2.3-58-ga151 From 6ff4d3e9665274b69cff47f64cc20183465a1de2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:46 +1000 Subject: powerpc: Remove old unused icswx based coprocessor support We have a whole pile of unused code to maintain the ACOP register, allocate coprocessor PIDs and handle ACOP faults. This mechanism was used for the HFI adapter on POWER7 which is dead and gone and whose driver never went upstream. It was used on some A2 core based stuff that also never saw the light of day. Take out all that code. There is still some POWER8 coprocessor code that uses icswx but it's kernel only and thus doesn't use any of that infrastructure. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 5 - arch/powerpc/include/asm/mmu_context.h | 6 - arch/powerpc/include/asm/reg_booke.h | 3 - arch/powerpc/mm/Makefile | 2 - arch/powerpc/mm/fault.c | 15 -- arch/powerpc/mm/icswx.c | 292 ------------------------------- arch/powerpc/mm/icswx.h | 68 ------- arch/powerpc/mm/icswx_pid.c | 87 --------- arch/powerpc/mm/mmu_context_book3s64.c | 18 -- arch/powerpc/platforms/Kconfig.cputype | 38 ---- 10 files changed, 534 deletions(-) delete mode 100644 arch/powerpc/mm/icswx.c delete mode 100644 arch/powerpc/mm/icswx.h delete mode 100644 arch/powerpc/mm/icswx_pid.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..cbf1945f4338 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -96,11 +96,6 @@ typedef struct { #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard acop and cop_pid */ - unsigned long acop; /* mask of enabled coprocessor types */ - unsigned int cop_pid; /* pid value used with coprocessors */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES /* for 4K PTE fragment support */ void *pte_frag; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..eb749c86dca5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,12 +96,6 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (prev == next) return; -#ifdef CONFIG_PPC_ICSWX - /* Switch coprocessor context only if prev or next uses a coprocessor */ - if (prev->context.acop || next->context.acop) - switch_cop(next); -#endif /* CONFIG_PPC_ICSWX */ - /* We must stop all altivec streams before changing the HW * context */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 737e012ef56e..eb2a33d5df26 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -221,10 +221,7 @@ #define SPRN_CSRR0 SPRN_SRR2 /* Critical Save and Restore Register 0 */ #define SPRN_CSRR1 SPRN_SRR3 /* Critical Save and Restore Register 1 */ #endif - -#ifdef CONFIG_PPC_ICSWX #define SPRN_HACOP 0x15F /* Hypervisor Available Coprocessor Register */ -#endif /* Bit definitions for CCR1. 
*/ #define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 7414034df1c3..fa6a6ba34355 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -22,8 +22,6 @@ ifeq ($(CONFIG_PPC_STD_MMU_64),y) obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o endif -obj-$(CONFIG_PPC_ICSWX) += icswx.o -obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4e1f5e388020..ed902a8fefa8 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -45,8 +45,6 @@ #include #include -#include "icswx.h" - static inline bool notify_page_fault(struct pt_regs *regs) { bool ret = false; @@ -389,19 +387,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int fault, major = 0; bool store_update_sp = false; -#ifdef CONFIG_PPC_ICSWX - /* - * we need to do this early because this "data storage - * interrupt" does not update the DAR/DEAR so we don't want to - * look at it - */ - if (error_code & ICSWX_DSI_UCT) { - int rc = acop_handle_fault(regs, address, error_code); - if (rc) - return rc; - } -#endif /* CONFIG_PPC_ICSWX */ - if (notify_page_fault(regs)) return 0; diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c deleted file mode 100644 index 1fa794d7d59f..000000000000 --- a/arch/powerpc/mm/icswx.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * ICSWX and ACOP Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "icswx.h" - -/* - * The processor and its L2 cache cause the icswx instruction to - * generate a COP_REQ transaction on PowerBus. The transaction has no - * address, and the processor does not perform an MMU access to - * authenticate the transaction. The command portion of the PowerBus - * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor - * Process ID (PID), which the coprocessor compares to the authorized - * LPID and PID held in the coprocessor, to determine if the process - * is authorized to generate the transaction. The data of the COP_REQ - * transaction is 128-byte or less in size and is placed in cacheable - * memory on a 128-byte cache line boundary. - * - * The task to use a coprocessor should use use_cop() to mark the use - * of the Coprocessor Type (CT) and context switching. On a server - * class processor, the PID register is used only for coprocessor - * management + * and so a coprocessor PID is allocated before - * executing icswx + * instruction. Drop_cop() is used to free the - * coprocessor PID. - * - * Example: - * Host Fabric Interface (HFI) is a PowerPC network coprocessor. - * Each HFI have multiple windows. Each HFI window serves as a - * network device sending to and receiving from HFI network. - * HFI immediate send function uses icswx instruction. The immediate - * send function allows small (single cache-line) packets be sent - * without using the regular HFI send FIFO and doorbell, which are - * much slower than immediate send. 
- * - * For each task intending to use HFI immediate send, the HFI driver - * calls use_cop() to obtain a coprocessor PID for the task. - * The HFI driver then allocate a free HFI window and save the - * coprocessor PID to the HFI window to allow the task to use the - * HFI window. - * - * The HFI driver repeatedly creates immediate send packets and - * issues icswx instruction to send data through the HFI window. - * The HFI compares the coprocessor PID in the CPU PID register - * to the PID held in the HFI window to determine if the transaction - * is allowed. - * - * When the task to release the HFI window, the HFI driver calls - * drop_cop() to release the coprocessor PID. - */ - -void switch_cop(struct mm_struct *next) -{ -#ifdef CONFIG_PPC_ICSWX_PID - mtspr(SPRN_PID, next->context.cop_pid); -#endif - mtspr(SPRN_ACOP, next->context.acop); -} - -/** - * Start using a coprocessor. - * @acop: mask of coprocessor to be used. - * @mm: The mm the coprocessor to associate with. Most likely current mm. - * - * Return a positive PID if successful. Negative errno otherwise. - * The returned PID will be fed to the coprocessor to determine if an - * icswx transaction is authenticated. - */ -int use_cop(unsigned long acop, struct mm_struct *mm) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return -ENODEV; - - if (!mm || !acop) - return -EINVAL; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - ret = get_cop_pid(mm); - if (ret < 0) - goto out; - - /* update acop */ - mm->context.acop |= acop; - - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. - */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - -out: - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(use_cop); - -/** - * Stop using a coprocessor. - * @acop: mask of coprocessor to be stopped. - * @mm: The mm the coprocessor associated with. - */ -void drop_cop(unsigned long acop, struct mm_struct *mm) -{ - int free_pid; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return; - - if (WARN_ON_ONCE(!mm)) - return; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - mm->context.acop &= ~acop; - - free_pid = disable_cop_pid(mm); - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. 
- */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - - if (free_pid != COP_PID_NONE) - free_cop_pid(free_pid); - - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); -} -EXPORT_SYMBOL_GPL(drop_cop); - -static int acop_use_cop(int ct) -{ - /* There is no alternate policy, yet */ - return -1; -} - -/* - * Get the instruction word at the NIP - */ -static u32 acop_get_inst(struct pt_regs *regs) -{ - u32 inst; - u32 __user *p; - - p = (u32 __user *)regs->nip; - if (!access_ok(VERIFY_READ, p, sizeof(*p))) - return 0; - - if (__get_user(inst, p)) - return 0; - - return inst; -} - -/** - * @regs: registers at time of interrupt - * @address: storage address - * @error_code: Fault code, usually the DSISR or ESR depending on - * processor type - * - * Return 0 if we are able to resolve the data storage fault that - * results from a CT miss in the ACOP register. - */ -int acop_handle_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - int ct; - u32 inst = 0; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) { - pr_info("No coprocessors available"); - _exception(SIGILL, regs, ILL_ILLOPN, address); - } - - if (!user_mode(regs)) { - /* this could happen if the HV denies the - * kernel access, for now we just die */ - die("ICSWX from kernel failed", regs, SIGSEGV); - } - - /* Some implementations leave us a hint for the CT */ - ct = ICSWX_GET_CT_HINT(error_code); - if (ct < 0) { - /* we have to peek at the instruction word to figure out CT */ - u32 ccw; - u32 rs; - - inst = acop_get_inst(regs); - if (inst == 0) - return -1; - - rs = (inst >> (31 - 10)) & 0x1f; - ccw = regs->gpr[rs]; - ct = (ccw >> 16) & 0x3f; - } - - /* - * We could be here because another thread has enabled acop - * but the ACOP register has yet to be updated. - * - * This should have been taken care of by the IPI to sync all - * the threads (see smp_call_function(sync_cop, mm, 1)), but - * that could take forever if there are a significant amount - * of threads. - * - * Given the number of threads on some of these systems, - * perhaps this is the best way to sync ACOP rather than whack - * every thread with an IPI. - */ - if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) { - sync_cop(current->active_mm); - return 0; - } - - /* check for alternate policy */ - if (!acop_use_cop(ct)) - return 0; - - /* at this point the CT is unknown to the system */ - pr_warn("%s[%d]: Coprocessor %d is unavailable\n", - current->comm, current->pid, ct); - - /* get inst if we don't already have it */ - if (inst == 0) { - inst = acop_get_inst(regs); - if (inst == 0) - return -1; - } - - /* Check if the instruction is the "record form" */ - if (inst & 1) { - /* - * the instruction is "record" form so we can reject - * using CR0 - */ - regs->ccr &= ~(0xful << 28); - regs->ccr |= ICSWX_RC_NOT_FOUND << 28; - - /* Move on to the next instruction */ - regs->nip += 4; - } else { - /* - * There is no architected mechanism to report a bad - * CT so we could either SIGILL or report nothing. - * Since the non-record version should only bu used - * for "hints" or "don't care" we should probably do - * nothing. However, I could see how some people - * might want an SIGILL so it here if you want it. 
- */ -#ifdef CONFIG_PPC_ICSWX_USE_SIGILL - _exception(SIGILL, regs, ILL_ILLOPN, address); -#else - regs->nip += 4; -#endif - } - - return 0; -} -EXPORT_SYMBOL_GPL(acop_handle_fault); diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h deleted file mode 100644 index 6dedc08e62c8..000000000000 --- a/arch/powerpc/mm/icswx.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _ARCH_POWERPC_MM_ICSWX_H_ -#define _ARCH_POWERPC_MM_ICSWX_H_ - -/* - * ICSWX and ACOP Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include - -/* also used to denote that PIDs are not used */ -#define COP_PID_NONE 0 - -static inline void sync_cop(void *arg) -{ - struct mm_struct *mm = arg; - - if (mm == current->active_mm) - switch_cop(current->active_mm); -} - -#ifdef CONFIG_PPC_ICSWX_PID -extern int get_cop_pid(struct mm_struct *mm); -extern int disable_cop_pid(struct mm_struct *mm); -extern void free_cop_pid(int free_pid); -#else -#define get_cop_pid(m) (COP_PID_NONE) -#define disable_cop_pid(m) (COP_PID_NONE) -#define free_cop_pid(p) -#endif - -/* - * These are implementation bits for architected registers. If this - * ever becomes architecture the should be moved to reg.h et. al. - */ -/* UCT is the same bit for Server and Embedded */ -#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */ - -#ifdef CONFIG_PPC_BOOK3E -/* Embedded implementation gives us no hints as to what the CT is */ -#define ICSWX_GET_CT_HINT(x) (-1) -#else -/* Server implementation contains the CT value in the DSISR */ -#define ICSWX_DSISR_CTMASK 0x00003f00 -#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8) -#endif - -#define ICSWX_RC_STARTED 0x8 /* The request has been started */ -#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */ -#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */ -#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */ - -extern int acop_handle_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code); - -static inline u64 acop_copro_type_bit(unsigned int type) -{ - return 1ULL << (63 - type); -} - -#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */ diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c deleted file mode 100644 index 91e30eb7d054..000000000000 --- a/arch/powerpc/mm/icswx_pid.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * ICSWX and ACOP/PID Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "icswx.h" - -#define COP_PID_MIN (COP_PID_NONE + 1) -#define COP_PID_MAX (0xFFFF) - -static DEFINE_SPINLOCK(mmu_context_acop_lock); -static DEFINE_IDA(cop_ida); - -static int new_cop_pid(struct ida *ida, int min_id, int max_id, - spinlock_t *lock) -{ - int index; - int err; - -again: - if (!ida_pre_get(ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(lock); - err = ida_get_new_above(ida, min_id, &index); - spin_unlock(lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > max_id) { - spin_lock(lock); - ida_remove(ida, index); - spin_unlock(lock); - return -ENOMEM; - } - - return index; -} - -int get_cop_pid(struct mm_struct *mm) -{ - int pid; - - if (mm->context.cop_pid == COP_PID_NONE) { - pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, - &mmu_context_acop_lock); - if (pid >= 0) - mm->context.cop_pid = pid; - } - return mm->context.cop_pid; -} - -int disable_cop_pid(struct mm_struct *mm) -{ - int free_pid = COP_PID_NONE; - - if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { - free_pid = mm->context.cop_pid; - mm->context.cop_pid = COP_PID_NONE; - } - return free_pid; -} - -void free_cop_pid(int free_pid) -{ - spin_lock(&mmu_context_acop_lock); - ida_remove(&cop_ida, free_pid); - spin_unlock(&mmu_context_acop_lock); -} diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index abed1fe6992f..db9d8cb0f8ee 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -25,8 +25,6 @@ #include #include -#include "icswx.h" - static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); @@ -164,16 +162,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) return index; mm->context.id = index; -#ifdef CONFIG_PPC_ICSWX - mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); - if (!mm->context.cop_lockp) { - __destroy_context(index); - subpage_prot_free(mm); - mm->context.id = MMU_NO_CONTEXT; - return -ENOMEM; - } - spin_lock_init(mm->context.cop_lockp); -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; @@ -225,12 +213,6 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_SPAPR_TCE_IOMMU WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif -#ifdef CONFIG_PPC_ICSWX - drop_cop(mm->context.acop, mm); - kfree(mm->context.cop_lockp); - mm->context.cop_lockp = NULL; -#endif /* CONFIG_PPC_ICSWX */ - if (radix_enabled()) { /* * Radix doesn't have a valid bit in the process table diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 2f629e0551e9..9539620a48d4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -271,44 +271,6 @@ config VSX If in doubt, say Y here. -config PPC_ICSWX - bool "Support for PowerPC icswx coprocessor instruction" - depends on PPC_BOOK3S_64 - default n - ---help--- - - This option enables kernel support for the PowerPC Initiate - Coprocessor Store Word (icswx) coprocessor instruction on POWER7 - and POWER8 processors. POWER9 uses new copy/paste instructions - to invoke the coprocessor. - - This option is only useful if you have a processor that supports - the icswx coprocessor instruction. It does not have any effect - on processors without the icswx coprocessor instruction. - - This option slightly increases kernel memory usage. 
- - If in doubt, say N here. - -config PPC_ICSWX_PID - bool "icswx requires direct PID management" - depends on PPC_ICSWX - default y - ---help--- - The PID register in server is used explicitly for ICSWX. In - embedded systems PID management is done by the system. - -config PPC_ICSWX_USE_SIGILL - bool "Should a bad CT cause a SIGILL?" - depends on PPC_ICSWX - default n - ---help--- - Should a bad CT used for "non-record form ICSWX" cause an - illegal instruction signal or should it be silent as - architected. - - If in doubt, say N here. - config SPE_POSSIBLE def_bool y depends on E200 || (E500 && !PPC_E500MC) -- cgit v1.2.3-58-ga151 From 2552910084a5e12e280caf082ab01468e187a064 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 4 Aug 2017 11:55:14 +0200 Subject: powerpc/powernv: Enable PCI peer-to-peer P9 has support for PCI peer-to-peer, enabling a device to write in the MMIO space of another device directly, without interrupting the CPU. This patch adds support for it on powernv, by adding a new API to be called by drivers. The pnv_pci_set_p2p(...) call configures an 'initiator', i.e the device which will issue the MMIO operation, and a 'target', i.e. the device on the receiving side. P9 really only supports MMIO stores for the time being but that's expected to change in the future, so the API allows to define both load and store operations. /* PCI p2p descriptor */ #define OPAL_PCI_P2P_ENABLE 0x1 #define OPAL_PCI_P2P_LOAD 0x2 #define OPAL_PCI_P2P_STORE 0x4 int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) It uses a new OPAL call, as the configuration magic is done on the PHBs by skiboot. Signed-off-by: Frederic Barrat Reviewed-by: Russell Currey [mpe: Drop unrelated OPAL calls, s/uint64_t/u64/, minor formatting] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 8 ++- arch/powerpc/include/asm/opal.h | 2 + arch/powerpc/include/asm/pnv-pci.h | 2 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/pci-ioda.c | 3 +- arch/powerpc/platforms/powernv/pci.c | 75 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci.h | 5 ++ 7 files changed, 93 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 7df005965634..ced1ef21e981 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -193,7 +193,8 @@ #define OPAL_IMC_COUNTERS_INIT 149 #define OPAL_IMC_COUNTERS_START 150 #define OPAL_IMC_COUNTERS_STOP 151 -#define OPAL_LAST 151 +#define OPAL_PCI_SET_P2P 157 +#define OPAL_LAST 157 /* Device tree flags */ @@ -1094,6 +1095,11 @@ enum { }; +/* PCI p2p descriptor */ +#define OPAL_PCI_P2P_ENABLE 0x1 +#define OPAL_PCI_P2P_LOAD 0x2 +#define OPAL_PCI_P2P_STORE 0x4 + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6b8513c3ad40..5a715e66f910 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -267,6 +267,8 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, + uint64_t desc, uint16_t pe_number); int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu_pir); diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h index de9681034353..3e5cf251ad9a 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); +extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, + u64 desc); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b77f52ee8263..3369a6f2b2f1 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -313,3 +313,4 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); +OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 437613588df1..026a06c51458 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1408,7 +1408,6 @@ m64_failed: static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num); -static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) { @@ -2394,7 +2393,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, return 0; } -static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 7905d179d036..5422f4a6317c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -37,6 +37,8 @@ #include "powernv.h" #include "pci.h" +static DEFINE_MUTEX(p2p_mutex); + int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { struct device_node *parent = np; @@ -1017,6 +1019,79 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus) } } +int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) +{ + struct pci_controller *hose; + struct pnv_phb *phb_init, *phb_target; + struct pnv_ioda_pe *pe_init; + int rc; + + if (!opal_check_token(OPAL_PCI_SET_P2P)) + return -ENXIO; + + hose = pci_bus_to_host(initiator->bus); + phb_init = hose->private_data; + + hose = pci_bus_to_host(target->bus); + phb_target = hose->private_data; + + pe_init = pnv_ioda_get_pe(initiator); + if (!pe_init) + return -ENODEV; + + /* + * Configuring the initiator's PHB requires to adjust its + * TVE#1 setting. Since the same device can be an initiator + * several times for different target devices, we need to keep + * a reference count to know when we can restore the default + * bypass setting on its TVE#1 when disabling. Opal is not + * tracking PE states, so we add a reference count on the PE + * in linux. + * + * For the target, the configuration is per PHB, so we keep a + * target reference count on the PHB. 
+ */ + mutex_lock(&p2p_mutex); + + if (desc & OPAL_PCI_P2P_ENABLE) { + /* always go to opal to validate the configuration */ + rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, + desc, pe_init->pe_number); + + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + + pe_init->p2p_initiator_count++; + phb_target->p2p_target_count++; + } else { + if (!pe_init->p2p_initiator_count || + !phb_target->p2p_target_count) { + rc = -EINVAL; + goto out; + } + + if (--pe_init->p2p_initiator_count == 0) + pnv_pci_ioda2_set_bypass(pe_init, true); + + if (--phb_target->p2p_target_count == 0) { + rc = opal_pci_set_p2p(phb_init->opal_id, + phb_target->opal_id, desc, + pe_init->pe_number); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + } + } + rc = 0; +out: + mutex_unlock(&p2p_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index f16bc403ec03..a95273c524f6 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -78,6 +78,9 @@ struct pnv_ioda_pe { struct pnv_ioda_pe *master; struct list_head slaves; + /* PCI peer-to-peer*/ + int p2p_initiator_count; + /* Link in list of PE#s */ struct list_head list; }; @@ -189,6 +192,7 @@ struct pnv_phb { #ifdef CONFIG_CXL_BASE struct cxl_afu *cxl_afu; #endif + int p2p_target_count; }; extern struct pci_ops pnv_pci_ops; @@ -229,6 +233,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); +extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); -- cgit v1.2.3-58-ga151 From e66ca3db5917f4bcad039d3a3df9f1003797c249 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Fri, 4 Aug 2017 11:12:18 +1000 Subject: powerpc/powernv: Use darn instruction for get_random_seed() on Power9 This adds powernv_get_random_darn() which utilises the darn instruction, introduced in ISA v3.0/POWER9. The darn instruction can potentially return an error, which is supported by the get_random_seed() API, in normal usage if we see an error we just return that to the caller. However when detecting whether darn is functional at boot we try up to 10 times, before deciding that darn doesn't work and failing the registration of get_random_seed(). That way an intermittent failure at boot doesn't deprive the system of randomness until the next reboot. 
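For context, the consumer side of get_random_seed() looks roughly like the sketch below (based on the existing powerpc archrandom wiring, not part of this patch); it shows why simply returning 0 on a DARN error is sufficient in normal usage:

/*
 * Sketch only: how arch code consumes ppc_md.get_random_seed.
 * A 0 return just means "no entropy this time" and callers fall back
 * to other entropy sources.
 */
static inline int arch_get_random_seed_long(unsigned long *v)
{
	if (ppc_md.get_random_seed)
		return ppc_md.get_random_seed(v);

	return 0;
}
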
Signed-off-by: Matt Brown [mpe: Move init into a function, tweak change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 4 ++++ arch/powerpc/platforms/powernv/rng.c | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index fa9ebaead91e..041ba15aa2b9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -193,6 +193,7 @@ #define PPC_INST_CLRBHRB 0x7c00035c #define PPC_INST_COPY 0x7c20060c #define PPC_INST_CP_ABORT 0x7c00068c +#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe #define PPC_INST_DCBAL 0x7c2005ec @@ -395,6 +396,9 @@ #define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) #define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ + ___PPC_RT(t) | \ + (((l) & 0x3) << 16)) #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 1a9d84371a4d..c5ce3a8bd4c9 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -16,11 +16,13 @@ #include #include #include +#include #include #include #include #include +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul struct powernv_rng { void __iomem *regs; @@ -67,6 +69,41 @@ int powernv_get_random_real_mode(unsigned long *v) return 1; } +int powernv_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static int initialise_darn(void) +{ + unsigned long val; + int i; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + + for (i = 0; i < 10; i++) { + if (powernv_get_random_darn(&val)) { + ppc_md.get_random_seed = powernv_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} + int powernv_get_random_long(unsigned long *v) { struct powernv_rng *rng; @@ -150,6 +187,8 @@ static __init int rng_init(void) of_platform_device_create(dn, NULL, NULL); } + initialise_darn(); + return 0; } machine_subsys_initcall(powernv, rng_init); -- cgit v1.2.3-58-ga151 From 63ee9b2ff9d306efaa61b04b8710fafe339ae441 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Aug 2017 20:29:22 +1000 Subject: powerpc/mm/book3s64: Make KERN_IO_START a variable Currently KERN_IO_START is defined as: #define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) Although it looks like a constant, both the components are actually variables, to allow us to have a different value between Radix and Hash with a single kernel. However that still requires both Radix and Hash to place the kernel IO region at the same location relative to the start and end of the kernel virtual region (namely 1/2 way through it), and we'd like to change that. So split KERN_IO_START out into its own variable, and initialise it for Radix and Hash. In the medium term we should be able to reconsolidate this, by doing a more involved rearrangement of the location of the regions. 
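Condensed from the diff below, the change boils down to the following before/after sketch:

/* Before: KERN_IO_START is derived from two variables, which forces both
 * Hash and Radix to place the IO region half way through the kernel
 * virtual region. */
#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))

/* After: it becomes its own variable, set by each MMU's early init. */
extern unsigned long __kernel_io_start;
#define KERN_IO_START	__kernel_io_start

/* hash__early_init_mmu():   __kernel_io_start = H_KERN_IO_START;     */
/* radix__early_init_mmu():  __kernel_io_start = RADIX_KERN_IO_START; */
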
Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 2 ++ arch/powerpc/include/asm/book3s/64/pgtable.h | 3 ++- arch/powerpc/include/asm/book3s/64/radix.h | 2 ++ arch/powerpc/mm/hash_utils_64.c | 1 + arch/powerpc/mm/pgtable-radix.c | 1 + arch/powerpc/mm/pgtable_64.c | 2 ++ 6 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 36fc7bfe9e11..d613653ed5b9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -51,6 +51,8 @@ #define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_KERN_IO_START H_VMALLOC_END + /* * Region IDs */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..18a8580d3ddc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -272,8 +272,10 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; +extern unsigned long __kernel_io_start; #define KERN_VIRT_START __kernel_virt_start #define KERN_VIRT_SIZE __kernel_virt_size +#define KERN_IO_START __kernel_io_start extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -298,7 +300,6 @@ extern unsigned long pci_io_base; * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE */ -#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) #define FULL_IO_SIZE 0x80000000ul #define ISA_IO_BASE (KERN_IO_START) #define ISA_IO_END (KERN_IO_START + 0x10000ul) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 544440b5aff3..1e5ba94e62ef 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -110,6 +110,8 @@ */ #define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) +#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) + #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) #define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7419fc1854ad..a93137c358ea 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1019,6 +1019,7 @@ void __init hash__early_init_mmu(void) __kernel_virt_size = H_KERN_VIRT_SIZE; __vmalloc_start = H_VMALLOC_START; __vmalloc_end = H_VMALLOC_END; + __kernel_io_start = H_KERN_IO_START; vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5d05245208ee..c1185c8ecb22 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -494,6 +494,7 @@ void __init radix__early_init_mmu(void) __kernel_virt_size = RADIX_KERN_VIRT_SIZE; __vmalloc_start = RADIX_VMALLOC_START; __vmalloc_end = RADIX_VMALLOC_END; + __kernel_io_start = RADIX_KERN_IO_START; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 0736e94c7615..ac0717a90ca6 100644 --- 
a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -104,6 +104,8 @@ unsigned long __vmalloc_start; EXPORT_SYMBOL(__vmalloc_start); unsigned long __vmalloc_end; EXPORT_SYMBOL(__vmalloc_end); +unsigned long __kernel_io_start; +EXPORT_SYMBOL(__kernel_io_start); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); unsigned long __pte_frag_nr; -- cgit v1.2.3-58-ga151 From 21a0e8c14bf61472723d2acc83f98ab35ff321b4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Aug 2017 20:29:24 +1000 Subject: powerpc/mm/hash64: Make vmalloc 56T on hash On 64-bit book3s, with the hash MMU, we currently define the kernel virtual space (vmalloc, ioremap etc.), to be 16T in size. This is a leftover from pre v3.7 when our user VM was also 16T. Of that 16T we split it 50/50, with half used for PCI IO and ioremap and the other 8T for vmalloc. We never bothered to make it any bigger because 8T of vmalloc ought to be enough for anybody. But it turns out that's not true, the per cpu allocator wants large amounts of vmalloc space, not to make large allocations, but to allow a large stride between allocations, because we use pcpu_embed_first_chunk(). With a bit of juggling we can increase the entire kernel virtual space to 64T. The only real complication is the check of the address in the SLB miss handler, see the comment in the code. Although we could continue to split virtual space 50/50 as we do now, no one seems to be running out of PCI IO or ioremap space. So instead keep that as 8T, and use the remaining 56T for vmalloc. In future we should be able to increase the kernel virtual space to 512T, the code already supports that, it just needs testing on older hardware. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- arch/powerpc/mm/slb_low.S | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d613653ed5b9..f88452019114 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,7 +40,7 @@ * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) +#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */ /* * The vmalloc space starts at the beginning of that region, and @@ -48,7 +48,7 @@ * (we keep a quarter for the virtual memmap) */ #define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) +#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */ #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) #define H_KERN_IO_START H_VMALLOC_END diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 2eb1b92a68ff..906a86fe457b 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -121,9 +121,21 @@ slb_miss_kernel_load_vmemmap: 1: #endif /* CONFIG_SPARSEMEM_VMEMMAP */ - clrldi r11,r10,48 - cmpldi r11,(H_VMALLOC_SIZE >> 28) - 1 - bgt 5f + /* + * r10 contains the ESID, which is the original faulting EA shifted + * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) + * which is 0xd00038000. That can't be used as an immediate, even if we + * ignored the 0xd, so we have to load it into a register, and we only + * have one register free. 
So we must load all of (H_VMALLOC_END >> 28) + * into a register and compare ESID against that. + */ + lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 + ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 + // Rotate left 4, then mask with 0xffffffff0 + rldic r11,r11,4,28 // r11 = 0xd00038000 + cmpld r10,r11 // if r10 >= r11 + bge 5f // goto io_mapping + /* * vmalloc mapping gets the encoding from the PACA as the mapping * can be demoted from 64K -> 4K dynamically on some machines. -- cgit v1.2.3-58-ga151 From 8a583c0a8d316d8ea52ea78491174ab1a3e9ef9d Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 5 Aug 2017 19:55:11 +0200 Subject: powerpc: Fix invalid use of register expressions binutils >= 2.26 now warns about misuse of register expressions in assembler operands that are actually literals, for example: arch/powerpc/kernel/entry_64.S:535: Warning: invalid register expression In practice these are almost all uses of r0 that should just be a literal 0. Signed-off-by: Andreas Schwab [mpe: Mention r0 is almost always the culprit, fold in purgatory change] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/kernel/swsusp_asm64.S | 2 +- arch/powerpc/lib/copypage_power7.S | 14 ++++---- arch/powerpc/lib/copyuser_power7.S | 66 ++++++++++++++++++------------------- arch/powerpc/lib/memcpy_power7.S | 66 ++++++++++++++++++------------------- arch/powerpc/lib/string_64.S | 2 +- arch/powerpc/purgatory/trampoline.S | 2 +- 7 files changed, 77 insertions(+), 77 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6baeeb9acd0d..daeda2bbe12a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -439,7 +439,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .machine push ; \ .machine "power4" ; \ lis scratch,0x60000000@h; \ - dcbt r0,scratch,0b01010; \ + dcbt 0,scratch,0b01010; \ .machine pop /* diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 988f38dced0f..82d8aae81c6a 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -179,7 +179,7 @@ nothing_to_copy: sld r3, r3, r0 li r0, 0 1: - dcbf r0,r3 + dcbf 0,r3 addi r3,r3,0x20 bdnz 1b diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index a84d333ecb09..ca5fc8fa7efc 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -45,13 +45,13 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" /* setup read stream 0 */ - dcbt r0,r4,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ + dcbt 0,r4,0b01000 /* addr from */ + dcbt 0,r7,0b01010 /* length and depth from */ /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ + dcbtst 0,r9,0b01000 /* addr to */ + dcbtst 0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* all streams GO */ + dcbt 0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC @@ -83,7 +83,7 @@ _GLOBAL(copypage_power7) li r12,112 .align 5 -1: lvx v7,r0,r4 +1: lvx v7,0,r4 lvx v6,r4,r6 lvx v5,r4,r7 lvx v4,r4,r8 @@ -92,7 +92,7 @@ _GLOBAL(copypage_power7) lvx v1,r4,r11 lvx v0,r4,r12 addi r4,r4,128 - stvx v7,r0,r3 + stvx v7,0,r3 stvx v6,r3,r6 stvx v5,r3,r7 stvx v4,r3,r8 diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 706b7cc19846..d416a4a66578 100644 --- 
a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -315,13 +315,13 @@ err1; stb r0,0(r3) .machine push .machine "power4" /* setup read stream 0 */ - dcbt r0,r6,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ + dcbt 0,r6,0b01000 /* addr from */ + dcbt 0,r7,0b01010 /* length and depth from */ /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ + dcbtst 0,r9,0b01000 /* addr to */ + dcbtst 0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* all streams GO */ + dcbt 0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy @@ -376,26 +376,26 @@ err3; std r0,0(r3) li r11,48 bf cr7*4+3,5f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 err3; stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 err3; lvx v2,r4,r9 err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx v3,r0,r3 +err3; stvx v3,0,r3 err3; stvx v2,r3,r9 err3; stvx v1,r3,r10 err3; stvx v0,r3,r11 @@ -421,7 +421,7 @@ err3; stvx v0,r3,r11 */ .align 5 8: -err4; lvx v7,r0,r4 +err4; lvx v7,0,r4 err4; lvx v6,r4,r9 err4; lvx v5,r4,r10 err4; lvx v4,r4,r11 @@ -430,7 +430,7 @@ err4; lvx v2,r4,r14 err4; lvx v1,r4,r15 err4; lvx v0,r4,r16 addi r4,r4,128 -err4; stvx v7,r0,r3 +err4; stvx v7,0,r3 err4; stvx v6,r3,r9 err4; stvx v5,r3,r10 err4; stvx v4,r3,r11 @@ -451,29 +451,29 @@ err4; stvx v0,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 err3; lvx v2,r4,r9 err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx v3,r0,r3 +err3; stvx v3,0,r3 err3; stvx v2,r3,r9 err3; stvx v1,r3,r10 err3; stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 err3; stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -553,25 +553,25 @@ err3; lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 vor v0,v1,v1 5: bf cr7*4+2,6f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) err3; lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 VPERM(v8,v0,v3,v16) err3; lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -580,7 +580,7 @@ err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 err3; stvx v10,r3,r10 err3; stvx v11,r3,r11 @@ -606,7 +606,7 @@ err3; stvx v11,r3,r11 */ .align 5 8: -err4; lvx v7,r0,r4 +err4; lvx v7,0,r4 VPERM(v8,v0,v7,v16) err4; lvx v6,r4,r9 VPERM(v9,v7,v6,v16) @@ -623,7 +623,7 @@ err4; lvx v1,r4,r15 err4; lvx v0,r4,r16 VPERM(v15,v1,v0,v16) addi r4,r4,128 -err4; stvx v8,r0,r3 +err4; stvx v8,0,r3 err4; stvx v9,r3,r9 err4; stvx v10,r3,r10 err4; stvx v11,r3,r11 @@ -644,7 +644,7 @@ err4; stvx v15,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 VPERM(v8,v0,v3,v16) err3; lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -653,27 +653,27 @@ err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 VPERM(v11,v1,v0,v16) 
addi r4,r4,64 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 err3; stvx v10,r3,r10 err3; stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) err3; lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 786234fd4e91..193909abd18b 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -261,12 +261,12 @@ _GLOBAL(memcpy_power7) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + dcbt 0,r6,0b01000 + dcbt 0,r7,0b01010 + dcbtst 0,r9,0b01000 + dcbtst 0,r10,0b01010 eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt 0,r8,0b01010 /* GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy @@ -321,26 +321,26 @@ _GLOBAL(memcpy_power7) li r11,48 bf cr7*4+3,5f - lvx v1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx v1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f - lvx v1,r0,r4 + lvx v1,0,r4 lvx v0,r4,r9 addi r4,r4,32 - stvx v1,r0,r3 + stvx v1,0,r3 stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx v3,r0,r4 + lvx v3,0,r4 lvx v2,r4,r9 lvx v1,r4,r10 lvx v0,r4,r11 addi r4,r4,64 - stvx v3,r0,r3 + stvx v3,0,r3 stvx v2,r3,r9 stvx v1,r3,r10 stvx v0,r3,r11 @@ -366,7 +366,7 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx v7,r0,r4 + lvx v7,0,r4 lvx v6,r4,r9 lvx v5,r4,r10 lvx v4,r4,r11 @@ -375,7 +375,7 @@ _GLOBAL(memcpy_power7) lvx v1,r4,r15 lvx v0,r4,r16 addi r4,r4,128 - stvx v7,r0,r3 + stvx v7,0,r3 stvx v6,r3,r9 stvx v5,r3,r10 stvx v4,r3,r11 @@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx v3,r0,r4 + lvx v3,0,r4 lvx v2,r4,r9 lvx v1,r4,r10 lvx v0,r4,r11 addi r4,r4,64 - stvx v3,r0,r3 + stvx v3,0,r3 stvx v2,r3,r9 stvx v1,r3,r10 stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx v1,r0,r4 + lvx v1,0,r4 lvx v0,r4,r9 addi r4,r4,32 - stvx v1,r0,r3 + stvx v1,0,r3 stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx v1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx v1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -499,25 +499,25 @@ _GLOBAL(memcpy_power7) addi r4,r4,16 bf cr7*4+3,5f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx v8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 vor v0,v1,v1 5: bf cr7*4+2,6f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx v3,r0,r4 + lvx v3,0,r4 VPERM(v8,v0,v3,v16) lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -526,7 +526,7 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 @@ -552,7 +552,7 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx v7,r0,r4 + lvx v7,0,r4 VPERM(v8,v0,v7,v16) lvx v6,r4,r9 VPERM(v9,v7,v6,v16) @@ -569,7 +569,7 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r16 VPERM(v15,v1,v0,v16) addi r4,r4,128 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 @@ -590,7 +590,7 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx v3,r0,r4 + lvx v3,0,r4 VPERM(v8,v0,v3,v16) lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -599,27 +599,27 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx v8,r0,r3 + stvx 
v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx v8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index d5b4d9498c54..56aac4c22025 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -184,7 +184,7 @@ err1; std r0,8(r3) mtctr r6 mr r8,r3 14: -err1; dcbz r0,r3 +err1; dcbz 0,r3 add r3,r3,r9 bdnz 14b diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index 3696ea6c4826..30277446892c 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -67,7 +67,7 @@ master: mr %r16,%r3 /* save dt address in reg16 */ li %r4,20 LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */ - cmpwi %r0,%r6,2 /* v2 or later? */ + cmpwi %cr0,%r6,2 /* v2 or later? */ blt 1f li %r4,28 STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ -- cgit v1.2.3-58-ga151 From ca41ad4377072e3e51593a2a9593c6cdbf4a5c0d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Aug 2017 22:00:53 +1000 Subject: powerpc: Add irq accounting for system reset interrupts Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 1 + arch/powerpc/kernel/irq.c | 6 ++++++ arch/powerpc/kernel/traps.c | 2 ++ 3 files changed, 9 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 8add8b861e8d..64b73b03d473 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -12,6 +12,7 @@ typedef struct { unsigned int mce_exceptions; unsigned int spurious_irqs; unsigned int hmi_exceptions; + unsigned int sreset_irqs; #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0bcec745a672..5c18335580b6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -470,6 +470,11 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } + seq_printf(p, "%*s: ", prec, "NMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_printf(p, " System Reset interrupts\n"); + #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { seq_printf(p, "%*s: ", prec, "DBL"); @@ -494,6 +499,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; sum += per_cpu(irq_stat, cpu).hmi_exceptions; + sum += per_cpu(irq_stat, cpu).sreset_irqs; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d42c567e3c29..ec1557f1b157 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -288,6 +288,8 @@ void system_reset_exception(struct pt_regs *regs) if (!nested) nmi_enter(); + __this_cpu_inc(irq_stat.sreset_irqs); + /* See if any machine dependent calls */ if (ppc_md.system_reset_exception) { if (ppc_md.system_reset_exception(regs)) -- cgit v1.2.3-58-ga151 From 04019bf8ebb3c8cd66d75046054aeb264ba2db54 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 
Aug 2017 22:00:54 +1000 Subject: powerpc: Add irq accounting for watchdog interrupts This adds an irq counter for the watchdog soft-NMI. This interrupt only fires when interrupts are soft-disabled, so it will not increment much even when the watchdog is running. However it's useful for debugging and sanity checking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 3 +++ arch/powerpc/kernel/irq.c | 10 ++++++++++ arch/powerpc/kernel/watchdog.c | 3 +++ 3 files changed, 16 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 64b73b03d473..c97603d617e3 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -13,6 +13,9 @@ typedef struct { unsigned int spurious_irqs; unsigned int hmi_exceptions; unsigned int sreset_irqs; +#ifdef CONFIG_PPC_WATCHDOG + unsigned int soft_nmi_irqs; +#endif #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c18335580b6..77a7f7514327 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -475,6 +475,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); seq_printf(p, " System Reset interrupts\n"); +#ifdef CONFIG_PPC_WATCHDOG + seq_printf(p, "%*s: ", prec, "WDG"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_printf(p, " Watchdog soft-NMI interrupts\n"); +#endif + #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { seq_printf(p, "%*s: ", prec, "DBL"); @@ -500,6 +507,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).timer_irqs_others; sum += per_cpu(irq_stat, cpu).hmi_exceptions; sum += per_cpu(irq_stat, cpu).sreset_irqs; +#ifdef CONFIG_PPC_WATCHDOG + sum += per_cpu(irq_stat, cpu).soft_nmi_irqs; +#endif #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..4b9a567c9975 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -204,6 +204,9 @@ void soft_nmi_interrupt(struct pt_regs *regs) return; nmi_enter(); + + __this_cpu_inc(irq_stat.soft_nmi_irqs); + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { per_cpu(wd_timer_tb, cpu) = tb; -- cgit v1.2.3-58-ga151 From cb8b340de21e1c57e1c6d4f26ccc4af46a3ed559 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:18 +0530 Subject: powerpc/powernv: Add support for powercap framework Adds a generic powercap framework to change the system powercap inband through OPAL-OCC command/response interface. 
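The sysfs handlers added below all follow OPAL's asynchronous call convention. Condensed, and with locking and some error handling trimmed, the read path is roughly the sketch below (the full version in opal-powercap.c also serialises callers with a mutex):

/* Sketch of the read path used by powercap_show() below. */
static int read_powercap(u32 handle, u32 *pcap)
{
	struct opal_msg msg;
	int token, rc;

	token = opal_async_get_token_interruptible();
	if (token < 0)
		return token;

	/* Firmware writes the (big-endian) result to the physical address. */
	rc = opal_get_powercap(handle, token, (u32 *)__pa(pcap));
	if (rc == OPAL_ASYNC_COMPLETION) {
		/* Completion arrives later as an async message. */
		if (opal_async_wait_response(token, &msg))
			rc = OPAL_HARDWARE;
		else
			rc = opal_get_async_rc(msg);
	}

	opal_async_release_token(token);
	return opal_error_code(rc);	/* 0 on success, -errno otherwise */
}

Callers then convert the returned value with be32_to_cpu() before formatting it for sysfs.
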
Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- .../ABI/testing/sysfs-firmware-opal-powercap | 31 +++ arch/powerpc/include/asm/opal-api.h | 3 + arch/powerpc/include/asm/opal.h | 5 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-powercap.c | 244 +++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/opal.c | 4 + 7 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-opal-powercap create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c (limited to 'arch/powerpc/include') diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-powercap b/Documentation/ABI/testing/sysfs-firmware-opal-powercap new file mode 100644 index 000000000000..c9b66ec4f165 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-opal-powercap @@ -0,0 +1,31 @@ +What: /sys/firmware/opal/powercap +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Powercap directory for Powernv (P8, P9) servers + + Each folder in this directory contains a + power-cappable component. + +What: /sys/firmware/opal/powercap/system-powercap + /sys/firmware/opal/powercap/system-powercap/powercap-min + /sys/firmware/opal/powercap/system-powercap/powercap-max + /sys/firmware/opal/powercap/system-powercap/powercap-current +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: System powercap directory and attributes applicable for + Powernv (P8, P9) servers + + This directory provides powercap information. It + contains below sysfs attributes: + + - powercap-min : This file provides the minimum + possible powercap in Watt units + + - powercap-max : This file provides the maximum + possible powercap in Watt units + + - powercap-current : This file provides the current + powercap set on the system. Writing to this file + creates a request for setting a new-powercap. The + powercap requested must be between powercap-min + and powercap-max. 
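As a purely illustrative aside (not part of the patch), a userspace consumer of the files documented above might read the three attributes as follows, assuming the system-powercap node is present on the machine:

/* Illustrative only: read the powercap attributes documented above. */
#include <stdio.h>

static int read_u32(const char *path, unsigned int *val)
{
	FILE *f = fopen(path, "r");
	int ok;

	if (!f)
		return -1;
	ok = (fscanf(f, "%u", val) == 1);
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	const char *base = "/sys/firmware/opal/powercap/system-powercap";
	char path[128];
	unsigned int min, max, cur;

	snprintf(path, sizeof(path), "%s/powercap-min", base);
	if (read_u32(path, &min))
		return 1;
	snprintf(path, sizeof(path), "%s/powercap-max", base);
	if (read_u32(path, &max))
		return 1;
	snprintf(path, sizeof(path), "%s/powercap-current", base);
	if (read_u32(path, &cur))
		return 1;

	printf("powercap: %u W (allowed %u-%u W)\n", cur, min, max);
	return 0;
}
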
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index ced1ef21e981..b87305b22746 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -42,6 +42,7 @@ #define OPAL_I2C_STOP_ERR -24 #define OPAL_XIVE_PROVISIONING -31 #define OPAL_XIVE_FREE_ACTIVE -32 +#define OPAL_TIMEOUT -33 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -193,6 +194,8 @@ #define OPAL_IMC_COUNTERS_INIT 149 #define OPAL_IMC_COUNTERS_START 150 #define OPAL_IMC_COUNTERS_STOP 151 +#define OPAL_GET_POWERCAP 152 +#define OPAL_SET_POWERCAP 153 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5a715e66f910..6f09ab74aa7b 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -275,6 +275,9 @@ int64_t opal_imc_counters_init(uint32_t type, uint64_t address, int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir); int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); +int opal_get_powercap(u32 handle, int token, u32 *pcap); +int opal_set_powercap(u32 handle, int token, u32 pcap); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -352,6 +355,8 @@ static inline int opal_get_async_rc(struct opal_msg msg) void opal_wake_poller(void); +void opal_powercap_init(void); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index a0d4353a17c9..f9ec36d0e684 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o -obj-y += opal-kmsg.o +obj-y += opal-kmsg.o opal-powercap.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c new file mode 100644 index 000000000000..badb29bde93f --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -0,0 +1,244 @@ +/* + * PowerNV OPAL Powercap interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "opal-powercap: " fmt + +#include +#include +#include + +#include + +DEFINE_MUTEX(powercap_mutex); + +static struct kobject *powercap_kobj; + +struct powercap_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct pcap { + struct attribute_group pg; + struct powercap_attr *pattrs; +} *pcaps; + +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t powercap_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + ret = kstrtoint(buf, 0, &pcap); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_set_powercap(pcap_attr->handle, token, pcap); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static void powercap_add_attr(int handle, const char *name, + struct powercap_attr *attr) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = powercap_show; +} + +void __init opal_powercap_init(void) +{ + struct device_node *powercap, *node; + int i = 0; + + powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap"); + if (!powercap) { + pr_devel("Powercap node not found\n"); + return; + } + + pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps), + GFP_KERNEL); + if (!pcaps) + return; + + powercap_kobj = kobject_create_and_add("powercap", opal_kobj); + if (!powercap_kobj) { + pr_warn("Failed to create powercap kobject\n"); + goto out_pcaps; + } + + i = 0; + for_each_child_of_node(powercap, node) { + u32 cur, min, max; + int j = 0; + bool has_cur = false, has_min = false, has_max = false; + + if (!of_property_read_u32(node, "powercap-min", &min)) { + j++; + has_min = true; + } + + if 
(!of_property_read_u32(node, "powercap-max", &max)) { + j++; + has_max = true; + } + + if (!of_property_read_u32(node, "powercap-current", &cur)) { + j++; + has_cur = true; + } + + pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr), + GFP_KERNEL); + if (!pcaps[i].pattrs) + goto out_pcaps_pattrs; + + pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *), + GFP_KERNEL); + if (!pcaps[i].pg.attrs) { + kfree(pcaps[i].pattrs); + goto out_pcaps_pattrs; + } + + j = 0; + pcaps[i].pg.name = node->name; + if (has_min) { + powercap_add_attr(min, "powercap-min", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_max) { + powercap_add_attr(max, "powercap-max", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_cur) { + powercap_add_attr(cur, "powercap-current", + &pcaps[i].pattrs[j]); + pcaps[i].pattrs[j].attr.attr.mode |= 0220; + pcaps[i].pattrs[j].attr.store = powercap_store; + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) { + pr_warn("Failed to create powercap attribute group %s\n", + pcaps[i].pg.name); + goto out_pcaps_pattrs; + } + i++; + } + + return; + +out_pcaps_pattrs: + while (--i >= 0) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + } + kobject_put(powercap_kobj); +out_pcaps: + kfree(pcaps); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3369a6f2b2f1..139286c98808 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -314,3 +314,5 @@ OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); +OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); +OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 946158e6fc1d..28323c75e672 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -850,6 +850,9 @@ static int __init opal_init(void) /* Initialise OPAL kmsg dumper for flushing console on panic */ opal_kmsg_init(); + /* Initialise OPAL powercap interface */ + opal_powercap_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -966,6 +969,7 @@ int opal_error_code(int rc) case OPAL_UNSUPPORTED: return -EIO; case OPAL_HARDWARE: return -EIO; case OPAL_INTERNAL_ERROR: return -EIO; + case OPAL_TIMEOUT: return -ETIMEDOUT; default: pr_err("%s: unexpected OPAL error %d\n", __func__, rc); return -EIO; -- cgit v1.2.3-58-ga151 From 8e84b2d1f0f6a00b6476790f7bce6dcbffe91980 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:19 +0530 Subject: powerpc/powernv: Add support to set power-shifting-ratio This patch adds support to set power-shifting-ratio which hints the firmware how to distribute/throttle power between different entities in a system (e.g CPU v/s GPU). This ratio is used by OCC for power capping algorithm. 
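As a hedged illustration of how the ratio surfaces to userspace: the cpu_to_gpu_X attribute name and the 0-100 range come from the sysfs ABI document added below, while the chip id in the path is only an example for this sketch:

/* Illustrative only: set the CPU-to-GPU power-shift ratio for chip 0.
 * The value is a 0-100 ratio consumed by the OCC power-capping code;
 * the chip id in the path is an assumption for this sketch. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/firmware/opal/psr/cpu_to_gpu_0", "w");

	if (!f)
		return 1;
	fprintf(f, "%d\n", 60);		/* valid range is 0-100 */
	return fclose(f) ? 1 : 0;
}
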
Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-firmware-opal-psr | 18 +++ arch/powerpc/include/asm/opal-api.h | 2 + arch/powerpc/include/asm/opal.h | 3 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-psr.c | 175 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/opal.c | 3 + 7 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-opal-psr create mode 100644 arch/powerpc/platforms/powernv/opal-psr.c (limited to 'arch/powerpc/include') diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-psr b/Documentation/ABI/testing/sysfs-firmware-opal-psr new file mode 100644 index 000000000000..cc2ece70e365 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-opal-psr @@ -0,0 +1,18 @@ +What: /sys/firmware/opal/psr +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Power-Shift-Ratio directory for Powernv P9 servers + + Power-Shift-Ratio allows to provide hints the firmware + to shift/throttle power between different entities in + the system. Each attribute in this directory indicates + a settable PSR. + +What: /sys/firmware/opal/psr/cpu_to_gpu_X +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: PSR sysfs attributes for Powernv P9 servers + + Power-Shift-Ratio between CPU and GPU for a given chip + with chip-id X. This file gives the ratio (0-100) + which is used by OCC for power-capping. diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index b87305b22746..0cb7d110c0b4 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -196,6 +196,8 @@ #define OPAL_IMC_COUNTERS_STOP 151 #define OPAL_GET_POWERCAP 152 #define OPAL_SET_POWERCAP 153 +#define OPAL_GET_POWER_SHIFT_RATIO 154 +#define OPAL_SET_POWER_SHIFT_RATIO 155 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6f09ab74aa7b..d87ffcb16b61 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -277,6 +277,8 @@ int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); int opal_get_powercap(u32 handle, int token, u32 *pcap); int opal_set_powercap(u32 handle, int token, u32 pcap); +int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); +int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -356,6 +358,7 @@ static inline int opal_get_async_rc(struct opal_msg msg) void opal_wake_poller(void); void opal_powercap_init(void); +void opal_psr_init(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f9ec36d0e684..674ed1e8160a 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o -obj-y += opal-kmsg.o opal-powercap.o +obj-y += opal-kmsg.o opal-powercap.o opal-psr.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o diff --git 
a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c new file mode 100644 index 000000000000..7313b7fc9071 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -0,0 +1,175 @@ +/* + * PowerNV OPAL Power-Shift-Ratio interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "opal-psr: " fmt + +#include +#include +#include + +#include + +DEFINE_MUTEX(psr_mutex); + +static struct kobject *psr_kobj; + +struct psr_attr { + u32 handle; + struct kobj_attribute attr; +} *psr_attrs; + +static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_get_power_shift_ratio(psr_attr->handle, token, + (u32 *)__pa(&psr)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + ret = kstrtoint(buf, 0, &psr); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +void __init opal_psr_init(void) +{ + struct device_node *psr, *node; + int i = 0; + + psr = of_find_compatible_node(NULL, NULL, + "ibm,opal-power-shift-ratio"); + if (!psr) { + pr_devel("Power-shift-ratio node not found\n"); + return; + } + + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr), + GFP_KERNEL); + if (!psr_attrs) + return; + + psr_kobj = kobject_create_and_add("psr", opal_kobj); + if (!psr_kobj) { + pr_warn("Failed to create psr kobject\n"); + goto out; + } + + for_each_child_of_node(psr, node) { + if (of_property_read_u32(node, "handle", + &psr_attrs[i].handle)) + goto out_kobj; + + 
sysfs_attr_init(&psr_attrs[i].attr.attr); + if (of_property_read_string(node, "label", + &psr_attrs[i].attr.attr.name)) + goto out_kobj; + psr_attrs[i].attr.attr.mode = 0664; + psr_attrs[i].attr.show = psr_show; + psr_attrs[i].attr.store = psr_store; + if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) { + pr_devel("Failed to create psr sysfs file %s\n", + psr_attrs[i].attr.attr.name); + goto out_kobj; + } + i++; + } + + return; +out_kobj: + kobject_put(psr_kobj); +out: + kfree(psr_attrs); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 139286c98808..09cc6ec92af7 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -316,3 +316,5 @@ OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); +OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 28323c75e672..f659edbc5056 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -853,6 +853,9 @@ static int __init opal_init(void) /* Initialise OPAL powercap interface */ opal_powercap_init(); + /* Initialise OPAL Power-Shifting-Ratio interface */ + opal_psr_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); -- cgit v1.2.3-58-ga151 From bf9571550f529335caa59f41827d180908759916 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:20 +0530 Subject: powerpc/powernv: Add support to clear sensor groups data Adds support for clearing different sensor groups. OCC inband sensor groups like CSM, Profiler, Job Scheduler can be cleared using this driver. The min/max of all sensors belonging to these sensor groups will be cleared. Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- .../bindings/powerpc/opal/sensor-groups.txt | 27 +++ arch/powerpc/include/asm/opal-api.h | 1 + arch/powerpc/include/asm/opal.h | 2 + arch/powerpc/platforms/powernv/Makefile | 2 +- .../powerpc/platforms/powernv/opal-sensor-groups.c | 212 +++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 3 + 7 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/powerpc/opal/sensor-groups.txt create mode 100644 arch/powerpc/platforms/powernv/opal-sensor-groups.c (limited to 'arch/powerpc/include') diff --git a/Documentation/devicetree/bindings/powerpc/opal/sensor-groups.txt b/Documentation/devicetree/bindings/powerpc/opal/sensor-groups.txt new file mode 100644 index 000000000000..6ad881cbffda --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/opal/sensor-groups.txt @@ -0,0 +1,27 @@ +IBM OPAL Sensor Groups Binding +------------------------------- + +Node: /ibm,opal/sensor-groups + +Description: Contains sensor groups available in the Powernv P9 +servers. Each child node indicates a sensor group. 
+ +- compatible : Should be "ibm,opal-sensor-group" + +Each child node contains below properties: + +- type : String to indicate the type of sensor-group + +- sensor-group-id: Abstract unique identifier provided by firmware of + type which is used for sensor-group + operations like clearing the min/max history of all + sensors belonging to the group. + +- ibm,chip-id : Chip ID + +- sensors : Phandle array of child nodes of /ibm,opal/sensor/ + belonging to this group + +- ops : Array of opal-call numbers indicating available operations on + sensor groups like clearing min/max, enabling/disabling sensor + group. diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0cb7d110c0b4..450a60b81d2a 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -198,6 +198,7 @@ #define OPAL_SET_POWERCAP 153 #define OPAL_GET_POWER_SHIFT_RATIO 154 #define OPAL_SET_POWER_SHIFT_RATIO 155 +#define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index d87ffcb16b61..97ff192f5cfb 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -279,6 +279,7 @@ int opal_get_powercap(u32 handle, int token, u32 *pcap); int opal_set_powercap(u32 handle, int token, u32 pcap); int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); +int opal_sensor_group_clear(u32 group_hndl, int token); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -359,6 +360,7 @@ void opal_wake_poller(void); void opal_powercap_init(void); void opal_psr_init(void); +void opal_sensor_groups_init(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 674ed1e8160a..177b3d4542b5 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o -obj-y += opal-kmsg.o opal-powercap.o opal-psr.o +obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c new file mode 100644 index 000000000000..7e5a235ebf76 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -0,0 +1,212 @@ +/* + * PowerNV OPAL Sensor-groups interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "opal-sensor-groups: " fmt + +#include <linux/of.h> +#include <linux/kobject.h> +#include <linux/slab.h> + +#include <asm/opal.h> + +DEFINE_MUTEX(sg_mutex); + +static struct kobject *sg_kobj; + +struct sg_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct sensor_group { + char name[20]; + struct attribute_group sg; + struct sg_attr *sgattrs; +} *sgs; + +static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct sg_attr *sattr = container_of(attr, struct sg_attr, attr); + struct opal_msg msg; + u32 data; + int ret, token; + + ret = kstrtoint(buf, 0, &data); + if (ret) + return ret; + + if (data != 1) + return -EINVAL; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&sg_mutex); + if (ret) + goto out_token; + + ret = opal_sensor_group_clear(sattr->handle, token); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&sg_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static struct sg_ops_info { + int opal_no; + const char *attr_name; + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +} ops_info[] = { + { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store }, +}; + +static void add_attr(int handle, struct sg_attr *attr, int index) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = ops_info[index].attr_name; + attr->attr.attr.mode = 0220; + attr->attr.store = ops_info[index].store; +} + +static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, + u32 handle) +{ + int i, j; + int count = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) { + add_attr(handle, &sg->sgattrs[count], j); + sg->sg.attrs[count] = + &sg->sgattrs[count].attr.attr; + count++; + } + + return sysfs_create_group(sg_kobj, &sg->sg); +} + +static int get_nr_attrs(const __be32 *ops, int len) +{ + int i, j; + int nr_attrs = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) + nr_attrs++; + + return nr_attrs; +} + +void __init opal_sensor_groups_init(void) +{ + struct device_node *sg, *node; + int i = 0; + + sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!sg) { + pr_devel("Sensor groups node not found\n"); + return; + } + + sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL); + if (!sgs) + return; + + sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj); + if (!sg_kobj) { + pr_warn("Failed to create sensor group kobject\n"); + goto out_sgs; + } + + for_each_child_of_node(sg, node) { + const __be32 *ops; + u32 sgid, len, nr_attrs, chipid; + + ops = of_get_property(node, "ops", &len); + if (!ops) + continue; + + nr_attrs = get_nr_attrs(ops, len); + if (!nr_attrs) + continue; + + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr), + GFP_KERNEL); + if (!sgs[i].sgattrs) + goto out_sgs_sgattrs; + + sgs[i].sg.attrs = kcalloc(nr_attrs + 1, + sizeof(struct attribute *), + GFP_KERNEL); + + if (!sgs[i].sg.attrs) { +
kfree(sgs[i].sgattrs); + goto out_sgs_sgattrs; + } + + if (of_property_read_u32(node, "sensor-group-id", &sgid)) { + pr_warn("sensor-group-id property not found\n"); + goto out_sgs_sgattrs; + } + + if (!of_property_read_u32(node, "ibm,chip-id", &chipid)) + sprintf(sgs[i].name, "%s%d", node->name, chipid); + else + sprintf(sgs[i].name, "%s", node->name); + + sgs[i].sg.name = sgs[i].name; + if (add_attr_group(ops, len, &sgs[i], sgid)) { + pr_warn("Failed to create sensor attribute group %s\n", + sgs[i].sg.name); + goto out_sgs_sgattrs; + } + i++; + } + + return; + +out_sgs_sgattrs: + while (--i >= 0) { + kfree(sgs[i].sgattrs); + kfree(sgs[i].sg.attrs); + } + kobject_put(sg_kobj); +out_sgs: + kfree(sgs); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 09cc6ec92af7..951fa93f881d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -318,3 +318,4 @@ OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index f659edbc5056..3c122d08b6c3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -856,6 +856,9 @@ static int __init opal_init(void) /* Initialise OPAL Power-Shifting-Ratio interface */ opal_psr_init(); + /* Initialise OPAL sensor groups */ + opal_sensor_groups_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); -- cgit v1.2.3-58-ga151
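A note on usage: the patch above exposes each group as a directory under /sys/firmware/opal/sensor_groups/, named after its device tree node (with the chip id appended when "ibm,chip-id" is present), containing a write-only "clear" file. A minimal sketch, assuming a hypothetical group node named "occ-csm" on chip 0:

    ~ echo 1 > /sys/firmware/opal/sensor_groups/occ-csm0/clear

Only the value 1 is accepted (anything else returns -EINVAL), and the write blocks until the asynchronous OPAL call completes.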
From d30a5a5262ca64d58aa07fb2ecd7f992df83b4bc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 Aug 2017 16:39:25 +1000 Subject: powerpc/traps: Use SRR1 defines for program check reasons Currently we open code the reason codes for program checks. Instead use the existing SRR1 defines. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/traps.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 73be2f71dbbb..c0600e9e0ff5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -724,6 +724,7 @@ * may not be recoverable */ #define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ #define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a891c277040b..c567d706a7ed 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -379,11 +379,11 @@ static inline int check_io_access(struct pt_regs *regs) /* On non-4xx, the reason for the machine check or program exception is in the MSR. */ #define get_reason(regs) ((regs)->msr) -#define REASON_TM 0x200000 -#define REASON_FP 0x100000 -#define REASON_ILLEGAL 0x80000 -#define REASON_PRIVILEGED 0x40000 -#define REASON_TRAP 0x20000 +#define REASON_TM SRR1_PROGTM +#define REASON_FP SRR1_PROGFPE +#define REASON_ILLEGAL SRR1_PROGILL +#define REASON_PRIVILEGED SRR1_PROGPRIV +#define REASON_TRAP SRR1_PROGTRAP #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -- cgit v1.2.3-58-ga151 From 72e4b2cdf07b4c43115f058ed74d694eab5d6454 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:50 +0200 Subject: powerpc/time: refactor MFTB() to limit number of ifdefs The 8xx cannot access the TBL and TBU registers using mfspr/mtspr; they must be accessed using mftb/mftbu. Due to this, there are a number of places with #ifdef CONFIG_8xx. This patch defines new macros MFTBL(x) and MFTBU(x) on the same model as MFTB(x) and tries to make use of them as much as possible. In arch/powerpc/include/asm/timex.h, we also remove the ifdef for the asm() operands, as the compiler doesn't mind unused operands. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/boot/ppc_asm.h | 8 ++++++++ arch/powerpc/boot/util.S | 24 +++++------------------- arch/powerpc/include/asm/ppc_asm.h | 12 +++++++++--- arch/powerpc/include/asm/timex.h | 4 ---- arch/powerpc/kernel/vdso32/gettimeofday.S | 12 +++--------- 5 files changed, 25 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h index 68e388ee94fe..c63299f9fdd9 100644 --- a/arch/powerpc/boot/ppc_asm.h +++ b/arch/powerpc/boot/ppc_asm.h @@ -80,4 +80,12 @@ .long 0xa6037b7d; /* mtsrr1 r11 */ \ .long 0x2400004c /* rfid */ +#ifdef CONFIG_PPC_8xx +#define MFTBL(dest) mftb dest +#define MFTBU(dest) mftbu dest +#else +#define MFTBL(dest) mfspr dest, SPRN_TBRL +#define MFTBU(dest) mfspr dest, SPRN_TBRU +#endif + #endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index 243b8497d58b..ec069177d942 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -71,32 +71,18 @@ udelay: add r4,r4,r5 addi r4,r4,-1 divw r4,r4,r5 /* BUS ticks */ -#ifdef CONFIG_8xx -1: mftbu r5 - mftb r6 - mftbu r7 -#else -1: mfspr r5, SPRN_TBRU - mfspr r6, SPRN_TBRL - mfspr r7, SPRN_TBRU -#endif +1: MFTBU(r5) + MFTBL(r6) + MFTBU(r7) cmpw 0,r5,r7 bne 1b /* Get [synced] base time */ addc r9,r6,r4 /* Compute end time */ addze r8,r5 -#ifdef CONFIG_8xx -2: mftbu r5 -#else -2: mfspr r5, SPRN_TBRU -#endif +2: MFTBU(r5) cmpw 0,r5,r8 blt 2b bgt 3f -#ifdef CONFIG_8xx - mftb r6 -#else - mfspr r6, SPRN_TBRL -#endif + MFTBL(r6) cmpw 0,r6,r9 blt 2b 3: blr diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index daeda2bbe12a..1d7ff3156a97 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -378,10 +378,16 @@ BEGIN_FTR_SECTION_NESTED(96); \ cmpwi dest,0; \ beq- 90b; \ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) -#elif defined(CONFIG_8xx) -#define MFTB(dest) mftb dest #else -#define MFTB(dest) mfspr dest, SPRN_TBRL +#define MFTB(dest) MFTBL(dest) +#endif + +#ifdef CONFIG_PPC_8xx +#define MFTBL(dest) mftb dest +#define MFTBU(dest) mftbu dest +#else +#define MFTBL(dest) mfspr dest, SPRN_TBRL +#define MFTBU(dest) mfspr dest, SPRN_TBRU #endif #ifndef CONFIG_SMP diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 2cf846edb3fc..b467dbcb0fb7 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -45,11 +45,7 @@ static inline cycles_t get_cycles(void) " .long 0\n" " .long 0\n" ".previous" -#ifdef CONFIG_8xx - : "=r" (ret) : "i" (CPU_FTR_601)); -#else : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL)); -#endif return ret; #endif } diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 6b2b69616e77..769c2624e0a6 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -232,15 +232,9 @@ __do_get_tspec: lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) /* Get a stable TB value */ -#ifdef CONFIG_8xx -2: mftbu r3 - mftbl r4 - mftbu r0 -#else -2: mfspr r3, SPRN_TBRU - mfspr r4, SPRN_TBRL - mfspr r0, SPRN_TBRU -#endif +2: MFTBU(r3) + MFTBL(r4) + MFTBU(r0) cmplw cr0,r3,r0 bne- 2b -- cgit v1.2.3-58-ga151
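For clarity, the MFTBU()/MFTBL() pairs in the hunks above always appear in a read-upper/read-lower/re-read-upper loop. A minimal C sketch of the same protocol, using the mftbu()/mftbl() accessors from asm/reg.h (the function name is made up):

    static inline u64 read_timebase_32bit(void)
    {
            unsigned long hi, lo, chk;

            do {
                    hi  = mftbu();  /* upper half of the timebase */
                    lo  = mftbl();  /* lower half */
                    chk = mftbu();  /* re-read the upper half */
            } while (hi != chk);    /* retry if TBL carried into TBU */

            return ((u64)hi << 32) | lo;
    }

The re-read guarantees the two halves belong to the same 64-bit value even if the low word wraps between the two reads.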
From 968159c0031ac1e07ab4426397e786c9c483f068 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:54 +0200 Subject: powerpc/8xx: Getting rid of remaining use of CONFIG_8xx Two config options exist to define powerpc MPC8xx: CONFIG_PPC_8xx and CONFIG_8xx. arch/powerpc/platforms/Kconfig.cputype has contained the following comment about the CONFIG_8xx item for some years: "# this is temp to handle compat with arch=ppc". arch/powerpc is now the only place with remaining uses of CONFIG_8xx: get rid of them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 4 ++-- arch/powerpc/Makefile | 2 +- arch/powerpc/boot/Makefile | 4 ++-- arch/powerpc/include/asm/cache.h | 2 +- arch/powerpc/include/asm/cputable.h | 4 ++-- arch/powerpc/include/asm/fs_pd.h | 2 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/include/asm/reg.h | 10 +++++----- arch/powerpc/include/asm/timex.h | 2 +- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/cputable.c | 4 ++-- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/kgdb.c | 4 ++-- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/mm/mmu_decl.h | 10 +++++----- arch/powerpc/mm/tlb_nohash_low.S | 2 +- arch/powerpc/platforms/8xx/Kconfig | 1 - arch/powerpc/platforms/Kconfig.cputype | 2 +- 20 files changed, 32 insertions(+), 33 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9640852fc0b2..c8334a5b0eb7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -405,7 +405,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || PPC_MPC832x || BOOKE + depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -967,7 +967,7 @@ config PPC_PCI_CHOICE config PCI bool "PCI support" if PPC_PCI_CHOICE - default y if !40x && !CPM2 && !8xx && !PPC_83xx \ + default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \ && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON default PCI_QSPAN if PPC_8xx select GENERIC_PCI_IOMAP diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 7b8eddfc46d2..5480a7d45ef2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -249,7 +249,7 @@ KBUILD_AFLAGS += $(aflags-y) KBUILD_CFLAGS += $(cflags-y) head-y := arch/powerpc/kernel/head_$(BITS).o -head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o +head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x)
:= arch/powerpc/kernel/head_40x.o head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7814a7b1523..bd2a1b8cd83f 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -107,7 +107,7 @@ src-wlib-y += crtsavres.S endif src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c -src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c +src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c @@ -124,7 +124,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ treeboot-iss4xx.c treeboot-currituck.c \ treeboot-akebono.c \ simpleboot.c fixed-head.S virtex.c -src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c +src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 5a90292afbad..d122f7f957ce 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -5,7 +5,7 @@ /* bytes per L1 cache line */ -#if defined(CONFIG_8xx) || defined(CONFIG_403GCX) +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 #elif defined(CONFIG_PPC_E500MC) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index d02ad93bf708..a9bf921f4efc 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -513,7 +513,7 @@ enum { #else CPU_FTRS_GENERIC_32 | #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | #endif #ifdef CONFIG_40x @@ -565,7 +565,7 @@ enum { #else CPU_FTRS_GENERIC_32 & #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & #endif #ifdef CONFIG_40x diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index f79d6c74eb2a..8def56ec05c6 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -26,7 +26,7 @@ #define cpm2_unmap(addr) do {} while(0) #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include extern immap_t __iomem *mpc8xx_immr; diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 91314268f04f..9278eaa7ca59 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -121,7 +121,7 @@ extern int icache_44x_need_flush; #include #elif defined(CONFIG_FSL_BOOKE) #include -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_8xx) #include #endif diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1d7ff3156a97..36f3e41c9fbe 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -417,7 +417,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) * and they must be used. 
*/ -#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) +#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx) #define tlbia \ li r4,1024; \ mtctr r4; \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c0600e9e0ff5..70722e5b93e7 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -22,9 +22,9 @@ #include #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #define MSR_SF_LG 63 /* Enable 64 bit mode */ #define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ @@ -135,7 +135,7 @@ #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) -#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) /* Default MSR for kernel mode. */ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) @@ -1164,7 +1164,7 @@ #endif #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 #define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 @@ -1363,7 +1363,7 @@ static inline void msr_check_and_clear(unsigned long bits) #else /* __powerpc64__ */ -#if defined(CONFIG_8xx) +#if defined(CONFIG_PPC_8xx) #define mftbl() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index b467dbcb0fb7..cb61eae5b7ed 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void) ret = 0; __asm__ __volatile__( -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx "97: mftb %0\n" #else "97: mfspr %0, %2\n" diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 5622bd0248e5..91960f83039c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -83,7 +83,7 @@ extra-y := head_$(BITS).o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o -extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_PPC_8xx) := head_8xx.o extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 080bf51e0104..e9ba5b84ac9b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1259,7 +1259,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif /* CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, .pvr_value = 0x00500000, @@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_8xx, .platform = "ppc823", }, -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x { /* 403GC */ .pvr_mask = 0xffffff00, diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 77a7f7514327..8b514e910bf1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -24,7 +24,7 @@ * mask register (of which only 16 are defined), hence the weird shifting * and complement of the cached_irq_mask. I want to be able to stuff * this right into the SIU SMASK register. 
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx + * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx * to reduce code space and undefined function references. */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index dbf098121ce6..35e240a0a408 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -67,9 +67,9 @@ static struct hard_trap_info #endif #else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ -#if defined(CONFIG_8xx) +#if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ -#else /* ! CONFIG_8xx */ +#else /* ! CONFIG_PPC_8xx */ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ed902a8fefa8..f88fac3d281b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -353,7 +353,7 @@ static void sanity_check_fault(bool is_write, unsigned long error_code) { } #define page_fault_is_bad(__err) (0) #else #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) -#if defined(CONFIG_8xx) +#if defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & 0x10000000) #elif defined(CONFIG_PPC64) #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 46b4e67d2372..4362b86ef84c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -436,7 +436,7 @@ void flush_dcache_icache_page(struct page *page) return; } #endif -#if defined(CONFIG_8xx) || defined(CONFIG_PPC64) +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); #else diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d46128b22150..57fbc554c785 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -27,7 +27,7 @@ /* * On 40x and 8xx, we directly inline tlbia and tlbivax */ -#if defined(CONFIG_40x) || defined(CONFIG_8xx) +#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx) static inline void _tlbil_all(void) { asm volatile ("sync; tlbia; isync" : : : "memory"); @@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid) } #define _tlbil_pid_noind(pid) _tlbil_pid(pid) -#else /* CONFIG_40x || CONFIG_8xx */ +#else /* CONFIG_40x || CONFIG_PPC_8xx */ extern void _tlbil_all(void); extern void _tlbil_pid(unsigned int pid); #ifdef CONFIG_PPC_BOOK3E @@ -46,12 +46,12 @@ extern void _tlbil_pid_noind(unsigned int pid); #else #define _tlbil_pid_noind(pid) _tlbil_pid(pid) #endif -#endif /* !(CONFIG_40x || CONFIG_8xx) */ +#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */ /* * On 8xx, we directly inline tlbie, on others, it's extern */ -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx static inline void _tlbil_va(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind) { @@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, { __tlbil_va(address, pid); } -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index eabecfcaef7c..048b8e9f4492 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S 
+++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -60,7 +60,7 @@ _GLOBAL(__tlbil_va) isync 1: blr -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_8xx) /* * Nothing to do for 8xx, everything is inline diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 80cbcb0ad9b1..d3f664f5166b 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -5,7 +5,6 @@ config CPM1 choice prompt "8xx Machine Type" depends on PPC_8xx - depends on 8xx default MPC885ADS config MPC8XXFADS diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9539620a48d4..395593ef580c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -375,7 +375,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON + depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON default n if PPC_47x default y -- cgit v1.2.3-58-ga151 From 3ee87674e0212a152419a479dfb1eed501bab386 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:58 +0200 Subject: powerpc/8xx: Use symbolic PVR value For the 8xx, the PVR values defined in arch/powerpc/include/asm/reg.h are not used anywhere. Remove all of them and add PVR_8xx. Use it in arch/powerpc/kernel/cputable.c. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 6 ++---- arch/powerpc/kernel/cputable.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 70722e5b93e7..c36823d64ec9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1247,10 +1247,8 @@ * differentiated by the version number in the Communication Processor * Module (CPM). */ -#define PVR_821 0x00500000 -#define PVR_823 PVR_821 -#define PVR_850 PVR_821 -#define PVR_860 PVR_821 +#define PVR_8xx 0x00500000 + #define PVR_8240 0x00810100 #define PVR_8245 0x80811014 #define PVR_8260 PVR_8240 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e9ba5b84ac9b..760872916013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1262,7 +1262,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, - .pvr_value = 0x00500000, + .pvr_value = PVR_8xx, .cpu_name = "8xx", /* CPU_FTR_MAYBE_CAN_DOZE is possible, * if the 8xx code is there.... */ -- cgit v1.2.3-58-ga151 From 5b6c133e0801007117cf4a466cb56de86e186138 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 15 Aug 2017 20:02:56 +1000 Subject: powerpc/mm/nohash: Move definition of PGALLOC_GFP to fix build errors In some obscure Book3E configs (randconfig) we can end up missing a definition for PGALLOC_GFP in pgtable_64.c. Fix it by moving the definition to asm/pgalloc.h.
Fixes: de3b87611dd1 ("powerpc/mm/book(e)(3s)/64: Add page table accounting") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 -- arch/powerpc/include/asm/pgalloc.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index e2329db9d6f4..1fcfa425cefa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,8 +41,6 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO - extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index d795c5d5789c..45ae1212ab8a 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -17,6 +17,8 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) } #endif /* MODULE */ +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) + #ifdef CONFIG_PPC_BOOK3S #include #else -- cgit v1.2.3-58-ga151 From 3184cc4b6f6a1dc0c1745aafe2b14b1206ef3187 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:03 +0200 Subject: powerpc/mm: Fix kernel RAM protection after freeing unused memory on PPC32 As seen below, although the init sections have been freed, the associated memory area is still marked as executable in the page tables. ~ dmesg [ 5.860093] Freeing unused kernel memory: 592K (c0570000 - c0604000) ~ cat /sys/kernel/debug/kernel_page_tables ---[ Start of kernel VM ]--- 0xc0000000-0xc0497fff 4704K rw X present dirty accessed shared 0xc0498000-0xc056ffff 864K rw present dirty accessed shared 0xc0570000-0xc059ffff 192K rw X present dirty accessed shared 0xc05a0000-0xc7ffffff 125312K rw present dirty accessed shared ---[ vmalloc() Area ]--- This patch fixes that.
The implementation is done by reusing the change_page_attr() function implemented for CONFIG_DEBUG_PAGEALLOC. Signed-off-by: Christophe Leroy Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 2 +- arch/powerpc/mm/pgtable_32.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index afae9a336136..ab7f44475b1f 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -81,7 +81,7 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else static inline void mark_initmem_nx(void) { } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 991036f818bb..85e8f0e0efe6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -323,8 +323,6 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) return(retval); } -#ifdef CONFIG_DEBUG_PAGEALLOC - static int __change_page_attr_noflush(struct page *page, pgprot_t prot) { pte_t *kpte; @@ -347,7 +345,7 @@ static int __change_page_attr_noflush(struct page *page, pgprot_t prot) /* * Change the page attributes of an page in the linear mapping. * - * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY + * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY */ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) { @@ -368,7 +366,16 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) return err; } +void mark_initmem_nx(void) +{ + struct page *page = virt_to_page(_sinittext); + unsigned long numpages = PFN_UP((unsigned long)_einittext) - + PFN_DOWN((unsigned long)_sinittext); + + change_page_attr(page, numpages, PAGE_KERNEL); +} +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) -- cgit v1.2.3-58-ga151 From 86b19520e7ef5539eb081c76fe2f5c955180205f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:07 +0200 Subject: powerpc/mm: declare some local functions static get_pteptr() and __mapin_ram_chunk() are only used locally, so define them static. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- arch/powerpc/mm/pgtable_32.c | 4 ++-- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7fb755880409..17c8766777f1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -294,9 +294,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); /* Generic accessors to PTE bits */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index
9278eaa7ca59..185c6a47f9ba 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -337,9 +337,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 4a3dd9fc6989..57f89cd88568 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -243,7 +243,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) /* * Map in a chunk of physical memory starting at start. */ -void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) +static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) { unsigned long v, s, f; phys_addr_t p; @@ -295,7 +295,7 @@ void __init mapin_ram(void) * Returns true (1) if PTE was found, zero otherwise. The pointer to * the PTE pointer is unmodified if PTE is not found. */ -int +static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) { pgd_t *pgd; -- cgit v1.2.3-58-ga151 From 4cfac2f9c7f116af8516d0b3d0e7383189eca376 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:09 +0200 Subject: powerpc/mm: Simplify __set_fixmap() __set_fixmap() uses __fix_to_virt() then does the boundary checks by itself. Instead, we can use fix_to_virt() which does the verification at build time. For this, we need to use it inline so that GCC can see the real value of idx at build time. In the meantime, we remove the 'fixmaps' variable. This variable is set but has never been used from the beginning (commit 2c419bdeca1d9 ("[POWERPC] Port fixmap from x86 and use for kmap_atomic")). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fixmap.h | 10 +++++++--- arch/powerpc/mm/pgtable_32.c | 15 --------------- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 4508b322f2cd..6c40dfda5912 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include #include +#include <asm/pgtable.h> #ifdef CONFIG_HIGHMEM #include #include @@ -62,9 +63,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -extern void __set_fixmap (enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) @@ -72,5 +70,11 @@ extern void __set_fixmap (enum fixed_addresses idx, #include +static inline void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags)); +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 57f89cd88568..65eda1997c3f 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -408,18 +408,3 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ - -static int fixmaps; - -void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - map_kernel_page(address, phys, pgprot_val(flags)); - fixmaps++; -} -- cgit v1.2.3-58-ga151
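The practical effect, as a hedged illustration (assuming the asm-generic fix_to_virt(), which wraps __fix_to_virt() in a BUILD_BUG_ON() range check; 'phys' is a made-up variable):

    __set_fixmap(FIX_HOLE, phys, PAGE_KERNEL);  /* constant, in range: compiles */
    __set_fixmap(__end_of_fixed_addresses, phys, PAGE_KERNEL);
                    /* out of range: now rejected at build time rather than
                       hitting the old runtime BUG() */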
From ca8afd4046255ac046f8229d5159c6d213e37b22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Jul 2017 17:03:42 +0200 Subject: powerpc/hugetlb: fix page rights verification in gup_hugepte() gup_hugepte() checks if pages are present and readable, and when 'write' is set, also checks if the pages are writable. Initially this was done by checking if _PAGE_PRESENT and _PAGE_READ were set. In addition, _PAGE_WRITE was verified for write accesses. The problem is that we have to handle the three following cases: 1/ the target defines __PAGE_READ and __PAGE_WRITE; 2/ the target defines __PAGE_RW; 3/ the target defines __PAGE_RO. In case 1/, this is obvious. In case 2/, __PAGE_READ is defined as 0 and __PAGE_WRITE as __PAGE_RW, so it works as well. But in case 3/, __PAGE_RW is defined as 0, which means __PAGE_WRITE is 0 and then the test returns true (page writable) in all cases. A first correction was attempted in commit 6b8cb66a6a7cc ("powerpc: Fix usage of _PAGE_RO in hugepage"), but that fix is wrong: instead of checking that the page is writable when write is requested, it checks that the page is NOT writable when write is NOT requested. This patch adds a new pte_read() helper to check whether a page is readable or not. This avoids handling all possible cases in gup_hugepte(). Then gup_hugepte() is modified to use pte_present(), pte_read() and pte_write() instead of the raw flags.
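To make case 3/ concrete, this is roughly how the original mask test degenerates on a _PAGE_RO-only target, where _PAGE_RW, and hence _PAGE_WRITE, is 0 (a minimal sketch, not code from the tree):

    mask = _PAGE_PRESENT | _PAGE_READ;
    if (write)
            mask |= _PAGE_WRITE;            /* OR-ing in 0: mask unchanged */
    if ((pte_val(pte) & mask) != mask)      /* read-only PTEs still pass */
            return 0;

so a write fast-GUP could succeed on a read-only page. The new pte_read()/pte_write() helpers let each subarch encode its own RW/RO polarity instead.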
Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++ arch/powerpc/include/asm/nohash/pgtable.h | 1 + arch/powerpc/mm/hugetlbpage.c | 15 +++------------ 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 17c8766777f1..4d453f979553 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,6 +298,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 18a8580d3ddc..1071d52f382d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -410,6 +410,11 @@ static inline int pte_write(pte_t pte) return __pte_write(pte) || pte_savedwrite(pte); } +static inline int pte_read(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ)); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index e5805ad78e12..17989c3d9a24 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -14,6 +14,7 @@ static inline int pte_write(pte_t pte) { return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e1bf5ca397fe..1226932579a0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -977,7 +977,6 @@ EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte); int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask; unsigned long pte_end; struct page *head, *page; pte_t pte; @@ -988,18 +987,10 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, end = pte_end; pte = READ_ONCE(*ptep); - mask = _PAGE_PRESENT | _PAGE_READ; - - /* - * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined - * as 0 and _PAGE_RO has to be set when a page is not writable - */ - if (write) - mask |= _PAGE_WRITE; - else - mask |= _PAGE_RO; - if ((pte_val(pte) & mask) != mask) + if (!pte_present(pte) || !pte_read(pte)) + return 0; + if (write && !pte_write(pte)) return 0; /* hugepages are never "special" */ -- cgit v1.2.3-58-ga151 From 79cc38ded1e1ac86e69c90f604efadd50b0b3762 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Jul 2017 10:31:26 +0530 
Subject: powerpc/mm/hugetlb: Add support for reserving gigantic huge pages via kernel command line With commit aa888a74977a8 ("hugetlb: support larger than MAX_ORDER") we added support for allocating gigantic hugepages via kernel command line. Switch ppc64 arch specific code to use that. W.r.t FSL support, we now limit our allocation range using BOOTMEM_ALLOC_ACCESSIBLE. We use the kernel command line to do reservation of hugetlb pages on powernv platforms. On pseries hash mmu mode the supported gigantic huge page size is 16GB and that can only be allocated with hypervisor assist. For pseries the command line option doesn't do the allocation. Instead pseries does gigantic hugepage allocation based on hypervisor hint that is specified via "ibm,expected#pages" property of the memory node. Cc: Scott Wood Cc: Christophe Leroy Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/hugetlb.h | 14 -- arch/powerpc/kernel/setup-common.c | 7 - arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 177 +++----------------------- arch/powerpc/mm/init_32.c | 2 - 6 files changed, 22 insertions(+), 182 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 6981a52b3887..f28d21c69f79 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -468,7 +468,7 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); -extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); +extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 7f4025a6c69e..b8a0fb442c64 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -218,18 +218,4 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, } #endif /* CONFIG_HUGETLB_PAGE */ -/* - * FSL Book3E platforms require special gpage handling - the gpages - * are reserved early in the boot process by memblock instead of via - * the .dts as on IBM platforms. - */ -#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ - defined(CONFIG_PPC_8xx)) -extern void __init reserve_hugetlb_gpages(void); -#else -static inline void reserve_hugetlb_gpages(void) -{ -} -#endif - #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 94a948207cd2..0f896f17d5ab 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -916,13 +916,6 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. 
- */ - reserve_hugetlb_gpages(); - klp_init_thread_info(&init_thread_info); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a93137c358ea..867ddacc1213 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -509,7 +509,7 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, phys_addr, block_size, expected_pages); if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) { memblock_reserve(phys_addr, block_size * expected_pages); - add_gpage(phys_addr, block_size, expected_pages); + pseries_add_gpage(phys_addr, block_size, expected_pages); } return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1226932579a0..cd5bd4a4c5e4 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -36,26 +36,6 @@ unsigned int HPAGE_SHIFT; EXPORT_SYMBOL(HPAGE_SHIFT); -/* - * Tracks gpages after the device tree is scanned and before the - * huge_boot_pages list is ready. On non-Freescale implementations, this is - * just used to track 16G pages and so is a single array. FSL-based - * implementations may have more than one gpage size, so we need multiple - * arrays - */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -#define MAX_NUMBER_GPAGES 128 -struct psize_gpages { - u64 gpage_list[MAX_NUMBER_GPAGES]; - unsigned int nr_gpages; -}; -static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; -#else -#define MAX_NUMBER_GPAGES 1024 -static u64 gpage_freearray[MAX_NUMBER_GPAGES]; -static unsigned nr_gpages; -#endif - #define hugepd_none(hpd) (hpd_val(hpd) == 0) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) @@ -210,145 +190,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -/* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy allocator is setup. - */ -void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) -{ - unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); - int i; - - if (addr == 0) - return; - - gpage_freearray[idx].nr_gpages = number_of_pages; - - for (i = 0; i < number_of_pages; i++) { - gpage_freearray[idx].gpage_list[i] = addr; - addr += page_size; - } -} - -/* - * Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. - */ -int alloc_bootmem_huge_page(struct hstate *hstate) -{ - struct huge_bootmem_page *m; - int idx = shift_to_mmu_psize(huge_page_shift(hstate)); - int nr_gpages = gpage_freearray[idx].nr_gpages; - - if (nr_gpages == 0) - return 0; - -#ifdef CONFIG_HIGHMEM - /* - * If gpages can be in highmem we can't use the trick of storing the - * data structure in the page; allocate space for this - */ - m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); - m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; -#else - m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); -#endif - - list_add(&m->list, &huge_boot_pages); - gpage_freearray[idx].nr_gpages = nr_gpages; - gpage_freearray[idx].gpage_list[nr_gpages] = 0; - m->hstate = hstate; - - return 1; -} +#ifdef CONFIG_PPC_BOOK3S_64 /* - * Scan the command line hugepagesz= options for gigantic pages; store those in - * a list that we use to allocate the memory once all options are parsed. 
+ * Tracks gpages after the device tree is scanned and before the + * huge_boot_pages list is ready on pseries. */ - -unsigned long gpage_npages[MMU_PAGE_COUNT]; - -static int __init do_gpage_early_setup(char *param, char *val, - const char *unused, void *arg) -{ - static phys_addr_t size; - unsigned long npages; - - /* - * The hugepagesz and hugepages cmdline options are interleaved. We - * use the size variable to keep track of whether or not this was done - * properly and skip over instances where it is incorrect. Other - * command-line parsing code will issue warnings, so we don't need to. - * - */ - if ((strcmp(param, "default_hugepagesz") == 0) || - (strcmp(param, "hugepagesz") == 0)) { - size = memparse(val, NULL); - } else if (strcmp(param, "hugepages") == 0) { - if (size != 0) { - if (sscanf(val, "%lu", &npages) <= 0) - npages = 0; - if (npages > MAX_NUMBER_GPAGES) { - pr_warn("MMU: %lu pages requested for page " -#ifdef CONFIG_PHYS_ADDR_T_64BIT - "size %llu KB, limiting to " -#else - "size %u KB, limiting to " -#endif - __stringify(MAX_NUMBER_GPAGES) "\n", - npages, size / 1024); - npages = MAX_NUMBER_GPAGES; - } - gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; - size = 0; - } - } - return 0; -} - +#define MAX_NUMBER_GPAGES 1024 +__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES]; +__initdata static unsigned nr_gpages; /* - * This function allocates physical space for pages that are larger than the - * buddy allocator can handle. We want to allocate these in highmem because - * the amount of lowmem is limited. This means that this function MUST be - * called before lowmem_end_addr is set up in MMU_init() in order for the lmb - * allocate to grab highmem. - */ -void __init reserve_hugetlb_gpages(void) -{ - static __initdata char cmdline[COMMAND_LINE_SIZE]; - phys_addr_t size, base; - int i; - - strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); - parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0, - NULL, &do_gpage_early_setup); - - /* - * Walk gpage list in reverse, allocating larger page sizes first. - * Skip over unsupported sizes, or sizes that have 0 gpages allocated. - * When we reach the point in the list where pages are no longer - * considered gpages, we're done. - */ - for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { - if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) - continue; - else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) - break; - - size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); - base = memblock_alloc_base(size * gpage_npages[i], size, - MEMBLOCK_ALLOC_ANYWHERE); - add_gpage(base, size, gpage_npages[i]); - } -} - -#else /* !PPC_FSL_BOOK3E */ - -/* Build list of addresses of gigantic pages. This function is used in early + * Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. */ -void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) +void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { if (!addr) return; @@ -360,10 +215,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) } } -/* Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. - */ -int alloc_bootmem_huge_page(struct hstate *hstate) +int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -376,6 +228,17 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif + +int __init alloc_bootmem_huge_page(struct hstate *h) +{ + +#ifdef CONFIG_PPC_BOOK3S_64 + if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) + return pseries_alloc_bootmem_huge_page(h); +#endif + return __alloc_bootmem_huge_page(h); +} + #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 7d5fee1bb116..6419b33ca309 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -138,8 +138,6 @@ void __init MMU_init(void) * Reserve gigantic pages for hugetlb. This MUST occur before * lowmem_end_addr is initialized below. */ - reserve_hugetlb_gpages(); - if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock_enforce_memory_limit(memblock.memory.regions[0].size); -- cgit v1.2.3-58-ga151
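For reference, the reservation itself now goes through the generic hugetlb command line options; a hypothetical powernv example reserving two 16G gigantic pages at boot (the usable sizes depend on the MMU mode and system memory):

    hugepagesz=16G hugepages=2

On pseries hash, as the message above notes, the option does not allocate; the 16G pages come from the "ibm,expected#pages" device tree hint instead.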
From 4ae279c2c96ab38a78b954d218790a8f6db714e5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Jul 2017 10:31:27 +0530 Subject: powerpc/mm/hugetlb: Allow runtime allocation of 16G. Now that we have GIGANTIC_PAGE enabled on powerpc, use this for 16G hugepages with hash translation mode. Depending on the total system memory we have, we may be able to allocate 16G hugepages at runtime. This also removes the hugetlb setup difference between hash/radix translation mode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 5c28bd6f2ae1..2d1ca488ca44 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -54,9 +54,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { - if (radix_enabled()) - return true; - return false; + return true; } #endif -- cgit v1.2.3-58-ga151 From 7baebe54a64af44dc34d0bd51462f09fe07a2acb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 26 Jul 2017 21:34:30 +0800 Subject: powerpc/topology: Remove the unused parent_node() macro Commit a7be6e5a7f8d ("mm: drop useless local parameters of __register_one_node()") removes the last user of parent_node(). The parent_node() macro in the POWERPC platform is unnecessary. Remove it for cleanup. Reported-by: Michael Ellerman Signed-off-by: Dou Liyang Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/topology.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index dc4e15937ccf..2d84bca8d053 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -16,8 +16,6 @@ struct device_node; #include -#define parent_node(node) (node) - #define cpumask_of_node(node) ((node) == -1 ? \ cpu_all_mask : \ node_to_cpumask_map[node]) -- cgit v1.2.3-58-ga151 From 694fc88ce271fd48f7939c032c1247fef81db57f Mon Sep 17 00:00:00 2001 From: "Naveen N.
Rao" Date: Tue, 28 Mar 2017 01:07:40 +0530 Subject: powerpc/string: Implement optimized memset variants Based on Matthew Wilcox's patches for other architectures. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/string.h | 24 ++++++++++++++++++++++++ arch/powerpc/lib/mem_64.S | 19 ++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index da3cdffca440..b0e82466d4e8 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); +#ifdef CONFIG_PPC64 +#define __HAVE_ARCH_MEMSET16 +#define __HAVE_ARCH_MEMSET32 +#define __HAVE_ARCH_MEMSET64 + +extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); +extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); +extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); + +static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n) +{ + return __memset16(p, v, n * 2); +} + +static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n) +{ + return __memset32(p, v, n * 4); +} + +static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) +{ + return __memset64(p, v, n * 8); +} +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_STRING_H */ diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 85fa9869aec5..ec531de99996 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -13,6 +13,23 @@ #include #include +_GLOBAL(__memset16) + rlwimi r4,r4,16,0,15 + /* fall through */ + +_GLOBAL(__memset32) + rldimi r4,r4,32,0 + /* fall through */ + +_GLOBAL(__memset64) + neg r0,r3 + andi. r0,r0,7 + cmplw cr1,r5,r0 + b .Lms +EXPORT_SYMBOL(__memset16) +EXPORT_SYMBOL(__memset32) +EXPORT_SYMBOL(__memset64) + _GLOBAL(memset) neg r0,r3 rlwimi r4,r4,8,16,23 @@ -20,7 +37,7 @@ _GLOBAL(memset) rlwimi r4,r4,16,0,15 cmplw cr1,r5,r0 /* do we get that far? */ rldimi r4,r4,32,0 - PPC_MTOCRF(1,r0) +.Lms: PPC_MTOCRF(1,r0) mr r6,r3 blt cr1,8f beq+ 3f /* if already 8-byte aligned */ -- cgit v1.2.3-58-ga151 From 94171b19c3f1f4d9d4c0e3aaa1aa161def1ec7ea Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:53 +0530 Subject: powerpc/mm: Rename find_linux_pte_or_hugepte() Add newer helpers to make the function usage simpler. It is always recommended to use find_current_mm_pte() for walking the page table. If we cannot use find_current_mm_pte(), it should be documented why the said usage of __find_linux_pte() is safe against a parallel THP split. For now we have KVM code using __find_linux_pte(). This is because kvm code ends up calling __find_linux_pte() in real mode with MSR_EE=0 but with PACA soft_enabled = 1. We may want to fix that later and make sure we keep the MSR_EE and PACA soft_enabled in sync. When we do that we can switch kvm to use find_linux_pte(). 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 10 +--------- arch/powerpc/include/asm/pte-walk.h | 35 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/eeh.c | 4 ++-- arch/powerpc/kernel/io-workarounds.c | 5 +++-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 +++-- arch/powerpc/kvm/book3s_64_mmu_radix.c | 28 +++++++++++++-------------- arch/powerpc/kvm/book3s_64_vio_hv.c | 12 +++++++++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 18 ++++++++--------- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- arch/powerpc/mm/hash_utils_64.c | 5 +++-- arch/powerpc/mm/hugetlbpage.c | 24 +++++++++++++---------- arch/powerpc/mm/tlb_hash64.c | 6 ++++-- arch/powerpc/perf/callchain.c | 3 ++- 13 files changed, 103 insertions(+), 55 deletions(-) create mode 100644 arch/powerpc/include/asm/pte-walk.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index afae9a336136..eb9d57defb75 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif -pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift); -static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift) -{ - VM_WARN(!arch_irqs_disabled(), - "%s called with irq enabled\n", __func__); - return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); -} +/* can we use this in kvm */ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h new file mode 100644 index 000000000000..2d633e9d686c --- /dev/null +++ b/arch/powerpc/include/asm/pte-walk.h @@ -0,0 +1,35 @@ +#ifndef _ASM_POWERPC_PTE_WALK_H +#define _ASM_POWERPC_PTE_WALK_H + +#include + +/* Don't use this directly */ +extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift); + +static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) +{ + pgd_t *pgdir = init_mm.pgd; + return __find_linux_pte(pgdir, ea, NULL, hshift); +} +/* + * This is what we should always use. Any other lockless page table lookup needs + * careful audit against THP split. + */ +static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + VM_WARN(pgdir != current->mm->pgd, + "%s lock less page table lookup called on wrong mm\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +#endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 63992b2d8e15..5e6887c40528 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -44,6 +44,7 @@ #include #include #include +#include /** Overview: @@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) * worried about _PAGE_SPLITTING/collapse. Also we will not hit * page table free, because of init_mm. 
*/ - ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, - NULL, &hugepage_shift); + ptep = find_init_mm_pte(token, &hugepage_shift); if (!ptep) return token; WARN_ON(hugepage_shift); diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index a582e0d42525..bbe85f5aea71 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -19,6 +19,8 @@ #include #include #include +#include + #define IOWA_MAX_BUS 8 @@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) * We won't find huge pages here (iomem). Also can't hit * a page table free due to init_mm */ - ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr, - NULL, &hugepage_shift); + ptep = find_init_mm_pte(vaddr, &hugepage_shift); if (ptep == NULL) paddr = 0; else { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cb0190e2a73..4b219db39c47 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "trace_hv.h" @@ -597,8 +598,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c5762..7d719c8aa0bb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Supported radix tree geometry. @@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (writing) pgflags |= _PAGE_DIRTY; local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, - NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); if (ptep) { pte = READ_ONCE(*ptep); if (pte_present(pte) && @@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, spin_unlock(&kvm->mmu_lock); return RESUME_GUEST; } - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, - gpa, NULL, &shift); + /* + * We are walking the secondary page table here. We can do this + * without disabling irq. 
+ */ + ptep = __find_linux_pte(kvm->arch.pgtable, + gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); @@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); @@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; unsigned long old; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, gpa, shift); @@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, unsigned int shift; int ret = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ret = 1; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 3adfd2f5301c..c32e9bfe75b1 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_BUG @@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, pte_t *ptep, pte; unsigned shift = 0; - ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); + /* + * Called in real mode with MSR_EE = 0. We are safe here. + * It is ok to do the lookup with arch.pgdir here, because + * we are doing this on secondary cpus and current task there + * is not the hypervisor. Also this is safe against THP in the + * host, because an IPI to primary thread will wait for the secondary + * to exit which will agains result in the below page table walk + * to finish. + */ + ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); if (!ptep || !pte_present(*ptep)) return -ENXIO; pte = *ptep; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 584c74c8119f..fedb0139524c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) @@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x) /* * assume we don't have huge pages in vmalloc space... * So don't worry about THP collapse/split. 
Called - * Only in realmode, hence won't need irq_save/restore. + * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. */ - p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL); + p = find_init_mm_pte(addr, NULL); if (!p || !pte_present(*p)) return NULL; addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); @@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * If we had a page table table change after lookup, we would * retry via mmu_notifier_retry. */ - if (realmode) - ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - else { + if (!realmode) local_irq_save(irq_flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - } + /* + * If called in real mode we have MSR_EE = 0. Otherwise + * we disable irq above. + */ + ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 77fd043b3ecc..c6c734424c70 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "e500.h" #include "timing.h" @@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, NULL); if (ptep) { pte_t pte = READ_ONCE(*ptep); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7a20669c19e7..5b10b4fcbf76 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -61,6 +61,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -1297,7 +1298,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ - ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift); + ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); rc = 1; @@ -1526,7 +1527,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, * THP pages use update_mmu_cache_pmd. We don't do * hash preload there. Hence can ignore THP here */ - ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift); + ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift); if (!ptep) goto out_exit; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e1bf5ca397fe..70a3a2bdf06c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,6 +24,8 @@ #include #include #include +#include + #ifdef CONFIG_HUGETLB_PAGE @@ -60,8 +62,11 @@ static unsigned nr_gpages; pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { - /* Only called for hugetlbfs pages, hence can ignore THP */ - return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL); + /* + * Only called for hugetlbfs pages, hence can ignore THP and the + * irq disabled walk. + */ + return __find_linux_pte(mm->pgd, addr, NULL, NULL); } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, @@ -886,9 +891,8 @@ void flush_dcache_icache_hugepage(struct page *page) * This function need to be called with interrupts disabled. 
We use this variant * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 */ - -pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift) +pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hpage_shift) { pgd_t pgd, *pgdp; pud_t pud, *pudp; @@ -897,8 +901,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, hugepd_t *hpdp = NULL; unsigned pdshift = PGDIR_SHIFT; - if (shift) - *shift = 0; + if (hpage_shift) + *hpage_shift = 0; if (is_thp) *is_thp = false; @@ -968,11 +972,11 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: - if (shift) - *shift = pdshift; + if (hpage_shift) + *hpage_shift = pdshift; return ret_pte; } -EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte); +EXPORT_SYMBOL_GPL(__find_linux_pte); int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index b5b0fb97b9c0..71cb2742bd16 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -29,6 +29,8 @@ #include #include #include +#include + #include @@ -207,8 +209,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp, - &hugepage_shift); + pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp, + &hugepage_shift); unsigned long pte; if (ptep == NULL) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 0fc26714780a..0af051a1974e 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -22,6 +22,7 @@ #ifdef CONFIG_PPC64 #include "../kernel/ppc32.h" #endif +#include /* @@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb) return -EFAULT; local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift); + ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); if (!ptep) goto err_out; if (!shift) -- cgit v1.2.3-58-ga151 From fa4531f753f1c80d21b5eb86ec5c0229310c5fb0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:54 +0530 Subject: powerpc/mm: Don't send IPI to all cpus on THP updates Now that we made sure that lockless walk of linux page table is mostly limitted to current task(current->mm->pgdir) we can update the THP update sequence to only send IPI to CPUs on which this task has run. This helps in reducing the IPI overload on systems with large number of CPUs. WRT kvm even though kvm is walking page table with vpc->arch.pgdir, it is done only on secondary CPUs and in that case we have primary CPU added to task's mm cpumask. Sending an IPI to primary will force the secondary to do a vm exit and hence this mm cpumask usage is safe here. WRT CAPI, we still end up walking linux page table with capi context MM. For now the pte lookup serialization sends an IPI to all CPUs in CPI is in use. We can further improve this by adding the CAPI interrupt handling CPU to task mm cpumask. That will be done in a later patch. 
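The IPI restricted to mm_cpumask(mm) is sufficient because the lockless readers run with interrupts disabled: once every CPU in the mask has taken the dummy IPI, no walker can still be inside the old page table. A sketch of the write-side pairing (illustrative only; the function below is made up, the real users are pmdp_invalidate() and the collapse/huge-get-and-clear paths changed by this patch):

	#include <linux/mm.h>
	#include <asm/pgtable.h>

	/* Hypothetical: replace a huge PMD with a regular page table. */
	static void demote_huge_pmd(struct mm_struct *mm, pmd_t *pmdp,
				    pgtable_t pgtable)
	{
		pmd_clear(pmdp);			/* hide the huge mapping */
		serialize_against_pte_lookup(mm);	/* wait for lockless walkers to finish */
		pmd_populate(mm, pmdp, pgtable);	/* now safe to install the page table */
	}

A reader that fetched the PMD before pmd_clear() still holds a pte_t view of the huge page, but it must have re-enabled interrupts, and therefore finished, before the page table pointer can be installed.
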
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/mm/pgtable-book3s64.c | 32 +++++++++++++++++++++++++++- arch/powerpc/mm/pgtable-hash64.c | 8 +++---- arch/powerpc/mm/pgtable-radix.c | 8 +++---- 4 files changed, 40 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1071d52f382d..a43c60aa1c2c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1165,6 +1165,7 @@ static inline bool arch_needs_pgtable_deposit(void) return false; return true; } +extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 31eed8fa8e99..57b947cde2bf 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -64,6 +65,35 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } + +static void do_nothing(void *unused) +{ + +} +/* + * Serialize against find_current_mm_pte which does lock-less + * lookup in page tables with local interrupts disabled. For huge pages + * it casts pmd_t to pte_t. Since format of pte_t is different from + * pmd_t we want to prevent transit from pmd pointing to page table + * to pmd pointing to huge page (and back) while interrupts are disabled. + * We clear pmd to possibly replace it with page table pointer in + * different code paths. So make sure we wait for the parallel + * find_current_mm_pte to finish. + */ +void serialize_against_pte_lookup(struct mm_struct *mm) +{ + smp_mb(); + /* + * Cxl fault handling requires us to do a lockless page table + * walk while inserting hash page table entry with mm tracked + * in cxl context. Hence we need to do a global flush. + */ + if (cxl_ctx_in_use()) + smp_call_function(do_nothing, NULL, 1); + else + smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1); +} + /* * We use this to invalidate a pmdp entry before switching from a * hugepte to regular pmd entry. @@ -77,7 +107,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. */ - kick_all_cpus_sync(); + serialize_against_pte_lookup(vma->vm_mm); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 443a2c66a304..c0a7372bdaa6 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -239,7 +239,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres * by sending an IPI to all the cpus and executing a dummy * function there. */ - kick_all_cpus_sync(); + serialize_against_pte_lookup(vma->vm_mm); /* * Now invalidate the hpte entries in the range * covered by pmd. This make sure we take a @@ -380,16 +380,16 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, */ memset(pgtable, 0, PTE_FRAG_SIZE); /* - * Serialize against find_linux_pte_or_hugepte which does lock-less + * Serialize against find_current_mm_pte variants which does lock-less * lookup in page tables with local interrupts disabled. 
For huge pages * it casts pmd_t to pte_t. Since format of pte_t is different from * pmd_t we want to prevent transit from pmd pointing to page table * to pmd pointing to huge page (and back) while interrupts are disabled. * We clear pmd to possibly replace it with page table pointer in * different code paths. So make sure we wait for the parallel - * find_linux_pte_or_hugepage to finish. + * find_curren_mm_pte to finish. */ - kick_all_cpus_sync(); + serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5d8be076f8e5..1ee5452daf3a 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -811,7 +811,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre pmd_clear(pmdp); /*FIXME!! Verify whether we need this kick below */ - kick_all_cpus_sync(); + serialize_against_pte_lookup(vma->vm_mm); radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); @@ -873,16 +873,16 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0); old_pmd = __pmd(old); /* - * Serialize against find_linux_pte_or_hugepte which does lock-less + * Serialize against find_current_mm_pte which does lock-less * lookup in page tables with local interrupts disabled. For huge pages * it casts pmd_t to pte_t. Since format of pte_t is different from * pmd_t we want to prevent transit from pmd pointing to page table * to pmd pointing to huge page (and back) while interrupts are disabled. * We clear pmd to possibly replace it with page table pointer in * different code paths. So make sure we wait for the parallel - * find_linux_pte_or_hugepage to finish. + * find_current_mm_pte to finish. */ - kick_all_cpus_sync(); + serialize_against_pte_lookup(mm); return old_pmd; } -- cgit v1.2.3-58-ga151 From 43ed84a891b70165a621a5c92196949efd57be39 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:27:58 +1000 Subject: powerpc/mm: Move pgdir setting into a helper Makes switch_mm_irqs_off() a bit more readable Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 2ab328f0159e..992123e4e7f6 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -77,6 +77,26 @@ extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); +#if defined(CONFIG_PPC32) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 32-bit keeps track of the current PGDIR in the thread struct */ + tsk->thread.pgdir = mm->pgd; +} +#elif defined(CONFIG_PPC_BOOK3E_64) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 64-bit Book3E keeps track of current PGD in the PACA */ + get_paca()->pgd = mm->pgd; +} +#else +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) { } +#endif + + /* * switch_mm is the entry point called from the architecture independent * code in kernel/sched/core.c @@ -111,15 +131,9 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, new_on_cpu = true; } - /* 32-bit keeps track of the 
current PGDIR in the thread struct */ -#ifdef CONFIG_PPC32 - tsk->thread.pgdir = next->pgd; -#endif /* CONFIG_PPC32 */ + /* Some subarchs need to track the PGD elsewhere */ + switch_mm_pgdir(tsk, next); - /* 64-bit Book3E keeps track of current PGD in the PACA */ -#ifdef CONFIG_PPC_BOOK3E_64 - get_paca()->pgd = next->pgd; -#endif /* Nothing else to do if we aren't actually switching */ if (prev == next) return; -- cgit v1.2.3-58-ga151 From 058ccc3465d9b8fb34d59ca099a8a7ace1a62cdd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:27:59 +1000 Subject: powerpc/mm: Avoid double irq save/restore in activate_mm It calls switch_mm() which already does the irq save/restore these days. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 992123e4e7f6..fb99c27bbf5e 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -176,11 +176,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - unsigned long flags; - - local_irq_save(flags); switch_mm(prev, next, current); - local_irq_restore(flags); } /* We don't currently use enter_lazy_tlb() for anything */ -- cgit v1.2.3-58-ga151 From a619e59c075c66e530a88e57b45bb0417e2f4fff Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:02 +1000 Subject: powerpc/mm: Optimize detection of thread local mm's Instead of comparing the whole CPU mask every time, let's keep a counter of how many bits are set in the mask. Thus testing for a local mm only requires testing if that counter is 1 and the current CPU bit is set in the mask. 
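For illustration (a sketch, not from this patch; flush_local_tlb()/flush_global_tlb() are stand-ins for the real tlbiel/tlbie based routines), this is the kind of decision the cheaper test feeds:

	#include <linux/mm_types.h>
	#include <asm/tlb.h>

	/* Stand-ins for the real local/broadcast TLB flush implementations. */
	static void flush_local_tlb(struct mm_struct *mm) { }
	static void flush_global_tlb(struct mm_struct *mm) { }

	static void flush_mm_tlb(struct mm_struct *mm)
	{
		preempt_disable();		/* smp_processor_id() must be stable */
		if (mm_is_thread_local(mm))
			flush_local_tlb(mm);	/* tlbiel: no broadcast required */
		else
			flush_global_tlb(mm);	/* tlbie: broadcast to other CPUs */
		preempt_enable();
	}

With the counter, the common single-threaded case costs one atomic_read() plus one cpumask_test_cpu() rather than a cpumask_equal() comparison of the full (potentially 2048-bit) mask.
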
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 +++ arch/powerpc/include/asm/mmu_context.h | 9 +++++++++ arch/powerpc/include/asm/tlb.h | 11 ++++++++++- arch/powerpc/mm/mmu_context_book3s64.c | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1a220cdff923..c3b00e8ff791 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -83,6 +83,9 @@ typedef struct { mm_context_id_t id; u16 user_psize; /* page size index */ + /* Number of bits in the mm_cpumask */ + atomic_t active_cpus; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index fb99c27bbf5e..2338abf6101a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,6 +96,14 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, struct mm_struct *mm) { } #endif +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void inc_mm_active_cpus(struct mm_struct *mm) +{ + atomic_inc(&mm->context.active_cpus); +} +#else +static inline void inc_mm_active_cpus(struct mm_struct *mm) { } +#endif /* * switch_mm is the entry point called from the architecture independent @@ -110,6 +118,7 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + inc_mm_active_cpus(next); /* * This full barrier orders the store to the cpumask above vs diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 609557569f65..a7eabff27a0f 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -69,13 +69,22 @@ static inline int mm_is_core_local(struct mm_struct *mm) topology_sibling_cpumask(smp_processor_id())); } +#ifdef CONFIG_PPC_BOOK3S_64 +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + if (atomic_read(&mm->context.active_cpus) > 1) + return false; + return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); +} +#else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { return cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())); } +#endif /* !CONFIG_PPC_BOOK3S_64 */ -#else +#else /* CONFIG_SMP */ static inline int mm_is_core_local(struct mm_struct *mm) { return 1; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 60188f4b3ecd..05e15386d4cb 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -170,6 +170,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(mm); #endif + atomic_set(&mm->context.active_cpus, 0); + return 0; } -- cgit v1.2.3-58-ga151 From 3a2df3798d4da2fc40052c25f0d9c687b1467d53 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:03 +1000 Subject: powerpc/mm: Make switch_mm_irqs_off() out of line It's too big to be inline, there is no reason to keep it that way. 
Signed-off-by: Benjamin Herrenschmidt [mpe: Rework to incorporate the comment changes via fixes branch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 89 +----------------------------- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/mmu_context.c | 99 ++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 88 deletions(-) create mode 100644 arch/powerpc/mm/mmu_context.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 2338abf6101a..309592589e30 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -77,93 +77,8 @@ extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); -#if defined(CONFIG_PPC32) -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) -{ - /* 32-bit keeps track of the current PGDIR in the thread struct */ - tsk->thread.pgdir = mm->pgd; -} -#elif defined(CONFIG_PPC_BOOK3E_64) -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) -{ - /* 64-bit Book3E keeps track of current PGD in the PACA */ - get_paca()->pgd = mm->pgd; -} -#else -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) { } -#endif - -#ifdef CONFIG_PPC_BOOK3S_64 -static inline void inc_mm_active_cpus(struct mm_struct *mm) -{ - atomic_inc(&mm->context.active_cpus); -} -#else -static inline void inc_mm_active_cpus(struct mm_struct *mm) { } -#endif - -/* - * switch_mm is the entry point called from the architecture independent - * code in kernel/sched/core.c - */ -static inline void switch_mm_irqs_off(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - bool new_on_cpu = false; - - /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - inc_mm_active_cpus(next); - - /* - * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. - * - * When using the radix MMU that operation is the load of the - * MMU context id, which is then moved to SPRN_PID. - * - * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). - * - * On the read side the barrier is in pte_xchg(), which orders - * the store to the PTE vs the load of mm_cpumask. - */ - smp_mb(); - - new_on_cpu = true; - } - - /* Some subarchs need to track the PGD elsewhere */ - switch_mm_pgdir(tsk, next); - - /* Nothing else to do if we aren't actually switching */ - if (prev == next) - return; - - /* We must stop all altivec streams before changing the HW - * context - */ -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); -#endif /* CONFIG_ALTIVEC */ - - if (new_on_cpu) - radix_kvm_prefetch_workaround(next); - - /* - * The actual HW switching method differs between the various - * sub architectures. 
Out of line for now - */ - switch_mmu_context(prev, next, tsk); -} +extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index fa6a6ba34355..fb844d2f266e 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ - init-common.o + init-common.o mmu_context.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c new file mode 100644 index 000000000000..0f613bc63c50 --- /dev/null +++ b/arch/powerpc/mm/mmu_context.c @@ -0,0 +1,99 @@ +/* + * Common implementation of switch_mm_irqs_off + * + * Copyright IBM Corp. 2017 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include + +#if defined(CONFIG_PPC32) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 32-bit keeps track of the current PGDIR in the thread struct */ + tsk->thread.pgdir = mm->pgd; +} +#elif defined(CONFIG_PPC_BOOK3E_64) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 64-bit Book3E keeps track of current PGD in the PACA */ + get_paca()->pgd = mm->pgd; +} +#else +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) { } +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void inc_mm_active_cpus(struct mm_struct *mm) +{ + atomic_inc(&mm->context.active_cpus); +} +#else +static inline void inc_mm_active_cpus(struct mm_struct *mm) { } +#endif + +void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + bool new_on_cpu = false; + + /* Mark this context has been used on the new CPU */ + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + inc_mm_active_cpus(next); + + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. + * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + + new_on_cpu = true; + } + + /* Some subarchs need to track the PGD elsewhere */ + switch_mm_pgdir(tsk, next); + + /* Nothing else to do if we aren't actually switching */ + if (prev == next) + return; + + /* + * We must stop all altivec streams before changing the HW + * context + */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + asm volatile ("dssall"); + + if (new_on_cpu) + radix_kvm_prefetch_workaround(next); + + /* + * The actual HW switching method differs between the various + * sub architectures. 
Out of line for now + */ + switch_mmu_context(prev, next, tsk); +} + -- cgit v1.2.3-58-ga151 From 94a04bc25a2c6296bd0c5e82c10e8231c2b11f77 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 25 Aug 2017 14:30:33 +1000 Subject: KVM: PPC: Book3S HV: POWER9 does not require secondary thread management POWER9 CPUs have independent MMU contexts per thread, so KVM does not need to quiesce secondary threads, so the hwthread_req/hwthread_state protocol does not have to be used. So patch it away on POWER9, and patch away the branch from the Linux idle wakeup to kvm_start_guest that is never used. Add a warning and error out of kvmppc_grab_hwthread in case it is ever called on POWER9. This avoids a hwsync in the idle wakeup path on POWER9. Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras [mpe: Use WARN(...) instead of WARN_ON()/pr_err(...)] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kvm_book3s_asm.h | 4 ++++ arch/powerpc/kernel/idle_book3s.S | 35 +++++++++++++++++++++---------- arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 +++++++ 4 files changed, 48 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 7cea76f11c26..83596f32f50b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -104,6 +104,10 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * hwthread_req/hwthread_state pair is used to pull sibling threads + * out of guest on pre-ISAv3.0B CPUs where threads share MMU. + */ u8 hwthread_req; u8 hwthread_state; u8 host_ipi; diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..294d024bfb61 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -242,13 +242,20 @@ enter_winkle: /* * r3 - PSSCR value corresponding to the requested stop state. */ -power_enter_stop: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ +power_enter_stop_kvm_rm: + /* + * This is currently unused because POWER9 KVM does not have to + * gather secondary threads into sibling mode, but the code is + * here in case that function is required. + * + * Tell KVM we're entering idle. + */ li r4,KVM_HWTHREAD_IN_IDLE /* DO THIS IN REAL MODE! See comment above. */ stb r4,HSTATE_HWTHREAD_STATE(r13) #endif +power_enter_stop: /* * Check if we are executing the lite variant with ESL=EC=0 */ @@ -411,6 +418,18 @@ pnv_powersave_wakeup_mce: b pnv_powersave_wakeup +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +kvm_start_guest_check: + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beqlr + b kvm_start_guest +#endif + /* * Called from reset vector for powersave wakeups. * cr3 - set to gt if waking up with partial/complete hypervisor state loss @@ -435,15 +454,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mr r3,r12 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. 
testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - b kvm_start_guest -1: +BEGIN_FTR_SECTION + bl kvm_start_guest_check +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #endif /* Return SRR1 from power7_nap() */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0b436df746fc..8bad44b46dc8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2111,6 +2111,15 @@ static int kvmppc_grab_hwthread(int cpu) struct paca_struct *tpaca; long timeout = 10000; + /* + * ISA v3.0 idle routines do not set hwthread_state or test + * hwthread_req, so they can not grab idle threads. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + WARN(1, "KVM: can not control sibling threads\n"); + return -EBUSY; + } + tpaca = &paca[cpu]; /* Ensure the thread won't go into the kernel if it wakes */ @@ -2145,10 +2154,12 @@ static void kvmppc_release_hwthread(int cpu) struct paca_struct *tpaca; tpaca = &paca[cpu]; - tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; tpaca->kvm_hstate.kvm_vcore = NULL; tpaca->kvm_hstate.kvm_split_mode = NULL; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + tpaca->kvm_hstate.hwthread_req = 0; + } static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cb44065e2946..eacf3d06eb75 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) subf r4, r4, r3 mtspr SPRN_DEC, r4 +BEGIN_FTR_SECTION /* hwthread_req may have got set by cede or no vcpu, so clear it */ li r0, 0 stb r0, HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* * For external interrupts we need to call the Linux @@ -314,6 +316,7 @@ kvm_novcpu_exit: * Relocation is off and most register values are lost. * r13 points to the PACA. * r3 contains the SRR1 wakeup value, SRR1 is trashed. + * This is not used by ISAv3.0B processors. */ .globl kvm_start_guest kvm_start_guest: @@ -432,6 +435,9 @@ kvm_secondary_got_guest: * While waiting we also need to check if we get given a vcpu to run. */ kvm_no_guest: +BEGIN_FTR_SECTION + twi 31,0,0 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r3, HSTATE_HWTHREAD_REQ(r13) cmpwi r3, 0 bne 53f @@ -2466,8 +2472,10 @@ kvm_do_nap: clrrdi r0, r0, 1 mtspr SPRN_CTRLT, r0 +BEGIN_FTR_SECTION li r0,1 stb r0,HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 BEGIN_FTR_SECTION -- cgit v1.2.3-58-ga151 From aafc8a8300ec0f20b99c72e5a10e22535cb99dd1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 29 Aug 2017 21:36:35 +1000 Subject: powerpc/64s: Move IDLE_STATE_ENTER_SEQ[_NORET] into idle_book3s.S This macro is only used in idle_book3s.S, move it in there and add a more descriptive comment. 
Signed-off-by: Nicholas Piggin [mpe: Split out of larger patch and write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 16 ---------------- arch/powerpc/kernel/idle_book3s.S | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 8a174cba5567..eb43b5c3a7b5 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -101,20 +101,4 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) #endif -/* Idle state entry routines */ -#ifdef CONFIG_PPC_P7_NAP -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -236: cmpd cr0,r0,r0; \ - bne 236b; \ - IDLE_INST; \ - -#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ - IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - b . -#endif /* CONFIG_PPC_P7_NAP */ - #endif diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 4924647d964d..c3cc18573ca6 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -205,6 +205,23 @@ pnv_powersave_common: mtmsrd r7,0 bctr +/* + * This is the sequence required to execute idle instructions, as + * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. + */ +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ + IDLE_INST; + +#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ + IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + b . + .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -- cgit v1.2.3-58-ga151 From 70412c55d419e971785094e9f7880fdbcd690520 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Aug 2017 14:27:19 +1000 Subject: powerpc/64: Fix watchdog configuration regressions This fixes a couple more bits of fallout from the new hard lockup watchdog patch. It restores the required hw_nmi_get_sample_period() function for the perf watchdog, and removes some function declarations on 64e that are only defined for 64s. This fixes the 64e build when the hardlockup detector is enabled. It restores the default behaviour of disabling the perf watchdog, and also fixes disabling the 64s watchdog when running as a guest. 
Fixes: 2104180a53 ("powerpc/64s: implement arch-specific hardlockup watchdog") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nmi.h | 3 +-- arch/powerpc/kernel/kvm.c | 7 ------- arch/powerpc/kernel/setup_64.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 6f8e79cd35d8..3760150a0ff0 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -1,9 +1,8 @@ #ifndef _ASM_NMI_H #define _ASM_NMI_H -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); - extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 1086ea37c832..9ad37f827a97 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,7 +25,6 @@ #include #include #include -#include /* hardlockup_detector_disable() */ #include #include @@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void) static int __init kvm_guest_init(void) { - /* - * The hardlockup detector is likely to get false positives in - * KVM guests, so disable it by default. - */ - hardlockup_detector_disable(); - if (!kvm_para_available()) goto free_tmp; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7393bac3c7f4..b89c6aac48c9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -756,3 +756,31 @@ unsigned long memory_block_size_bytes(void) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif + +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + return ppc_proc_freq * watchdog_thresh; +} +#endif + +/* + * The perf based hardlockup detector breaks PMU event based branches, so + * disable it by default. Book3S has a soft-nmi hardlockup detector based + * on the decrementer interrupt, so it does not suffer from this problem. + * + * It is likely to get false positives in VM guests, so disable it there + * by default too. + */ +static int __init disable_hardlockup_detector(void) +{ +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF + hardlockup_detector_disable(); +#else + if (firmware_has_feature(FW_FEATURE_LPAR)) + hardlockup_detector_disable(); +#endif + + return 0; +} +early_initcall(disable_hardlockup_detector); -- cgit v1.2.3-58-ga151 From a3b2cb30f252b21a6f962e0dd107c8b897ca65e4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 5 Jul 2017 13:56:25 +1000 Subject: powerpc: Do not call ppc_md.panic in fadump panic notifier If fadump is not registered, and no other crash or debug handlers are registered, the powerpc panic handler stops the guest before the generic panic code can push out debug information to the console. Currently, system reset injection causes the guest to silently stop. Stop calling ppc_md.panic in the panic notifier. crash_fadump already does rtas_os_term() to terminate the guest if fadump is registered. Remove ppc_md.panic. Move fadump panic notifier into fadump code. 
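The INT_MIN priority is what enforces the ordering described here: atomic notifier chains run their callbacks in descending priority order, so every other panic notifier (normally priority 0) runs first and can push its state out; the fadump notifier, which may never return, is deliberately called last. A hedged sketch of such an ordinary notifier (the driver name and message are made up):

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include <linux/notifier.h>

	static int mydrv_panic_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
	{
		pr_emerg("mydrv: dumping device state before fadump/reboot\n");
		return NOTIFY_DONE;
	}

	static struct notifier_block mydrv_panic_block = {
		.notifier_call	= mydrv_panic_event,
		/* .priority left at 0: runs before the INT_MIN fadump notifier */
	};

	static int __init mydrv_panic_init(void)
	{
		atomic_notifier_chain_register(&panic_notifier_list,
					       &mydrv_panic_block);
		return 0;
	}
	device_initcall(mydrv_panic_init);
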
Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 1 - arch/powerpc/include/asm/setup.h | 1 - arch/powerpc/kernel/fadump.c | 22 ++++++++++++++++++++++ arch/powerpc/kernel/setup-common.c | 27 --------------------------- arch/powerpc/platforms/ps3/setup.c | 15 --------------- arch/powerpc/platforms/pseries/setup.c | 1 - 6 files changed, 22 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cd2fc1cc1cc7..73b92017b6d7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -76,7 +76,6 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); - void (*panic)(char *str); void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 654d64c9f3ac..3a3fb0ca68f5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -23,7 +23,6 @@ extern void reloc_got2(unsigned long); void check_for_initrd(void); void initmem_init(void); -void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index dc0c49cfd90a..dd2c85f0bd8d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1446,6 +1446,25 @@ static void fadump_init_files(void) return; } +static int fadump_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + /* + * If firmware-assisted dump has been registered then trigger + * firmware-assisted dump and let firmware handle everything + * else. If this returns, then fadump was not registered, so + * go through the rest of the panic path. + */ + crash_fadump(NULL, ptr); + + return NOTIFY_DONE; +} + +static struct notifier_block fadump_panic_block = { + .notifier_call = fadump_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + /* * Prepare for firmware-assisted dump. */ @@ -1478,6 +1497,9 @@ int __init setup_fadump(void) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); fadump_init_files(); + atomic_notifier_chain_register(&panic_notifier_list, + &fadump_panic_block); + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index c8ea5bd28302..7de73589d8e2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port) } EXPORT_SYMBOL(check_legacy_ioport); -static int ppc_panic_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - /* - * If firmware-assisted dump has been registered then trigger - * firmware-assisted dump and let firmware handle everything else. - */ - crash_fadump(NULL, ptr); - ppc_md.panic(ptr); /* May not return */ - return NOTIFY_DONE; -} - -static struct notifier_block ppc_panic_block = { - .notifier_call = ppc_panic_event, - .priority = INT_MIN /* may not return; must be done last */ -}; - -void __init setup_panic(void) -{ - if (!ppc_md.panic) - return; - atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); -} - #ifdef CONFIG_CHECK_CACHE_COHERENCY /* * For platforms that have configurable cache-coherency. 
This function @@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p) /* Probe the machine type, establish ppc_md. */ probe_machine(); - /* Setup panic notifier if requested by the platform. */ - setup_panic(); - /* * Configure ppc_md.power_save (ppc32 only, 64-bit machines do * it from their respective probe() function. diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 6244bc849469..9dabea6e1443 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -104,20 +104,6 @@ static void __noreturn ps3_halt(void) ps3_sys_manager_halt(); /* never returns */ } -static void ps3_panic(char *str) -{ - DBG("%s:%d %s\n", __func__, __LINE__, str); - - smp_send_stop(); - printk("\n"); - printk(" System does not reboot automatically.\n"); - printk(" Please press POWER button.\n"); - printk("\n"); - - while(1) - lv1_pause(1); -} - #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) static void __init prealloc(struct ps3_prealloc *p) @@ -269,7 +255,6 @@ define_machine(ps3) { .probe = ps3_probe, .setup_arch = ps3_setup_arch, .init_IRQ = ps3_init_IRQ, - .panic = ps3_panic, .get_boot_time = ps3_get_boot_time, .set_dabr = ps3_set_dabr, .calibrate_decr = ps3_calibrate_decr, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b5d86426e97b..b5b650910cf5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -722,7 +722,6 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, - .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, -- cgit v1.2.3-58-ga151 From b746e3e01e70d23ef53dcde1203ab78a1b7ac514 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Jul 2017 16:59:10 +1000 Subject: powerpc/powernv: Flush console before platform error reboot Unrecovered MCE and HMI errors are sent through a special restart OPAL call to log the platform error. The downside is that they don't go through normal Linux crash paths, so they don't give much information to the Linux console. Change this by providing a special crash function which does some of the console flushing from the panic() path before calling firmware to reboot. The downside of this is a little more code to execute before reaching the firmware reboot. However in practice, it's critical to get the Linux console messages output in order to debug a problem. So this is a desirable tradeoff. Note on the implementation: It is difficult to plumb a custom reboot handler into the panic path, because panic does a little bit too much work. For example, it will try to delay with the timebase, but that may be corrupted in some cases resulting in a hang without reaching the platform reboot. Another problem is that panic can invoke the crash dump code which is not what we want in the case of a hardware platform error. Long-term the best solution will be to rework the panic path so it can be suitable for this kind of panic, but for now we just duplicate a bit of the code. 
Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 2 +- arch/powerpc/platforms/powernv/opal-hmi.c | 22 ++------ arch/powerpc/platforms/powernv/opal.c | 89 ++++++++++++++++++------------- arch/powerpc/platforms/powernv/powernv.h | 2 + 4 files changed, 57 insertions(+), 58 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 97ff192f5cfb..726c23304a57 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -50,7 +50,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); -int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag); +int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 88f3c61eec95..d78fed728cdf 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -30,6 +30,8 @@ #include #include +#include "powernv.h" + static int opal_hmi_handler_nb_init; struct OpalHmiEvtNode { struct list_head list; @@ -267,8 +269,6 @@ static void hmi_event_handler(struct work_struct *work) spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); if (unrecoverable) { - int ret; - /* Pull all HMI events from OPAL before we panic. */ while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) { u32 type; @@ -284,23 +284,7 @@ static void hmi_event_handler(struct work_struct *work) print_hmi_event_info(hmi_evt); } - /* - * Unrecoverable HMI exception. We need to inform BMC/OCC - * about this error so that it can collect relevant data - * for error analysis before rebooting. - */ - ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, - "Unrecoverable HMI exception"); - if (ret == OPAL_UNSUPPORTED) { - pr_emerg("Reboot type %d not supported\n", - OPAL_REBOOT_PLATFORM_ERROR); - } - - /* - * Fall through and panic if opal_cec_reboot2() returns - * OPAL_UNSUPPORTED. - */ - panic("Unrecoverable HMI exception"); + pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception"); } } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4448e458a797..d246f919cba2 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -26,6 +26,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -435,10 +439,55 @@ static int opal_recover_mce(struct pt_regs *regs, return recovered; } +void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) +{ + /* + * This is mostly taken from kernel/panic.c, but tries to do + * relatively minimal work. Don't use delay functions (TB may + * be broken), don't crash dump (need to set a firmware log), + * don't run notifiers. We do want to get some information to + * Linux console. 
+ */ + console_verbose(); + bust_spinlocks(1); + pr_emerg("Hardware platform error: %s\n", msg); + if (regs) + show_regs(regs); + smp_send_stop(); + printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(0); + debug_locks_off(); + console_flush_on_panic(); + + /* + * Don't bother to shut things down because this will + * xstop the system. + */ + if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg) + == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported for %s\n", + OPAL_REBOOT_PLATFORM_ERROR, msg); + } + + /* + * We reached here. There can be three possibilities: + * 1. We are running on a firmware level that do not support + * opal_cec_reboot2() + * 2. We are running on a firmware level that do not support + * OPAL_REBOOT_PLATFORM_ERROR reboot type. + * 3. We are running on FSP based system that does not need + * opal to trigger checkstop explicitly for error analysis. + * The FSP PRD component would have already got notified + * about this error through other channels. + */ + + ppc_md.restart(NULL); +} + int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; - int ret; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return 0; @@ -454,43 +503,7 @@ int opal_machine_check(struct pt_regs *regs) if (opal_recover_mce(regs, &evt)) return 1; - /* - * Unrecovered machine check, we are heading to panic path. - * - * We may have hit this MCE in very early stage of kernel - * initialization even before opal-prd has started running. If - * this is the case then this MCE error may go un-noticed or - * un-analyzed if we go down panic path. We need to inform - * BMC/OCC about this error so that they can collect relevant - * data for error analysis before rebooting. - * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so. - * This function may not return on BMC based system. - */ - ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, - "Unrecoverable Machine Check exception"); - if (ret == OPAL_UNSUPPORTED) { - pr_emerg("Reboot type %d not supported\n", - OPAL_REBOOT_PLATFORM_ERROR); - } - - /* - * We reached here. There can be three possibilities: - * 1. We are running on a firmware level that do not support - * opal_cec_reboot2() - * 2. We are running on a firmware level that do not support - * OPAL_REBOOT_PLATFORM_ERROR reboot type. - * 3. We are running on FSP based system that does not need opal - * to trigger checkstop explicitly for error analysis. The FSP - * PRD component would have already got notified about this - * error through other channels. - * - * If hardware marked this as an unrecoverable MCE, we are - * going to panic anyway. Even if it didn't, it's not safe to - * continue at this point, so we should explicitly panic. - */ - - panic("PowerNV Unrecovered Machine Check"); - return 0; + pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception"); } /* Early hmi handler called in real mode. 
*/ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 6dbc0a1da1f6..a159d48573d7 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -7,6 +7,8 @@ extern void pnv_smp_init(void); static inline void pnv_smp_init(void) { } #endif +extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn; + struct pci_dev; #ifdef CONFIG_PCI -- cgit v1.2.3-58-ga151 From 6fcd6baa90aeec9dcbe30786e15c125bf50503b2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Jul 2017 16:59:11 +1000 Subject: powerpc/powernv: Use kernel crash path for machine checks There are quite a few machine check exceptions that can be caused by kernel bugs. To make debugging easier, use the kernel crash path in cases of synchronous machine checks that occur in kernel mode, if that would not result in the machine going straight to panic or crash dump. There is a downside here that die()ing the process in kernel mode can still leave the system unstable. panic_on_oops will always force the system to fail-stop, so systems where that behaviour is important will still do the right thing. As a test, when triggering an i-side 0111b error (ifetch from foreign address) in kernel mode process context on POWER9, the kernel currently dies quickly like this: Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] [ 127.426651616,0] OPAL: Reboot requested due to Platform error. Effective[ 127.426693712,3] OPAL: Reboot requested due to Platform error. address: ffff000000000000 opal: Reboot type 1 not supported Kernel panic - not syncing: PowerNV Unrecovered Machine Check CPU: 56 PID: 4425 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #35 Call Trace: [ 128.017988928,4] IPMI: BUG: Dropping ESEL on the floor due to buggy/mising code in OPAL for this BMC Rebooting in 10 seconds.. Trying to free IRQ 496 from IRQ context! After this patch, the process is killed and the kernel continues with this message, which gives enough information to identify the offending branch (i.e., with CFAR): Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] Effective address: ffff000000000000 Oops: Machine check, sig: 7 [#1] SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 ... 
CPU: 22 PID: 4436 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #36 task: c000000932300000 task.stack: c000000932380000 NIP: ffff000000000000 LR: 00000000217706a4 CTR: ffff000000000000 REGS: c00000000fc8fd80 TRAP: 0200 Tainted: G M (4.12.0-rc1-13857-ga4700a261072-dirty) MSR: 90000000001c1003 CR: 24000484 XER: 20000000 CFAR: c000000000004c80 DAR: 0000000021770a90 DSISR: 0a000000 SOFTE: 1 GPR00: 0000000000001ebe 00007fffce4818b0 0000000021797f00 0000000000000000 GPR04: 00007fff8007ac24 0000000044000484 0000000000004000 00007fff801405e8 GPR08: 900000000280f033 0000000024000484 0000000000000000 0000000000000030 GPR12: 9000000000001003 00007fff801bc370 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR28: 00007fff801b0000 0000000000000000 00000000217707a0 00007fffce481918 NIP [ffff000000000000] 0xffff000000000000 LR [00000000217706a4] 0x217706a4 Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/include/asm/fadump.h | 2 ++ arch/powerpc/kernel/fadump.c | 9 ++++++++- arch/powerpc/kernel/traps.c | 22 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 32 ++++++++++++++++++++++++++------ 5 files changed, 59 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 87fcc1948817..7ee763d3bea9 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,6 +133,7 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); +extern bool die_will_crash(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index ce88bbe1d809..5a23010af600 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -209,11 +209,13 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); +extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } +static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } #endif #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index dd2c85f0bd8d..e1431800bfb9 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size) return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; } +int should_fadump_crash(void) +{ + if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + return 0; + return 1; +} + int is_fadump_active(void) { return fw_dump.dump_active; @@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char 
*str) struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; - if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + if (!should_fadump_crash()) return; /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5e4db2fc2add..6858d910b47e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -114,6 +114,28 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif +/* + * If oops/die is expected to crash the machine, return true here. + * + * This should not be expected to be 100% accurate, there may be + * notifiers registered or other unexpected conditions that may bring + * down the kernel. Or if the current process in the kernel is holding + * locks or has other critical state, the kernel may become effectively + * unusable anyway. + */ +bool die_will_crash(void) +{ + if (should_fadump_crash()) + return true; + if (kexec_should_crash(current)) + return true; + if (in_interrupt() || panic_on_oops || + !current->pid || is_global_init(current)) + return true; + + return false; +} + static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d246f919cba2..65c79ecf5a4d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "powernv.h" @@ -425,17 +426,36 @@ static int opal_recover_mce(struct pt_regs *regs, /* Fatal machine check */ pr_err("Machine check interrupt is fatal\n"); recovered = 0; - } else if ((evt->severity == MCE_SEV_ERROR_SYNC) && - (user_mode(regs) && !is_global_init(current))) { + } + + if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) { /* - * For now, kill the task if we have received exception when - * in userspace. + * Try to kill processes if we get a synchronous machine check + * (e.g., one caused by execution of this instruction). This + * will devolve into a panic if we try to kill init or are in + * an interrupt etc. * * TODO: Queue up this address for hwpoisioning later. + * TODO: This is not quite right for d-side machine + * checks ->nip is not necessarily the important + * address. */ - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; + if ((user_mode(regs))) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else if (die_will_crash()) { + /* + * die() would kill the kernel, so better to go via + * the platform reboot code that will log the + * machine check. + */ + recovered = 0; + } else { + die("Machine check", regs, SIGBUS); + recovered = 1; + } } + return recovered; } -- cgit v1.2.3-58-ga151 From 8bae6a23198defc5576e4c6f1f97822883d705ae Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:00 +1000 Subject: powerpc/eeh: Reduce to one the number of places where edev is allocated arch/powerpc/kernel/eeh_dev.c:57 is the only legit place where edev is allocated; other 2 places allocate it on stack and in the heap for a very short period of time to use eeh_pe_get() as takes edev. This changes eeh_pe_get() to receive required parameters explicitly. This removes unnecessary temporary allocation of edev. This uses the "pe_no" name instead of the "pe_config_addr" name as it actually is a PE number and not a config space address as it seemed. 
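Seen from a caller, the change is simple; the sketch below is illustrative (the wrapper function is hypothetical, only the eeh_pe_get() calls reflect the patch). Passing 0 as config_addr skips the bus/devfn match in __eeh_pe_get().

/* Hypothetical wrapper, shown only to contrast the old and new call. */
static struct eeh_pe *sketch_find_pe(struct pci_controller *hose, int pe_no)
{
	/*
	 * Before this patch a throw-away eeh_dev had to be filled in
	 * just to carry the arguments:
	 *
	 *	struct eeh_dev edev;
	 *
	 *	memset(&edev, 0, sizeof(edev));
	 *	edev.phb = hose;
	 *	edev.pe_config_addr = pe_no;
	 *	pe = eeh_pe_get(&edev);
	 */

	/* After: pass the PHB and PE number directly. */
	return eeh_pe_get(hose, pe_no, 0);
}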
Signed-off-by: Alexey Kardashevskiy Reviewed-by: Andrew Donnellan Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 3 ++- arch/powerpc/kernel/eeh_pe.c | 32 ++++++++++++++++++---------- arch/powerpc/platforms/powernv/eeh-powernv.c | 15 ++----------- 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8e37b71674f4..26a6a43f8799 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -262,7 +262,8 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); -struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, + int pe_no, int config_addr); int eeh_add_to_parent_pe(struct eeh_dev *edev); int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index cc4b206f77e4..84d79f3da7d6 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, * Bus/Device/Function number. The extra data referred by flag * indicates which type of address should be used. */ +struct eeh_pe_get_flag { + int pe_no; + int config_addr; +}; + static void *__eeh_pe_get(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - struct eeh_dev *edev = (struct eeh_dev *)flag; + struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; /* Unexpected PHB PE */ if (pe->type & EEH_PE_PHB) @@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag) * have non-zero PE address */ if (eeh_has_flag(EEH_VALID_PE_ZERO)) { - if (edev->pe_config_addr == pe->addr) + if (tmp->pe_no == pe->addr) return pe; } else { - if (edev->pe_config_addr && - (edev->pe_config_addr == pe->addr)) + if (tmp->pe_no && + (tmp->pe_no == pe->addr)) return pe; } /* Try BDF address */ - if (edev->config_addr && - (edev->config_addr == pe->config_addr)) + if (tmp->config_addr && + (tmp->config_addr == pe->config_addr)) return pe; return NULL; @@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag) /** * eeh_pe_get - Search PE based on the given address - * @edev: EEH device + * @phb: PCI controller + * @pe_no: PE number + * @config_addr: Config address * * Search the corresponding PE based on the specified address which * is included in the eeh device. The function is used to check if @@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag) * which is composed of PCI bus/device/function number, or unified * PE address. */ -struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, + int pe_no, int config_addr) { - struct eeh_pe *root = eeh_phb_pe_get(edev->phb); + struct eeh_pe *root = eeh_phb_pe_get(phb); + struct eeh_pe_get_flag tmp = { pe_no, config_addr }; struct eeh_pe *pe; - pe = eeh_pe_traverse(root, __eeh_pe_get, edev); + pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp); return pe; } @@ -344,7 +353,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. 
*/ - pe = eeh_pe_get(edev); + pe = eeh_pe_get(edev->pdn->phb, edev->pe_config_addr, + edev->config_addr); if (pe && !(pe->type & EEH_PE_INVALID)) { /* Mark the PE as type of PCI bus */ pe->type = EEH_PE_BUS; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 3f48f6df1cf3..ac8c01cd251c 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -113,7 +113,6 @@ static ssize_t pnv_eeh_ei_write(struct file *filp, size_t count, loff_t *ppos) { struct pci_controller *hose = filp->private_data; - struct eeh_dev *edev; struct eeh_pe *pe; int pe_no, type, func; unsigned long addr, mask; @@ -135,13 +134,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp, return -EINVAL; /* Retrieve PE */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return -ENOMEM; - edev->phb = hose; - edev->pe_config_addr = pe_no; - pe = eeh_pe_get(edev); - kfree(edev); + pe = eeh_pe_get(hose, pe_no, 0); if (!pe) return -ENODEV; @@ -1381,7 +1374,6 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pnv_pe; struct eeh_pe *dev_pe; - struct eeh_dev edev; /* * If PHB supports compound PE, to fetch @@ -1397,10 +1389,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, } /* Find the PE according to PE# */ - memset(&edev, 0, sizeof(struct eeh_dev)); - edev.phb = hose; - edev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&edev); + dev_pe = eeh_pe_get(hose, pe_no, 0); if (!dev_pe) return -EEXIST; -- cgit v1.2.3-58-ga151 From 69672bd7489f8a995e9cb89655dc1dcee555dadb Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:01 +1000 Subject: powerpc/eeh: Remove unnecessary pointer to phb from eeh_dev The eeh_dev struct already holds a pointer to pci_dn which it does not exist without and pci_dn itself holds the very same pointer so just use it. 
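In practice every former edev->phb user now reaches the controller through the pci_dn, as in this illustrative helper (the function itself is hypothetical; the field accesses mirror the diff):

static void sketch_print_edev_address(struct eeh_dev *edev)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);	/* a pci_dn always backs an edev */

	pr_info("EEH: %04x:%02x:%02x.%01x\n",
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
}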
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 7 +++---- arch/powerpc/kernel/eeh_dev.c | 2 -- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 24 +++++++++++++----------- arch/powerpc/platforms/powernv/eeh-powernv.c | 5 ++--- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 7 files changed, 20 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 26a6a43f8799..777d37aa0a7f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -141,7 +141,6 @@ struct eeh_dev { struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct list_head rmv_list; /* Record the removed edevs */ - struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 5e6887c40528..66c98c158ee3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -170,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) char buffer[128]; n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - edev->phb->global_number, pdn->busno, + pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - edev->phb->global_number, pdn->busno, + pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); @@ -1064,7 +1064,7 @@ core_initcall_sync(eeh_init); */ void eeh_add_device_early(struct pci_dn *pdn) { - struct pci_controller *phb; + struct pci_controller *phb = pdn ? 
pdn->phb : NULL; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev) @@ -1074,7 +1074,6 @@ void eeh_add_device_early(struct pci_dn *pdn) return; /* USB Bus children of PCI devices will not have BUID's */ - phb = edev->phb; if (NULL == phb || (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) return; diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index d6b2ca70d14d..bdf4a3698a35 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -50,7 +50,6 @@ */ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) { - struct pci_controller *phb = pdn->phb; struct eeh_dev *edev; /* Allocate EEH device */ @@ -64,7 +63,6 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) /* Associate EEH device with OF node */ pdn->edev = edev; edev->pdn = pdn; - edev->phb = phb; INIT_LIST_HEAD(&edev->list); INIT_LIST_HEAD(&edev->rmv_list); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index c405c79e50cd..8b840191df59 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata) if (!(edev->physfn)) { pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", - __func__, edev->phb->global_number, pdn->busno, + __func__, pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); return NULL; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 84d79f3da7d6..419c3f07afd5 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -339,11 +339,12 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", - __func__, edev->config_addr, edev->phb->global_number); + __func__, edev->config_addr, pdn->phb->global_number); return -EINVAL; } @@ -353,7 +354,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. 
*/ - pe = eeh_pe_get(edev->pdn->phb, edev->pe_config_addr, + pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, edev->config_addr); if (pe && !(pe->type & EEH_PE_INVALID)) { /* Mark the PE as type of PCI bus */ @@ -363,7 +364,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Put the edev to PE */ list_add_tail(&edev->list, &pe->edevs); pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", - edev->phb->global_number, + pdn->phb->global_number, edev->config_addr >> 8, PCI_SLOT(edev->config_addr & 0xFF), PCI_FUNC(edev->config_addr & 0xFF), @@ -386,7 +387,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device " "PE#%x, Parent PE#%x\n", - edev->phb->global_number, + pdn->phb->global_number, edev->config_addr >> 8, PCI_SLOT(edev->config_addr & 0xFF), PCI_FUNC(edev->config_addr & 0xFF), @@ -396,9 +397,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); else - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; @@ -414,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) */ parent = eeh_pe_get_parent(edev); if (!parent) { - parent = eeh_phb_pe_get(edev->phb); + parent = eeh_phb_pe_get(pdn->phb); if (!parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, edev->phb->global_number); + __func__, pdn->phb->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; @@ -434,7 +435,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) edev->pe = pe; pr_debug("EEH: Add %04x:%02x:%02x.%01x to " "Device PE#%x, Parent PE#%x\n", - edev->phb->global_number, + pdn->phb->global_number, edev->config_addr >> 8, PCI_SLOT(edev->config_addr & 0xFF), PCI_FUNC(edev->config_addr & 0xFF), @@ -456,10 +457,11 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; int cnt; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); if (!edev->pe) { pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", - __func__, edev->phb->global_number, + __func__, pdn->phb->global_number, edev->config_addr >> 8, PCI_SLOT(edev->config_addr & 0xFF), PCI_FUNC(edev->config_addr & 0xFF)); @@ -722,7 +724,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) return; pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", - __func__, edev->phb->global_number, + __func__, pdn->phb->global_number, edev->config_addr >> 8, PCI_SLOT(edev->config_addr & 0xFF), PCI_FUNC(edev->config_addr & 0xFF)); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ac8c01cd251c..552b0cd4e8ba 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -926,7 +926,6 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, int pos, u16 mask) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); int i, status = 0; /* Wait for Transaction Pending bit to be cleared */ @@ -940,7 +939,7 @@ static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", __func__, type, - edev->phb->global_number, pdn->busno, + pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); } @@ -1714,7 +1713,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) 
if (edev->physfn) { ret = pnv_eeh_restore_vf_config(pdn); } else { - phb = edev->phb->private_data; + phb = pdn->phb->private_data; ret = opal_pci_reinit(phb->opal_id, OPAL_REINIT_PCI_DEV, edev->config_addr); } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1eef46d9cf30..04c8c1827549 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -247,7 +247,7 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); - pe.phb = edev->phb; + pe.phb = pdn->phb; pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); /* Enable EEH on the device */ -- cgit v1.2.3-58-ga151 From 405b33a76d26e426276c088b38dc7efba7538ff2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:02 +1000 Subject: powerpc/eeh: Remove unnecessary config_addr from eeh_dev The eeh_dev struct hold a config space address of an associated node and the very same address is also stored in the pci_dn struct which is always present during the eeh_dev lifetime. This uses bus:devfn directly from pci_dn instead of cached and packed config_addr. Since config_addr is made from device's bus:dev.fn, there is no point in keeping it in the debugfs either so remove that too. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh_pe.c | 42 ++++++++++++++-------------- arch/powerpc/kernel/eeh_sysfs.c | 3 -- arch/powerpc/platforms/powernv/eeh-powernv.c | 9 +++--- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 -- 5 files changed, 26 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 777d37aa0a7f..9847ae3a12d1 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -131,7 +131,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) struct eeh_dev { int mode; /* EEH mode */ int class_code; /* Class code of the device */ - int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ int pcix_cap; /* Saved PCIx capability */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 419c3f07afd5..2e8d1b2b5af4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -340,11 +340,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; struct pci_dn *pdn = eeh_dev_to_pdn(edev); + int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", - __func__, edev->config_addr, pdn->phb->global_number); + __func__, config_addr, pdn->phb->global_number); return -EINVAL; } @@ -354,8 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. 
*/ - pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, - edev->config_addr); + pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); if (pe && !(pe->type & EEH_PE_INVALID)) { /* Mark the PE as type of PCI bus */ pe->type = EEH_PE_BUS; @@ -365,10 +365,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) list_add_tail(&edev->list, &pe->edevs); pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", pdn->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), - pe->addr); + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), + pe->addr); return 0; } else if (pe && (pe->type & EEH_PE_INVALID)) { list_add_tail(&edev->list, &pe->edevs); @@ -388,10 +388,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device " "PE#%x, Parent PE#%x\n", pdn->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), - pe->addr, pe->parent->addr); + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), + pe->addr, pe->parent->addr); return 0; } @@ -405,7 +405,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = edev->config_addr; + pe->config_addr = config_addr; /* * Put the new EEH PE into hierarchy tree. If the parent @@ -436,9 +436,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pr_debug("EEH: Add %04x:%02x:%02x.%01x to " "Device PE#%x, Parent PE#%x\n", pdn->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), pe->addr, pe->parent->addr); return 0; @@ -462,9 +462,9 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) if (!edev->pe) { pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", __func__, pdn->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF)); + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn)); return -EEXIST; } @@ -725,9 +725,9 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", __func__, pdn->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF)); + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn)); /* Check slot status */ cap = edev->pcie_cap; diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 1ceecdda810b..797549289798 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev, \ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); -EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); static ssize_t eeh_pe_state_show(struct device *dev, @@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) return; rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); @@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) } device_remove_file(&pdev->dev, &dev_attr_eeh_mode); - device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); 
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 552b0cd4e8ba..8864065eba22 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -352,6 +352,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) struct eeh_dev *edev = pdn_to_eeh_dev(pdn); uint32_t pcie_flags; int ret; + int config_addr = (pdn->busno << 8) | (pdn->devfn); /* * When probing the root bridge, which doesn't have any @@ -386,8 +387,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) } } - edev->config_addr = (pdn->busno << 8) | (pdn->devfn); - edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr]; + edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; /* Create PE */ ret = eeh_add_to_parent_pe(edev); @@ -1699,6 +1699,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret; + int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) return -EEXIST; @@ -1715,12 +1716,12 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) } else { phb = pdn->phb->private_data; ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, edev->config_addr); + OPAL_REINIT_PCI_DEV, config_addr); } if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, edev->config_addr, ret); + __func__, config_addr, ret); return -EIO; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 04c8c1827549..6b812ad990e4 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -254,7 +254,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { /* Retrieve PE address */ - edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8); edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; @@ -279,7 +278,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) /* This device doesn't support EEH, but it may have an * EEH parent, in which case we mark it as supported. */ - edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr; edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; eeh_add_to_parent_pe(edev); } -- cgit v1.2.3-58-ga151 From f1e08232ede8d1888d51e94940645f93a8462d75 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:04 +1000 Subject: powerpc/pci: Remove OF node back pointer from pci_dn The check_req() helper uses pci_get_pdn() to get an OF node pointer. pci_get_pdn() returns a pci_dn pointer which either: 1) from the OF node returned by pci_device_to_OF_node(); 2) from the parent child_list where entries don't have OF node pointers. Since check_req() does not care about 2), it can call pci_device_to_OF_node() directly, hence the change. The find_pe_dn() helper uses embedded pci_dn to get an OF node which is also stored in edev->pdev so let's take a shortcut and call pci_device_to_OF_node() directly. With these 2 changes, we can finally get rid of the OF node back pointer. 
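The pattern the two converted helpers now follow is illustrated below; the wrapper is hypothetical, but the pci_device_to_OF_node() plus of_get_property() sequence is exactly what check_req() does after this change.

static const __be32 *sketch_read_req_msi(struct pci_dev *pdev,
					 const char *prop_name)
{
	struct device_node *dn = pci_device_to_OF_node(pdev);	/* no pci_dn needed */

	if (!dn)
		return NULL;

	return of_get_property(dn, prop_name, NULL);
}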
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 1 - arch/powerpc/kernel/pci_dn.c | 1 - arch/powerpc/platforms/pseries/msi.c | 11 ++--------- 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 56c67d3f0108..0b8aa1fe2d5f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -195,7 +195,6 @@ struct pci_dn { struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ struct iommu_table_group *table_group; /* for phb's or bridges */ - struct device_node *node; /* back-pointer to the device_node */ int pci_ext_config_space; /* for pci devices */ diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index dfb107631116..0e395afbf0f4 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -293,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, if (pdn == NULL) return NULL; dn->data = pdn; - pdn->node = dn; pdn->phb = hose; #ifdef CONFIG_PPC_POWERNV pdn->pe_number = IODA_INVALID_PE; diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 353912e700eb..b7496948129e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -132,15 +132,10 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev) static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) { struct device_node *dn; - struct pci_dn *pdn; const __be32 *p; u32 req_msi; - pdn = pci_get_pdn(pdev); - if (!pdn) - return -ENODEV; - - dn = pdn->node; + dn = pci_device_to_OF_node(pdev); p = of_get_property(dn, prop_name, NULL); if (!p) { @@ -197,7 +192,6 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; - struct pci_dn *pdn; struct eeh_dev *edev; /* Found our PE and assume 8 at that point. */ @@ -210,8 +204,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) edev = pdn_to_eeh_dev(PCI_DN(dn)); if (edev->pe) edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); - pdn = eeh_dev_to_pdn(edev); - dn = pdn ? pdn->node : NULL; + dn = pci_device_to_OF_node(edev->pdev); if (!dn) return NULL; -- cgit v1.2.3-58-ga151 From 967689141eb37c4365eac0fac82d857773098475 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:31 -0700 Subject: powerpc/powernv/vas: Define macros, register fields and structures Define macros for the VAS hardware registers and bit-fields as well as couple of data structures needed by the VAS driver. Signed-off-by: Sukadev Bhattiprolu [mpe: Fixup include guard to use _ASM_POWERPC_VAS_H] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 45 +++++ arch/powerpc/platforms/powernv/vas.h | 382 +++++++++++++++++++++++++++++++++++ 2 files changed, 427 insertions(+) create mode 100644 arch/powerpc/include/asm/vas.h create mode 100644 arch/powerpc/platforms/powernv/vas.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h new file mode 100644 index 000000000000..9f81db68f27b --- /dev/null +++ b/arch/powerpc/include/asm/vas.h @@ -0,0 +1,45 @@ +/* + * Copyright 2016-17 IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_VAS_H +#define _ASM_POWERPC_VAS_H + +/* + * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 + * (Local FIFO Size Register) of the VAS workbook. + */ +#define VAS_RX_FIFO_SIZE_MIN (1 << 10) /* 1KB */ +#define VAS_RX_FIFO_SIZE_MAX (8 << 20) /* 8MB */ + +/* + * Threshold Control Mode: Have paste operation fail if the number of + * requests in receive FIFO exceeds a threshold. + * + * NOTE: No special error code yet if paste is rejected because of these + * limits. So users can't distinguish between this and other errors. + */ +#define VAS_THRESH_DISABLED 0 +#define VAS_THRESH_FIFO_GT_HALF_FULL 1 +#define VAS_THRESH_FIFO_GT_QTR_FULL 2 +#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 + +/* + * Co-processor Engine type. + */ +enum vas_cop_type { + VAS_COP_TYPE_FAULT, + VAS_COP_TYPE_842, + VAS_COP_TYPE_842_HIPRI, + VAS_COP_TYPE_GZIP, + VAS_COP_TYPE_GZIP_HIPRI, + VAS_COP_TYPE_FTW, + VAS_COP_TYPE_MAX, +}; + +#endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h new file mode 100644 index 000000000000..abb545f97c77 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas.h @@ -0,0 +1,382 @@ +/* + * Copyright 2016-17 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _VAS_H +#define _VAS_H +#include +#include +#include + +/* + * Overview of Virtual Accelerator Switchboard (VAS). + * + * VAS is a hardware "switchboard" that allows senders and receivers to + * exchange messages with _minimal_ kernel involvment. The receivers are + * typically NX coprocessor engines that perform compression or encryption + * in hardware, but receivers can also be other software threads. + * + * Senders are user/kernel threads that submit compression/encryption or + * other requests to the receivers. Senders must format their messages as + * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and + * "paste" instructions which were introduced in Power9. + * + * A Power node can have (upto?) 8 Power chips. There is one instance of + * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports, + * Senders and receivers must each connect to a separate window before they + * can exchange messages through the switchboard. + * + * Each window is described by two types of window contexts: + * + * Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes + * + * OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes. + * + * A window context can be viewed as a set of 64-bit registers. The settings + * in these registers configure/control/determine the behavior of the VAS + * hardware when messages are sent/received through the window. The registers + * in the HVWC are configured by the kernel while the registers in the UWC can + * be configured by the kernel or by the user space application that is using + * the window. 
+ * + * The HVWCs for all windows on a specific instance of VAS are in a contiguous + * range of hardware addresses or Base address region (BAR) referred to as the + * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance + * are referred to as the UWC BAR for the instance. + * + * The two BARs for each instance are defined Power9 MMIO Ranges spreadsheet + * and available to the kernel in the VAS node's "reg" property in the device + * tree: + * + * /proc/device-tree/vasm@.../reg + * + * (see vas_probe() for details on the reg property). + * + * The kernel maps the HVWC and UWC BAR regions into the kernel address + * space (hvwc_map and uwc_map). The kernel can then access the window + * contexts of a specific window using: + * + * hvwc = hvwc_map + winid * VAS_HVWC_SIZE. + * uwc = uwc_map + winid * VAS_UWC_SIZE. + * + * where winid is the window index (0..64K). + * + * As mentioned, a window context is used to "configure" a window. Besides + * this configuration address, each _send_ window also has a unique hardware + * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()). + * + * The hardware paste address for a window is computed using the "paste + * base address" and "paste win id shift" reg properties in the VAS device + * tree node using: + * + * paste_addr = paste_base + ((winid << paste_win_id_shift)) + * + * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift). + * + * The kernel maps this hardware address into the sender's address space + * after which they can use the 'paste' instruction (new in Power9) to + * send a message (submit a request aka CRB) to the coprocessor. + * + * NOTE: In the initial version, senders can only in-kernel drivers/threads. + * Support for user space threads will be added in follow-on patches. + * + * TODO: Do we need to map the UWC into user address space so they can return + * credits? Its NA for NX but may be needed for other receive windows. + * + */ + +#define VAS_WINDOWS_PER_CHIP (64 << 10) + +/* + * Hypervisor and OS/USer Window Context sizes + */ +#define VAS_HVWC_SIZE 512 +#define VAS_UWC_SIZE PAGE_SIZE + +/* + * Initial per-process credits. + * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) + * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED) + * + * TODO: Needs tuning for per-process credits + */ +#define VAS_WCREDS_MIN 16 +#define VAS_WCREDS_MAX ((64 << 10) - 1) +#define VAS_WCREDS_DEFAULT (1 << 10) + +/* + * VAS Window Context Register Offsets and bitmasks. 
+ * See Section 3.1.4 of VAS Work book + */ +#define VAS_LPID_OFFSET 0x010 +#define VAS_LPID PPC_BITMASK(0, 11) + +#define VAS_PID_OFFSET 0x018 +#define VAS_PID_ID PPC_BITMASK(0, 19) + +#define VAS_XLATE_MSR_OFFSET 0x020 +#define VAS_XLATE_MSR_DR PPC_BIT(0) +#define VAS_XLATE_MSR_TA PPC_BIT(1) +#define VAS_XLATE_MSR_PR PPC_BIT(2) +#define VAS_XLATE_MSR_US PPC_BIT(3) +#define VAS_XLATE_MSR_HV PPC_BIT(4) +#define VAS_XLATE_MSR_SF PPC_BIT(5) + +#define VAS_XLATE_LPCR_OFFSET 0x028 +#define VAS_XLATE_LPCR_PAGE_SIZE PPC_BITMASK(0, 2) +#define VAS_XLATE_LPCR_ISL PPC_BIT(3) +#define VAS_XLATE_LPCR_TC PPC_BIT(4) +#define VAS_XLATE_LPCR_SC PPC_BIT(5) + +#define VAS_XLATE_CTL_OFFSET 0x030 +#define VAS_XLATE_MODE PPC_BITMASK(0, 1) + +#define VAS_AMR_OFFSET 0x040 +#define VAS_AMR PPC_BITMASK(0, 63) + +#define VAS_SEIDR_OFFSET 0x048 +#define VAS_SEIDR PPC_BITMASK(0, 63) + +#define VAS_FAULT_TX_WIN_OFFSET 0x050 +#define VAS_FAULT_TX_WIN PPC_BITMASK(48, 63) + +#define VAS_OSU_INTR_SRC_RA_OFFSET 0x060 +#define VAS_OSU_INTR_SRC_RA PPC_BITMASK(8, 63) + +#define VAS_HV_INTR_SRC_RA_OFFSET 0x070 +#define VAS_HV_INTR_SRC_RA PPC_BITMASK(8, 63) + +#define VAS_PSWID_OFFSET 0x078 +#define VAS_PSWID_EA_HANDLE PPC_BITMASK(0, 31) + +#define VAS_SPARE1_OFFSET 0x080 +#define VAS_SPARE2_OFFSET 0x088 +#define VAS_SPARE3_OFFSET 0x090 +#define VAS_SPARE4_OFFSET 0x130 +#define VAS_SPARE5_OFFSET 0x160 +#define VAS_SPARE6_OFFSET 0x188 + +#define VAS_LFIFO_BAR_OFFSET 0x0A0 +#define VAS_LFIFO_BAR PPC_BITMASK(8, 53) +#define VAS_PAGE_MIGRATION_SELECT PPC_BITMASK(54, 56) + +#define VAS_LDATA_STAMP_CTL_OFFSET 0x0A8 +#define VAS_LDATA_STAMP PPC_BITMASK(0, 1) +#define VAS_XTRA_WRITE PPC_BIT(2) + +#define VAS_LDMA_CACHE_CTL_OFFSET 0x0B0 +#define VAS_LDMA_TYPE PPC_BITMASK(0, 1) +#define VAS_LDMA_FIFO_DISABLE PPC_BIT(2) + +#define VAS_LRFIFO_PUSH_OFFSET 0x0B8 +#define VAS_LRFIFO_PUSH PPC_BITMASK(0, 15) + +#define VAS_CURR_MSG_COUNT_OFFSET 0x0C0 +#define VAS_CURR_MSG_COUNT PPC_BITMASK(0, 7) + +#define VAS_LNOTIFY_AFTER_COUNT_OFFSET 0x0C8 +#define VAS_LNOTIFY_AFTER_COUNT PPC_BITMASK(0, 7) + +#define VAS_LRX_WCRED_OFFSET 0x0E0 +#define VAS_LRX_WCRED PPC_BITMASK(0, 15) + +#define VAS_LRX_WCRED_ADDER_OFFSET 0x190 +#define VAS_LRX_WCRED_ADDER PPC_BITMASK(0, 15) + +#define VAS_TX_WCRED_OFFSET 0x0F0 +#define VAS_TX_WCRED PPC_BITMASK(4, 15) + +#define VAS_TX_WCRED_ADDER_OFFSET 0x1A0 +#define VAS_TX_WCRED_ADDER PPC_BITMASK(4, 15) + +#define VAS_LFIFO_SIZE_OFFSET 0x100 +#define VAS_LFIFO_SIZE PPC_BITMASK(0, 3) + +#define VAS_WINCTL_OFFSET 0x108 +#define VAS_WINCTL_OPEN PPC_BIT(0) +#define VAS_WINCTL_REJ_NO_CREDIT PPC_BIT(1) +#define VAS_WINCTL_PIN PPC_BIT(2) +#define VAS_WINCTL_TX_WCRED_MODE PPC_BIT(3) +#define VAS_WINCTL_RX_WCRED_MODE PPC_BIT(4) +#define VAS_WINCTL_TX_WORD_MODE PPC_BIT(5) +#define VAS_WINCTL_RX_WORD_MODE PPC_BIT(6) +#define VAS_WINCTL_RSVD_TXBUF PPC_BIT(7) +#define VAS_WINCTL_THRESH_CTL PPC_BITMASK(8, 9) +#define VAS_WINCTL_FAULT_WIN PPC_BIT(10) +#define VAS_WINCTL_NX_WIN PPC_BIT(11) + +#define VAS_WIN_STATUS_OFFSET 0x110 +#define VAS_WIN_BUSY PPC_BIT(1) + +#define VAS_WIN_CTX_CACHING_CTL_OFFSET 0x118 +#define VAS_CASTOUT_REQ PPC_BIT(0) +#define VAS_PUSH_TO_MEM PPC_BIT(1) +#define VAS_WIN_CACHE_STATUS PPC_BIT(4) + +#define VAS_TX_RSVD_BUF_COUNT_OFFSET 0x120 +#define VAS_RXVD_BUF_COUNT PPC_BITMASK(58, 63) + +#define VAS_LRFIFO_WIN_PTR_OFFSET 0x128 +#define VAS_LRX_WIN_ID PPC_BITMASK(0, 15) + +/* + * Local Notification Control Register controls what happens in _response_ + * to a paste command and hence applies only to receive 
windows. + */ +#define VAS_LNOTIFY_CTL_OFFSET 0x138 +#define VAS_NOTIFY_DISABLE PPC_BIT(0) +#define VAS_INTR_DISABLE PPC_BIT(1) +#define VAS_NOTIFY_EARLY PPC_BIT(2) +#define VAS_NOTIFY_OSU_INTR PPC_BIT(3) + +#define VAS_LNOTIFY_PID_OFFSET 0x140 +#define VAS_LNOTIFY_PID PPC_BITMASK(0, 19) + +#define VAS_LNOTIFY_LPID_OFFSET 0x148 +#define VAS_LNOTIFY_LPID PPC_BITMASK(0, 11) + +#define VAS_LNOTIFY_TID_OFFSET 0x150 +#define VAS_LNOTIFY_TID PPC_BITMASK(0, 15) + +#define VAS_LNOTIFY_SCOPE_OFFSET 0x158 +#define VAS_LNOTIFY_MIN_SCOPE PPC_BITMASK(0, 1) +#define VAS_LNOTIFY_MAX_SCOPE PPC_BITMASK(2, 3) + +#define VAS_NX_UTIL_OFFSET 0x1B0 +#define VAS_NX_UTIL PPC_BITMASK(0, 63) + +/* SE: Side effects */ +#define VAS_NX_UTIL_SE_OFFSET 0x1B8 +#define VAS_NX_UTIL_SE PPC_BITMASK(0, 63) + +#define VAS_NX_UTIL_ADDER_OFFSET 0x180 +#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63) + +/* + * Local Notify Scope Control Register. (Receive windows only). + */ +enum vas_notify_scope { + VAS_SCOPE_LOCAL, + VAS_SCOPE_GROUP, + VAS_SCOPE_VECTORED_GROUP, + VAS_SCOPE_UNUSED, +}; + +/* + * Local DMA Cache Control Register (Receive windows only). + */ +enum vas_dma_type { + VAS_DMA_TYPE_INJECT, + VAS_DMA_TYPE_WRITE, +}; + +/* + * Local Notify Scope Control Register. (Receive windows only). + * Not applicable to NX receive windows. + */ +enum vas_notify_after_count { + VAS_NOTIFY_AFTER_256 = 0, + VAS_NOTIFY_NONE, + VAS_NOTIFY_AFTER_2 +}; + +/* + * One per instance of VAS. Each instance will have a separate set of + * receive windows, one per coprocessor type. + */ +struct vas_instance { + int vas_id; + struct ida ida; + struct list_head node; + struct platform_device *pdev; + + u64 hvwc_bar_start; + u64 uwc_bar_start; + u64 paste_base_addr; + u64 paste_win_id_shift; + + struct mutex mutex; + struct vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; +}; + +/* + * In-kernel state a VAS window. One per window. + */ +struct vas_window { + /* Fields common to send and receive windows */ + struct vas_instance *vinst; + int winid; + bool tx_win; /* True if send window */ + bool nx_win; /* True if NX window */ + bool user_win; /* True if user space window */ + void *hvwc_map; /* HV window context */ + void *uwc_map; /* OS/User window context */ + pid_t pid; /* Linux process id of owner */ + + /* Fields applicable only to send windows */ + void *paste_kaddr; + char *paste_addr_name; + struct vas_window *rxwin; + + /* Feilds applicable only to receive windows */ + enum vas_cop_type cop; + atomic_t num_txwins; +}; + +/* + * Container for the hardware state of a window. One per-window. + * + * A VAS Window context is a 512-byte area in the hardware that contains + * a set of 64-bit registers. Individual bit-fields in these registers + * determine the configuration/operation of the hardware. struct vas_winctx + * is a container for the register fields in the window context. 
+ */ +struct vas_winctx { + void *rx_fifo; + int rx_fifo_size; + int wcreds_max; + int rsvd_txbuf_count; + + bool user_win; + bool nx_win; + bool fault_win; + bool rsvd_txbuf_enable; + bool pin_win; + bool rej_no_credit; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_word_mode; + bool rx_word_mode; + bool data_stamp; + bool xtra_write; + bool notify_disable; + bool intr_disable; + bool fifo_disable; + bool notify_early; + bool notify_os_intr_reg; + + int lpid; + int pidr; /* value from SPRN_PID, not linux pid */ + int lnotify_lpid; + int lnotify_pid; + int lnotify_tid; + u32 pswid; + int rx_win_id; + int fault_win_id; + int tc_mode; + + u64 irq_port; + + enum vas_dma_type dma_type; + enum vas_notify_scope min_scope; + enum vas_notify_scope max_scope; + enum vas_notify_after_count notify_after_count; +}; + +#endif /* _VAS_H */ -- cgit v1.2.3-58-ga151 From b6622a339e8670a1025d4dd84be473c76dabed33 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:32 -0700 Subject: powerpc/powernv: Move GET_FIELD/SET_FIELD to vas.h Move the GET_FIELD and SET_FIELD macros to vas.h as VAS and other users of VAS, including NX-842 can use those macros. There is a lot of related code between the VAS/NX kernel drivers and skiboot. For consistency, switch the order of parameters in SET_FIELD to match the order in skiboot. Signed-off-by: Sukadev Bhattiprolu Reviewed-by: Dan Streetman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 8 ++++++++ drivers/crypto/nx/nx-842-powernv.c | 7 ++++--- drivers/crypto/nx/nx-842.h | 5 ----- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 9f81db68f27b..7dcf3bded343 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,14 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL 2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * Get/Set bit fields + */ +#define GET_FIELD(m, v) (((v) & (m)) >> MASK_LSH(m)) +#define MASK_LSH(m) (__builtin_ffsl(m) - 1) +#define SET_FIELD(m, v, val) \ + (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m))) + /* * Co-processor Engine type. 
*/ diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 1710f80a09ec..3abb045cdba7 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -22,6 +22,7 @@ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dan Streetman "); @@ -424,9 +425,9 @@ static int nx842_powernv_function(const unsigned char *in, unsigned int inlen, /* set up CCW */ ccw = 0; - ccw = SET_FIELD(ccw, CCW_CT, nx842_ct); - ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */ - ccw = SET_FIELD(ccw, CCW_FC_842, fc); + ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); + ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ + ccw = SET_FIELD(CCW_FC_842, ccw, fc); /* set up CRB's CSB addr */ csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index a4eee3bba937..30929bd7d1a9 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -100,11 +100,6 @@ static inline unsigned long nx842_get_pa(void *addr) return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr); } -/* Get/Set bit fields */ -#define MASK_LSH(m) (__builtin_ffsl(m) - 1) -#define GET_FIELD(v, m) (((v) & (m)) >> MASK_LSH(m)) -#define SET_FIELD(v, m, val) (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m))) - /** * This provides the driver's constraints. Different nx842 implementations * may have varying requirements. The constraints are: -- cgit v1.2.3-58-ga151 From 62c4eda4fabe89709ec43dcf1efe9fbea007a734 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:37 -0700 Subject: powerpc/powernv/vas: Define vas_rx_win_open() interface Define the vas_rx_win_open() interface. This interface is intended to be used by the Nest Accelerator (NX) driver(s) to setup receive windows for one or more NX engines (which implement compression & encryption algorithms in the hardware). Follow-on patches will provide an interface to close the window and to open a send window that kernel subsystems can use to access the NX engines. The interface to open a receive window is expected to be invoked for each instance of VAS in the system. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 45 +++++ arch/powerpc/platforms/powernv/vas-window.c | 283 +++++++++++++++++++++++++++- arch/powerpc/platforms/powernv/vas.h | 14 ++ 3 files changed, 341 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 7dcf3bded343..18c59c3ade3d 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -50,4 +50,49 @@ enum vas_cop_type { VAS_COP_TYPE_MAX, }; +/* + * Receive window attributes specified by the (in-kernel) owner of window. + */ +struct vas_rx_win_attr { + void *rx_fifo; + int rx_fifo_size; + int wcreds_max; + + bool pin_win; + bool rej_no_credit; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_win_ord_mode; + bool rx_win_ord_mode; + bool data_stamp; + bool nx_win; + bool fault_win; + bool user_win; + bool notify_disable; + bool intr_disable; + bool notify_early; + + int lnotify_lpid; + int lnotify_pid; + int lnotify_tid; + u32 pswid; + + int tc_mode; +}; + +/* + * Helper to initialize receive window attributes to defaults for an + * NX window. 
+ */ +void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop); + +/* + * Open a VAS receive window for the instance of VAS identified by @vasid + * Use @attr to initialize the attributes of the window. + * + * Return a handle to the window or ERR_PTR() on error. + */ +struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, + struct vas_rx_win_attr *attr); + #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index bfc9dba24b65..783c8c6585d4 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "vas.h" @@ -531,7 +533,7 @@ void vas_window_free(struct vas_window *window) vas_release_window_id(&vinst->ida, winid); } -struct vas_window *vas_window_alloc(struct vas_instance *vinst) +static struct vas_window *vas_window_alloc(struct vas_instance *vinst) { int winid; struct vas_window *window; @@ -558,6 +560,285 @@ out_free: return ERR_PTR(-ENOMEM); } +/* + * Get the VAS receive window associated with NX engine identified + * by @cop and if applicable, @pswid. + * + * See also function header of set_vinst_win(). + */ +struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, + enum vas_cop_type cop, u32 pswid) +{ + struct vas_window *rxwin; + + mutex_lock(&vinst->mutex); + + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) + rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL); + else + rxwin = ERR_PTR(-EINVAL); + + if (!IS_ERR(rxwin)) + atomic_inc(&rxwin->num_txwins); + + mutex_unlock(&vinst->mutex); + + return rxwin; +} + +/* + * We have two tables of windows in a VAS instance. The first one, + * ->windows[], contains all the windows in the instance and allows + * looking up a window by its id. It is used to look up send windows + * during fault handling and receive windows when pairing user space + * send/receive windows. + * + * The second table, ->rxwin[], contains receive windows that are + * associated with NX engines. This table has VAS_COP_TYPE_MAX + * entries and is used to look up a receive window by its + * coprocessor type. + * + * Here, we save @window in the ->windows[] table. If it is a receive + * window, we also save the window in the ->rxwin[] table. + */ +static void set_vinst_win(struct vas_instance *vinst, + struct vas_window *window) +{ + int id = window->winid; + + mutex_lock(&vinst->mutex); + + /* + * There should only be one receive window for a coprocessor type + * unless its a user (FTW) window. + */ + if (!window->user_win && !window->tx_win) { + WARN_ON_ONCE(vinst->rxwin[window->cop]); + vinst->rxwin[window->cop] = window; + } + + WARN_ON_ONCE(vinst->windows[id] != NULL); + vinst->windows[id] = window; + + mutex_unlock(&vinst->mutex); +} + +/* + * Clear this window from the table(s) of windows for this VAS instance. + * See also function header of set_vinst_win(). 
+ */ +void clear_vinst_win(struct vas_window *window) +{ + int id = window->winid; + struct vas_instance *vinst = window->vinst; + + mutex_lock(&vinst->mutex); + + if (!window->user_win && !window->tx_win) { + WARN_ON_ONCE(!vinst->rxwin[window->cop]); + vinst->rxwin[window->cop] = NULL; + } + + WARN_ON_ONCE(vinst->windows[id] != window); + vinst->windows[id] = NULL; + + mutex_unlock(&vinst->mutex); +} + +static void init_winctx_for_rxwin(struct vas_window *rxwin, + struct vas_rx_win_attr *rxattr, + struct vas_winctx *winctx) +{ + /* + * We first zero (memset()) all fields and only set non-zero fields. + * Following fields are 0/false but maybe deserve a comment: + * + * ->notify_os_intr_reg In powerNV, send intrs to HV + * ->notify_disable False for NX windows + * ->intr_disable False for Fault Windows + * ->xtra_write False for NX windows + * ->notify_early NA for NX windows + * ->rsvd_txbuf_count NA for Rx windows + * ->lpid, ->pid, ->tid NA for Rx windows + */ + + memset(winctx, 0, sizeof(struct vas_winctx)); + + winctx->rx_fifo = rxattr->rx_fifo; + winctx->rx_fifo_size = rxattr->rx_fifo_size; + winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; + winctx->pin_win = rxattr->pin_win; + + winctx->nx_win = rxattr->nx_win; + winctx->fault_win = rxattr->fault_win; + winctx->rx_word_mode = rxattr->rx_win_ord_mode; + winctx->tx_word_mode = rxattr->tx_win_ord_mode; + winctx->rx_wcred_mode = rxattr->rx_wcred_mode; + winctx->tx_wcred_mode = rxattr->tx_wcred_mode; + + if (winctx->nx_win) { + winctx->data_stamp = true; + winctx->intr_disable = true; + winctx->pin_win = true; + + WARN_ON_ONCE(winctx->fault_win); + WARN_ON_ONCE(!winctx->rx_word_mode); + WARN_ON_ONCE(!winctx->tx_word_mode); + WARN_ON_ONCE(winctx->notify_after_count); + } else if (winctx->fault_win) { + winctx->notify_disable = true; + } else if (winctx->user_win) { + /* + * Section 1.8.1 Low Latency Core-Core Wake up of + * the VAS workbook: + * + * - disable credit checks ([tr]x_wcred_mode = false) + * - disable FIFO writes + * - enable ASB_Notify, disable interrupt + */ + winctx->fifo_disable = true; + winctx->intr_disable = true; + winctx->rx_fifo = NULL; + } + + winctx->lnotify_lpid = rxattr->lnotify_lpid; + winctx->lnotify_pid = rxattr->lnotify_pid; + winctx->lnotify_tid = rxattr->lnotify_tid; + winctx->pswid = rxattr->pswid; + winctx->dma_type = VAS_DMA_TYPE_INJECT; + winctx->tc_mode = rxattr->tc_mode; + + winctx->min_scope = VAS_SCOPE_LOCAL; + winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; +} + +static bool rx_win_args_valid(enum vas_cop_type cop, + struct vas_rx_win_attr *attr) +{ + dump_rx_win_attr(attr); + + if (cop >= VAS_COP_TYPE_MAX) + return false; + + if (cop != VAS_COP_TYPE_FTW && + attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN) + return false; + + if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) + return false; + + if (attr->nx_win) { + /* cannot be fault or user window if it is nx */ + if (attr->fault_win || attr->user_win) + return false; + /* + * Section 3.1.4.32: NX Windows must not disable notification, + * and must not enable interrupts or early notification. + */ + if (attr->notify_disable || !attr->intr_disable || + attr->notify_early) + return false; + } else if (attr->fault_win) { + /* cannot be both fault and user window */ + if (attr->user_win) + return false; + + /* + * Section 3.1.4.32: Fault windows must disable notification + * but not interrupts. 
+ */ + if (!attr->notify_disable || attr->intr_disable) + return false; + + } else if (attr->user_win) { + /* + * User receive windows are only for fast-thread-wakeup + * (FTW). They don't need a FIFO and must disable interrupts + */ + if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable) + return false; + } else { + /* Rx window must be one of NX or Fault or User window. */ + return false; + } + + return true; +} + +void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) +{ + memset(rxattr, 0, sizeof(*rxattr)); + + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + rxattr->pin_win = true; + rxattr->nx_win = true; + rxattr->fault_win = false; + rxattr->intr_disable = true; + rxattr->rx_wcred_mode = true; + rxattr->tx_wcred_mode = true; + rxattr->rx_win_ord_mode = true; + rxattr->tx_win_ord_mode = true; + } else if (cop == VAS_COP_TYPE_FAULT) { + rxattr->pin_win = true; + rxattr->fault_win = true; + rxattr->notify_disable = true; + rxattr->rx_wcred_mode = true; + rxattr->tx_wcred_mode = true; + rxattr->rx_win_ord_mode = true; + rxattr->tx_win_ord_mode = true; + } else if (cop == VAS_COP_TYPE_FTW) { + rxattr->user_win = true; + rxattr->intr_disable = true; + + /* + * As noted in the VAS Workbook we disable credit checks. + * If we enable credit checks in the future, we must also + * implement a mechanism to return the user credits or new + * paste operations will fail. + */ + } +} +EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); + +struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, + struct vas_rx_win_attr *rxattr) +{ + struct vas_window *rxwin; + struct vas_winctx winctx; + struct vas_instance *vinst; + + if (!rx_win_args_valid(cop, rxattr)) + return ERR_PTR(-EINVAL); + + vinst = find_vas_instance(vasid); + if (!vinst) { + pr_devel("vasid %d not found!\n", vasid); + return ERR_PTR(-EINVAL); + } + pr_devel("Found instance %d\n", vasid); + + rxwin = vas_window_alloc(vinst); + if (IS_ERR(rxwin)) { + pr_devel("Unable to allocate memory for Rx window\n"); + return rxwin; + } + + rxwin->tx_win = false; + rxwin->nx_win = rxattr->nx_win; + rxwin->user_win = rxattr->user_win; + rxwin->cop = cop; + if (rxattr->user_win) + rxwin->pid = task_pid_vnr(current); + + init_winctx_for_rxwin(rxwin, rxattr, &winctx); + init_winctx_regs(rxwin, &winctx); + + set_vinst_win(vinst, rxwin); + + return rxwin; +} +EXPORT_SYMBOL_GPL(vas_rx_win_open); + /* stub for now */ int vas_win_close(struct vas_window *window) { diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 60a3c3c1615b..d3e4f554a11c 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -289,6 +289,9 @@ enum vas_notify_after_count { /* * One per instance of VAS. Each instance will have a separate set of * receive windows, one per coprocessor type. + * + * See also function header of set_vinst_win() for details on ->windows[] + * and ->rxwin[] tables. 
*/ struct vas_instance { int vas_id; @@ -393,6 +396,16 @@ extern struct vas_instance *find_vas_instance(int vasid); #define VREG(r) VREG_SFX(r, _OFFSET) #ifdef vas_debug +static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr) +{ + pr_err("VAS: fault %d, notify %d, intr %d early %d\n", + attr->fault_win, attr->notify_disable, + attr->intr_disable, attr->notify_early); + + pr_err("VAS: rx_fifo_size %d, max value %d\n", + attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX); +} + static inline void vas_log_write(struct vas_window *win, char *name, void *regptr, u64 val) { @@ -405,6 +418,7 @@ static inline void vas_log_write(struct vas_window *win, char *name, #else /* vas_debug */ #define vas_log_write(win, name, reg, val) +#define dump_rx_win_attr(attr) #endif /* vas_debug */ -- cgit v1.2.3-58-ga151 From 98271d4198699947d66d6f8a02c09bd27cb90022 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:38 -0700 Subject: powerpc/powernv/vas: Define vas_win_close() interface Define the vas_win_close() interface which should be used to close a send or receive windows. While the hardware configurations required to open send and receive windows differ, the configuration to close a window is the same for both. So we use a single interface to close the window. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 7 ++ arch/powerpc/platforms/powernv/vas-window.c | 101 ++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 18c59c3ade3d..96bc24d61625 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -95,4 +95,11 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *attr); +/* + * Close the send or receive window identified by @win. For receive windows + * return -EAGAIN if there are active send windows attached to this receive + * window. + */ +int vas_win_close(struct vas_window *win); + #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 783c8c6585d4..39aa0e4d8b09 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -130,7 +130,7 @@ static void unmap_region(void *addr, u64 start, int len) /* * Unmap the paste address region for a window. */ -void unmap_paste_region(struct vas_window *window) +static void unmap_paste_region(struct vas_window *window) { int len; u64 busaddr_start; @@ -522,7 +522,7 @@ static int vas_assign_window_id(struct ida *ida) return winid; } -void vas_window_free(struct vas_window *window) +static void vas_window_free(struct vas_window *window) { int winid = window->winid; struct vas_instance *vinst = window->vinst; @@ -560,6 +560,14 @@ out_free: return ERR_PTR(-ENOMEM); } +static void put_rx_win(struct vas_window *rxwin) +{ + /* Better not be a send window! */ + WARN_ON_ONCE(rxwin->tx_win); + + atomic_dec(&rxwin->num_txwins); +} + /* * Get the VAS receive window associated with NX engine identified * by @cop and if applicable, @pswid. @@ -627,7 +635,7 @@ static void set_vinst_win(struct vas_instance *vinst, * Clear this window from the table(s) of windows for this VAS instance. * See also function header of set_vinst_win(). 
*/ -void clear_vinst_win(struct vas_window *window) +static void clear_vinst_win(struct vas_window *window) { int id = window->winid; struct vas_instance *vinst = window->vinst; @@ -839,8 +847,91 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, } EXPORT_SYMBOL_GPL(vas_rx_win_open); -/* stub for now */ +static void poll_window_busy_state(struct vas_window *window) +{ + int busy; + u64 val; + +retry: + /* + * Poll Window Busy flag + */ + val = read_hvwc_reg(window, VREG(WIN_STATUS)); + busy = GET_FIELD(VAS_WIN_BUSY, val); + if (busy) { + val = 0; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ); + goto retry; + } +} + +static void poll_window_castout(struct vas_window *window) +{ + int cached; + u64 val; + + /* Cast window context out of the cache */ +retry: + val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL)); + cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val); + if (cached) { + val = 0ULL; + val = SET_FIELD(VAS_CASTOUT_REQ, val, 1); + val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0); + write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ); + goto retry; + } +} + +/* + * Close a window. + * + * See Section 1.12.1 of VAS workbook v1.05 for details on closing window: + * - Disable new paste operations (unmap paste address) + * - Poll for the "Window Busy" bit to be cleared + * - Clear the Open/Enable bit for the Window. + * - Poll for return of window Credits (implies FIFO empty for Rx win?) + * - Unpin and cast window context out of cache + * + * Besides the hardware, kernel has some bookkeeping of course. + */ int vas_win_close(struct vas_window *window) { - return -1; + u64 val; + + if (!window) + return 0; + + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { + pr_devel("Attempting to close an active Rx window!\n"); + WARN_ON_ONCE(1); + return -EBUSY; + } + + unmap_paste_region(window); + + clear_vinst_win(window); + + poll_window_busy_state(window); + + /* Unpin window from cache and close it */ + val = read_hvwc_reg(window, VREG(WINCTL)); + val = SET_FIELD(VAS_WINCTL_PIN, val, 0); + val = SET_FIELD(VAS_WINCTL_OPEN, val, 0); + write_hvwc_reg(window, VREG(WINCTL), val); + + poll_window_castout(window); + + /* if send window, drop reference to matching receive window */ + if (window->tx_win) + put_rx_win(window->rxwin); + + vas_window_free(window); + + return 0; } +EXPORT_SYMBOL_GPL(vas_win_close); -- cgit v1.2.3-58-ga151 From 5239af679a07427647b009ebb9c70b1a03ebca9b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:39 -0700 Subject: powerpc/powernv/vas: Define vas_tx_win_open() Define an interface to open a VAS send window. This interface is intended to be used the Nest Accelerator (NX) driver(s) to open a send window and use it to submit compression/encryption requests to a VAS receive window. The receive window, identified by the [vasid, cop] parameters, must already be open in VAS (i.e connected to an NX engine). 
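For illustration, a minimal sketch (not part of this patch) of how an in-kernel user such as an NX driver might pair these interfaces. The vasid, FIFO buffer and coprocessor type below are placeholders, and IS_ERR()/PTR_ERR() are assumed to come from <linux/err.h>:

/*
 * Illustrative sketch only: open a kernel receive window for an NX
 * 842 engine on VAS instance @vasid, then a send window attached to
 * it, and close them in reverse order.
 */
static struct vas_window *ex_rxwin, *ex_txwin;

static int ex_open_windows(int vasid, void *fifo, int fifo_size)
{
	struct vas_rx_win_attr rxattr;
	struct vas_tx_win_attr txattr;

	/* Receive side: one window per VAS instance per coprocessor type */
	vas_init_rx_win_attr(&rxattr, VAS_COP_TYPE_842);
	rxattr.rx_fifo = fifo;
	rxattr.rx_fifo_size = fifo_size;

	ex_rxwin = vas_rx_win_open(vasid, VAS_COP_TYPE_842, &rxattr);
	if (IS_ERR(ex_rxwin))
		return PTR_ERR(ex_rxwin);

	/* Send side: attaches to the receive window opened above */
	vas_init_tx_win_attr(&txattr, VAS_COP_TYPE_842);
	ex_txwin = vas_tx_win_open(vasid, VAS_COP_TYPE_842, &txattr);
	if (IS_ERR(ex_txwin)) {
		vas_win_close(ex_rxwin);
		return PTR_ERR(ex_txwin);
	}

	return 0;
}

static void ex_close_windows(void)
{
	vas_win_close(ex_txwin);  /* close send windows first */
	vas_win_close(ex_rxwin);  /* fails while send windows remain attached */
}
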
Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 42 ++++++++ arch/powerpc/platforms/powernv/vas-window.c | 156 +++++++++++++++++++++++++++- 2 files changed, 195 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 96bc24d61625..033461c17637 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -80,6 +80,29 @@ struct vas_rx_win_attr { int tc_mode; }; +/* + * Window attributes specified by the in-kernel owner of a send window. + */ +struct vas_tx_win_attr { + enum vas_cop_type cop; + int wcreds_max; + int lpid; + int pidr; /* hardware PID (from SPRN_PID) */ + int pid; /* linux process id */ + int pswid; + int rsvd_txbuf_count; + int tc_mode; + + bool user_win; + bool pin_win; + bool rej_no_credit; + bool rsvd_txbuf_enable; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_win_ord_mode; + bool rx_win_ord_mode; +}; + /* * Helper to initialize receive window attributes to defaults for an * NX window. @@ -95,6 +118,25 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *attr); +/* + * Helper to initialize send window attributes to defaults for an NX window. + */ +extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, + enum vas_cop_type cop); + +/* + * Open a VAS send window for the instance of VAS identified by @vasid + * and the co-processor type @cop. Use @attr to initialize attributes + * of the window. + * + * Note: The instance of VAS must already have an open receive window for + * the coprocessor type @cop. + * + * Return a handle to the send window or ERR_PTR() on error. + */ +struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, + struct vas_tx_win_attr *attr); + /* * Close the send or receive window identified by @win. For receive windows * return -EAGAIN if there are active send windows attached to this receive diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 39aa0e4d8b09..6992e304156b 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -64,7 +64,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window, * space. Unlike MMIO regions (map_mmio_region() below), paste region must * be mapped cache-able and is only applicable to send windows. */ -void *map_paste_region(struct vas_window *txwin) +static void *map_paste_region(struct vas_window *txwin) { int len; void *map; @@ -100,7 +100,6 @@ free_name: return ERR_PTR(-ENOMEM); } - static void *map_mmio_region(char *name, u64 start, int len) { void *map; @@ -574,7 +573,7 @@ static void put_rx_win(struct vas_window *rxwin) * * See also function header of set_vinst_win(). 
*/ -struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, +static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, enum vas_cop_type cop, u32 pswid) { struct vas_window *rxwin; @@ -847,6 +846,157 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, } EXPORT_SYMBOL_GPL(vas_rx_win_open); +void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) +{ + memset(txattr, 0, sizeof(*txattr)); + + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + txattr->rej_no_credit = false; + txattr->rx_wcred_mode = true; + txattr->tx_wcred_mode = true; + txattr->rx_win_ord_mode = true; + txattr->tx_win_ord_mode = true; + } else if (cop == VAS_COP_TYPE_FTW) { + txattr->user_win = true; + } +} +EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); + +static void init_winctx_for_txwin(struct vas_window *txwin, + struct vas_tx_win_attr *txattr, + struct vas_winctx *winctx) +{ + /* + * We first zero all fields and only set non-zero ones. Following + * are some fields set to 0/false for the stated reason: + * + * ->notify_os_intr_reg In powernv, send intrs to HV + * ->rsvd_txbuf_count Not supported yet. + * ->notify_disable False for NX windows + * ->xtra_write False for NX windows + * ->notify_early NA for NX windows + * ->lnotify_lpid NA for Tx windows + * ->lnotify_pid NA for Tx windows + * ->lnotify_tid NA for Tx windows + * ->tx_win_cred_mode Ignore for now for NX windows + * ->rx_win_cred_mode Ignore for now for NX windows + */ + memset(winctx, 0, sizeof(struct vas_winctx)); + + winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT; + + winctx->user_win = txattr->user_win; + winctx->nx_win = txwin->rxwin->nx_win; + winctx->pin_win = txattr->pin_win; + + winctx->rx_wcred_mode = txattr->rx_wcred_mode; + winctx->tx_wcred_mode = txattr->tx_wcred_mode; + winctx->rx_word_mode = txattr->rx_win_ord_mode; + winctx->tx_word_mode = txattr->tx_win_ord_mode; + + if (winctx->nx_win) { + winctx->data_stamp = true; + winctx->intr_disable = true; + } + + winctx->lpid = txattr->lpid; + winctx->pidr = txattr->pidr; + winctx->rx_win_id = txwin->rxwin->winid; + + winctx->dma_type = VAS_DMA_TYPE_INJECT; + winctx->tc_mode = txattr->tc_mode; + winctx->min_scope = VAS_SCOPE_LOCAL; + winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + + winctx->pswid = 0; +} + +static bool tx_win_args_valid(enum vas_cop_type cop, + struct vas_tx_win_attr *attr) +{ + if (attr->tc_mode != VAS_THRESH_DISABLED) + return false; + + if (cop > VAS_COP_TYPE_MAX) + return false; + + if (attr->user_win && + (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) + return false; + + return true; +} + +struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, + struct vas_tx_win_attr *attr) +{ + int rc; + struct vas_window *txwin; + struct vas_window *rxwin; + struct vas_winctx winctx; + struct vas_instance *vinst; + + if (!tx_win_args_valid(cop, attr)) + return ERR_PTR(-EINVAL); + + vinst = find_vas_instance(vasid); + if (!vinst) { + pr_devel("vasid %d not found!\n", vasid); + return ERR_PTR(-EINVAL); + } + + rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); + if (IS_ERR(rxwin)) { + pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); + return rxwin; + } + + txwin = vas_window_alloc(vinst); + if (IS_ERR(txwin)) { + rc = PTR_ERR(txwin); + goto put_rxwin; + } + + txwin->tx_win = 1; + txwin->rxwin = rxwin; + txwin->nx_win = txwin->rxwin->nx_win; + txwin->pid = attr->pid; + txwin->user_win = attr->user_win; + + init_winctx_for_txwin(txwin, attr, &winctx); + + init_winctx_regs(txwin, 
&winctx); + + /* + * If its a kernel send window, map the window address into the + * kernel's address space. For user windows, user must issue an + * mmap() to map the window into their address space. + * + * NOTE: If kernel ever resubmits a user CRB after handling a page + * fault, we will need to map this into kernel as well. + */ + if (!txwin->user_win) { + txwin->paste_kaddr = map_paste_region(txwin); + if (IS_ERR(txwin->paste_kaddr)) { + rc = PTR_ERR(txwin->paste_kaddr); + goto free_window; + } + } + + set_vinst_win(vinst, txwin); + + return txwin; + +free_window: + vas_window_free(txwin); + +put_rxwin: + put_rx_win(rxwin); + return ERR_PTR(rc); + +} +EXPORT_SYMBOL_GPL(vas_tx_win_open); + static void poll_window_busy_state(struct vas_window *window) { int busy; -- cgit v1.2.3-58-ga151 From 2392c8c8c0450293625dbef19ff5e206fb7b6749 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:40 -0700 Subject: powerpc/powernv/vas: Define copy/paste interfaces Define interfaces (wrappers) to the 'copy' and 'paste' instructions (which are new in PowerISA 3.0). These are intended to be used to by NX driver(s) to submit Coprocessor Request Blocks (CRBs) to the NX hardware engines. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- MAINTAINERS | 1 + arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/include/asm/vas.h | 12 ++++++++ arch/powerpc/platforms/powernv/copy-paste.h | 46 ++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/vas-window.c | 47 +++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/vas.h | 18 +++++++++-- 6 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h (limited to 'arch/powerpc/include') diff --git a/MAINTAINERS b/MAINTAINERS index 5712d9da0c9d..bb6e46d83404 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6434,6 +6434,7 @@ M: Sukadev Bhattiprolu L: linuxppc-dev@lists.ozlabs.org S: Supported F: arch/powerpc/platforms/powernv/vas* +F: arch/powerpc/platforms/powernv/copy-paste.h F: arch/powerpc/include/asm/vas.h F: arch/powerpc/include/uapi/asm/vas.h diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 041ba15aa2b9..b5e1b51b8ba9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -418,6 +418,8 @@ ___PPC_RB(b)) #define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ ___PPC_RB(b)) +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \ + ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ __PPC_RA(a) | __PPC_RS(s)) #define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 033461c17637..fd5963acd658 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -144,4 +144,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, */ int vas_win_close(struct vas_window *win); +/* + * Copy the co-processor request block (CRB) @crb into the local L2 cache. + */ +int vas_copy_crb(void *crb, int offset); + +/* + * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to + * the hardware address associated with the window @win. @re is expected/ + * assumed to be true for NX windows. 
+ */ +int vas_paste_crb(struct vas_window *win, int offset, bool re); + #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h new file mode 100644 index 000000000000..c9a503623431 --- /dev/null +++ b/arch/powerpc/platforms/powernv/copy-paste.h @@ -0,0 +1,46 @@ +/* + * Copyright 2016-17 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#define CR0_SHIFT 28 +#define CR0_MASK 0xF +/* + * Copy/paste instructions: + * + * copy RA,RB + * Copy contents of address (RA) + effective_address(RB) + * to internal copy-buffer. + * + * paste RA,RB + * Paste contents of internal copy-buffer to the address + * (RA) + effective_address(RB) + */ +static inline int vas_copy(void *crb, int offset) +{ + asm volatile(PPC_COPY(%0, %1)";" + : + : "b" (offset), "b" (crb) + : "memory"); + + return 0; +} + +static inline int vas_paste(void *paste_address, int offset) +{ + u32 cr; + + cr = 0; + asm volatile(PPC_PASTE(%1, %2)";" + "mfocrf %0, 0x80;" + : "=r" (cr) + : "b" (offset), "b" (paste_address) + : "memory", "cr0"); + + return (cr >> CR0_SHIFT) & CR0_MASK; +} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 6992e304156b..5aae845b8cd9 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -18,6 +18,7 @@ #include #include "vas.h" +#include "copy-paste.h" /* * Compute the paste address region for the window @window using the @@ -997,6 +998,52 @@ put_rxwin: } EXPORT_SYMBOL_GPL(vas_tx_win_open); +int vas_copy_crb(void *crb, int offset) +{ + return vas_copy(crb, offset); +} +EXPORT_SYMBOL_GPL(vas_copy_crb); + +#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) +int vas_paste_crb(struct vas_window *txwin, int offset, bool re) +{ + int rc; + void *addr; + uint64_t val; + + /* + * Only NX windows are supported for now and hardware assumes + * report-enable flag is set for NX windows. Ensure software + * complies too. + */ + WARN_ON_ONCE(txwin->nx_win && !re); + + addr = txwin->paste_kaddr; + if (re) { + /* + * Set the REPORT_ENABLE bit (equivalent to writing + * to 1K offset of the paste address) + */ + val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1); + addr += val; + } + + /* + * Map the raw CR value from vas_paste() to an error code (there + * is just pass or fail for now though). 
+ */ + rc = vas_paste(addr, offset); + if (rc == 2) + rc = 0; + else + rc = -EINVAL; + + print_fifo_msg_count(txwin); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_paste_crb); + static void poll_window_busy_state(struct vas_window *window) { int busy; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index d3e4f554a11c..38dee5d50f31 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -398,11 +398,11 @@ extern struct vas_instance *find_vas_instance(int vasid); #ifdef vas_debug static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr) { - pr_err("VAS: fault %d, notify %d, intr %d early %d\n", + pr_err("fault %d, notify %d, intr %d early %d\n", attr->fault_win, attr->notify_disable, attr->intr_disable, attr->notify_early); - pr_err("VAS: rx_fifo_size %d, max value %d\n", + pr_err("rx_fifo_size %d, max value %d\n", attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX); } @@ -450,4 +450,18 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } +#ifdef vas_debug + +static void print_fifo_msg_count(struct vas_window *txwin) +{ + uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o); + pr_devel("Winid %d, Msg count %llu\n", txwin->winid, + (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); +} +#else /* vas_debug */ + +#define print_fifo_msg_count(window) + +#endif /* vas_debug */ + #endif /* _VAS_H */ -- cgit v1.2.3-58-ga151 From d1e1b351f50f9e5941f436f6c63949731979e00c Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 30 Aug 2017 21:45:09 +1000 Subject: powerpc/xmon: Add ISA v3.0 SPRs to SPR dump Add support for printing the PIDR/TIDR for ISA 300 and PSSCR and PTCR in ISA 3.0 hypervisor mode. SPRN_PSSCR_PR is the privileged mode access and is used when we are not in hypervisor mode. Signed-off-by: Balbir Singh [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/xmon/xmon.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c36823d64ec9..2c4366ada976 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -356,6 +356,7 @@ #define SPRN_PMSR 0x355 /* Power Management Status Reg */ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ +#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 9d2c5e0ef305..33351c6704b1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1807,6 +1807,28 @@ static void dump_207_sprs(void) #endif } +static void dump_300_sprs(void) +{ +#ifdef CONFIG_PPC64 + bool hv = mfmsr() & MSR_HV; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + printf("pidr = %.16lx tidr = %.16lx\n", + mfspr(SPRN_PID), mfspr(SPRN_TIDR)); + printf("asdr = %.16lx psscr = %.16lx\n", + mfspr(SPRN_ASDR), hv ? 
mfspr(SPRN_PSSCR) + : mfspr(SPRN_PSSCR_PR)); + + if (!hv) + return; + + printf("ptcr = %.16lx\n", + mfspr(SPRN_PTCR)); +#endif +} + static void dump_one_spr(int spr, bool show_unimplemented) { unsigned long val; @@ -1860,6 +1882,7 @@ static void super_regs(void) dump_206_sprs(); dump_207_sprs(); + dump_300_sprs(); return; } -- cgit v1.2.3-58-ga151 From eb039161da2ff388cc30d076badd8e06fb015f33 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 9 Mar 2017 16:42:12 +1100 Subject: powerpc/asm: Convert .llong directives to .8byte .llong is an undocumented PPC specific directive. The generic equivalent is .quad, but even better (because it's self describing) is .8byte. Convert all .llong directives to .8byte. Signed-off-by: Tobin C. Harding Signed-off-by: Michael Ellerman --- arch/powerpc/boot/crt0.S | 20 ++++++++++---------- arch/powerpc/include/asm/asm-compat.h | 2 +- arch/powerpc/include/asm/feature-fixups.h | 6 +++--- arch/powerpc/include/asm/reg.h | 12 ++++++------ arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/head_64.S | 8 ++++---- arch/powerpc/kernel/reloc_64.S | 6 +++--- arch/powerpc/kernel/systbl.S | 14 +++++++------- arch/powerpc/platforms/powernv/opal-wrappers.S | 2 +- arch/powerpc/platforms/pseries/hvCall.S | 2 +- arch/powerpc/purgatory/trampoline.S | 4 ++-- .../powerpc/switch_endian/switch_endian_test.S | 2 +- 12 files changed, 40 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 12866ccb5694..dcf2f15e6797 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -26,17 +26,17 @@ _zimage_start_opd: #ifdef __powerpc64__ .balign 8 -p_start: .llong _start -p_etext: .llong _etext -p_bss_start: .llong __bss_start -p_end: .llong _end - -p_toc: .llong __toc_start + 0x8000 - p_base -p_dyn: .llong __dynamic_start - p_base -p_rela: .llong __rela_dyn_start - p_base -p_prom: .llong 0 +p_start: .8byte _start +p_etext: .8byte _etext +p_bss_start: .8byte __bss_start +p_end: .8byte _end + +p_toc: .8byte __toc_start + 0x8000 - p_base +p_dyn: .8byte __dynamic_start - p_base +p_rela: .8byte __rela_dyn_start - p_base +p_prom: .8byte 0 .weak _platform_stack_top -p_pstack: .llong _platform_stack_top +p_pstack: .8byte _platform_stack_top #else p_start: .long _start p_etext: .long _etext diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index cee3aa087653..7f2a7702596c 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -25,7 +25,7 @@ #define PPC_LCMPI stringify_in_c(cmpdi) #define PPC_LCMPLI stringify_in_c(cmpldi) #define PPC_LCMP stringify_in_c(cmpd) -#define PPC_LONG stringify_in_c(.llong) +#define PPC_LONG stringify_in_c(.8byte) #define PPC_LONG_ALIGN stringify_in_c(.balign 8) #define PPC_TLNEI stringify_in_c(tdnei) #define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 2de2319b99e2..8f88f771cc55 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -19,11 +19,11 @@ */ #if defined(CONFIG_PPC64) && !defined(__powerpc64__) /* 64 bits kernel, 32 bits code (ie. 
vdso32) */ -#define FTR_ENTRY_LONG .llong +#define FTR_ENTRY_LONG .8byte #define FTR_ENTRY_OFFSET .long 0xffffffff; .long #elif defined(CONFIG_PPC64) -#define FTR_ENTRY_LONG .llong -#define FTR_ENTRY_OFFSET .llong +#define FTR_ENTRY_LONG .8byte +#define FTR_ENTRY_OFFSET .8byte #else #define FTR_ENTRY_LONG .long #define FTR_ENTRY_OFFSET .long diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2c4366ada976..f92eaf7a4c0d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1344,12 +1344,12 @@ static inline void msr_check_and_clear(unsigned long bits) ".section __ftr_fixup,\"a\"\n" \ ".align 3\n" \ "98:\n" \ - " .llong %1\n" \ - " .llong %1\n" \ - " .llong 97b-98b\n" \ - " .llong 99b-98b\n" \ - " .llong 0\n" \ - " .llong 0\n" \ + " .8byte %1\n" \ + " .8byte %1\n" \ + " .8byte 97b-98b\n" \ + " .8byte 99b-98b\n" \ + " .8byte 0\n" \ + " .8byte 0\n" \ ".previous" \ : "=r" (rval) \ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 55e9d03b5de9..4a0fd4f40245 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1104,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) .align 3 -1: .llong rtas_restore_regs +1: .8byte rtas_restore_regs rtas_restore_regs: /* relocation is on at this point */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0ddc602b33a4..ff8511d6d8ea 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -92,13 +92,13 @@ END_FTR_SECTION(0, 1) .balign 8 .globl __secondary_hold_spinloop __secondary_hold_spinloop: - .llong 0x0 + .8byte 0x0 /* Secondary processors write this value with their cpu # */ /* after they enter the spin loop immediately below. */ .globl __secondary_hold_acknowledge __secondary_hold_acknowledge: - .llong 0x0 + .8byte 0x0 #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run @@ -650,7 +650,7 @@ __after_prom_start: bctr .balign 8 -p_end: .llong _end - copy_to_here +p_end: .8byte _end - copy_to_here 4: /* @@ -892,7 +892,7 @@ _GLOBAL(relative_toc) blr .balign 8 -p_toc: .llong __toc_start + 0x8000 - 0b +p_toc: .8byte __toc_start + 0x8000 - 0b /* * This is where the main kernel code starts. 
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index d88736fbece6..e8cfc69f59ae 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -82,7 +82,7 @@ _GLOBAL(relocate) 6: blr .balign 8 -p_dyn: .llong __dynamic_start - 0b -p_rela: .llong __rela_dyn_start - 0b -p_st: .llong _stext - 0b +p_dyn: .8byte __dynamic_start - 0b +p_rela: .8byte __rela_dyn_start - 0b +p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 4d6b1d3a747f..7ccb7f81f8db 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -17,13 +17,13 @@ #include #ifdef CONFIG_PPC64 -#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func) -#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func) -#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func) -#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) -#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) -#define PPC64ONLY(func) .llong DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall) -#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264) +#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func) +#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func) +#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func) +#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) +#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) +#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall) +#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264) #else #define SYSCALL(func) .long sys_##func #define COMPAT_SYS(func) .long sys_##func diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 951fa93f881d..8c1ede2d3f7e 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -27,7 +27,7 @@ .globl opal_tracepoint_refcount opal_tracepoint_refcount: - .llong 0 + .8byte 0 .section ".text" diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 74b5b8e239c8..c511a1743a44 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -23,7 +23,7 @@ .globl hcall_tracepoint_refcount hcall_tracepoint_refcount: - .llong 0 + .8byte 0 .section ".text" #endif diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index 30277446892c..4aad9dd10ace 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -104,13 +104,13 @@ master: .balign 8 .globl kernel kernel: - .llong 0x0 + .8byte 0x0 .size kernel, . - kernel .balign 8 .globl dt_offset dt_offset: - .llong 0x0 + .8byte 0x0 .size dt_offset, . 
- dt_offset diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S index ef7c971abb67..bceb53f57573 100644 --- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -8,7 +8,7 @@ message: .section ".toc" .balign 8 pattern: - .llong 0x5555AAAA5555AAAA + .8byte 0x5555AAAA5555AAAA .text FUNC_START(_start) -- cgit v1.2.3-58-ga151 From 2a636a56d2d39676fe85190dec102c7440e24977 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 29 Jun 2017 17:12:55 +1000 Subject: powerpc/smp: Add cpu_l2_cache_map We want to add an extra level to the CPU scheduler topology to account for cores which share a cache. To do this we need to build a cpumask for each CPU that indicates which CPUs share this cache to use as an input to the scheduler. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/smp.h | 6 ++++++ arch/powerpc/kernel/smp.c | 23 +++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 8ea98504f900..fac963e10d39 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -97,6 +97,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #endif DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); static inline struct cpumask *cpu_sibling_mask(int cpu) @@ -109,6 +110,11 @@ static inline struct cpumask *cpu_core_mask(int cpu) return per_cpu(cpu_core_map, cpu); } +static inline struct cpumask *cpu_l2_cache_mask(int cpu) +{ + return per_cpu(cpu_l2_cache_map, cpu); +} + extern int cpu_to_core_id(int cpu); /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a29c23bd9f2e..b42c868e1ac1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct thread_info *secondary_ti; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* SMP operations for this machine */ @@ -610,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); /* @@ -624,6 +628,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (smp_ops && smp_ops->probe) @@ -907,6 +912,7 @@ static void remove_cpu_from_masks(int cpu) /* NB: cpu_core_mask is a superset of the others */ for_each_cpu(i, cpu_core_mask(cpu)) { set_cpus_unrelated(cpu, i, cpu_core_mask); + set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); } } @@ -929,17 +935,22 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(i, cpu, cpu_sibling_mask); /* - * Copy the thread sibling into core sibling mask, and - * add CPUs that share a chip or an L2 to the core sibling - * mask. + * Copy the thread sibling mask into the cache sibling mask + * and mark any CPUs that share an L2 with this CPU. */ for_each_cpu(i, cpu_sibling_mask(cpu)) + set_cpus_related(cpu, i, cpu_l2_cache_mask); + update_mask_by_l2(cpu, cpu_l2_cache_mask); + + /* + * Copy the cache sibling mask into core sibling mask and mark + * any CPUs on the same chip as this CPU. + */ + for_each_cpu(i, cpu_l2_cache_mask(cpu)) set_cpus_related(cpu, i, cpu_core_mask); - if (chipid == -1) { - update_mask_by_l2(cpu, cpu_core_mask); + if (chipid == -1) return; - } for_each_cpu(i, cpu_online_mask) if (cpu_to_chip_id(i) == chipid) -- cgit v1.2.3-58-ga151 From 93b2d3cf3733b4060d3623161551f51ea1ab5499 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:24 +1000 Subject: powerpc: Correct instruction code for xxlor instruction The instruction code for xxlor that commit 0016a4cf5582 ("powerpc: Emulate most Book I instructions in emulate_step()", 2010-06-15) added is actually the code for xxlnor. It is used in get_vsr() and put_vsr() and the effect of the error is that if emulate_step is used to emulate a VSX load or store from any register other than vsr0, the bitwise complement of the correct value will be loaded or stored. This corrects the error. 
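As a quick sanity check of the two encodings (illustrative only, not part of the patch), the fields can be decoded directly; this assumes the XX3 instruction form, with the primary opcode in bits 0-5 and the 8-bit extended opcode in bits 21-28 (IBM numbering):

/*
 * Decode the primary and extended opcode fields of the old and new
 * values used for PPC_INST_XXLOR. Stand-alone user-space sketch.
 */
#include <stdio.h>

int main(void)
{
	unsigned int old_val = 0xf0000510;	/* value before this fix */
	unsigned int new_val = 0xf0000490;	/* value after this fix  */

	/* Both decode to primary opcode 60 (VSX) */
	printf("primary opcode: %u / %u\n", old_val >> 26, new_val >> 26);

	/* Extended opcode: 162 (xxlnor) for the old value, 146 (xxlor) for the new */
	printf("XX3 extended opcode: %u / %u\n",
	       (old_val >> 3) & 0xff, (new_val >> 3) & 0xff);

	return 0;
}

This makes the symptom described above easy to see: the old constant actually encoded xxlnor, so the emulation moved the bitwise complement of the intended value.
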
Fixes: 0016a4cf5582 ("powerpc: Emulate most Book I instructions in emulate_step()") Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b5e1b51b8ba9..3dd8a6e43bb2 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,7 +262,7 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_VPMSUMW 0x10000488 #define PPC_INST_VPMSUMD 0x100004c8 -#define PPC_INST_XXLOR 0xf0000510 +#define PPC_INST_XXLOR 0xf0000490 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd -- cgit v1.2.3-58-ga151 From 3cdfcbfd32b9d1c0d4a6fa80ee9c390927aab948 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:25 +1000 Subject: powerpc: Change analyse_instr so it doesn't modify *regs The analyse_instr function currently doesn't just work out what an instruction does, it also executes those instructions whose effect is only to update CPU registers that are stored in struct pt_regs. This is undesirable because optprobes uses analyse_instr to work out if an instruction could be successfully emulated in future. This changes analyse_instr so it doesn't modify *regs; instead it stores information in the instruction_op structure to indicate what registers (GPRs, CR, XER, LR) would be set and what value they would be set to. A companion function called emulate_update_regs() can then use that information to update a pt_regs struct appropriately. As a minor cleanup, this replaces inline asm using the cntlzw and cntlzd instructions with calls to __builtin_clz() and __builtin_clzl(). Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 52 +++- arch/powerpc/lib/sstep.c | 601 +++++++++++++++++++++++---------------- 2 files changed, 396 insertions(+), 257 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index d3a42cc45a82..442e6363eb5a 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -23,9 +23,6 @@ struct pt_regs; #define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024) #define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064) -/* Emulate instructions that cause a transfer of control. */ -extern int emulate_step(struct pt_regs *regs, unsigned int instr); - enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. 
*/ LOAD, @@ -55,11 +52,29 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +/* Compute flags, ORed in with type */ +#define SETREG 0x20 +#define SETCC 0x40 +#define SETXER 0x80 + +/* Branch flags, ORed in with type */ +#define SETLK 0x20 +#define BRTAKEN 0x40 +#define DECCTR 0x80 + /* Load/store flags, ORed in with type */ #define SIGNEXT 0x20 #define UPDATE 0x40 /* matches bit in opcode 31 instructions */ #define BYTEREV 0x80 +/* Barrier type field, ORed in with type */ +#define BARRIER_MASK 0xe0 +#define BARRIER_SYNC 0x00 +#define BARRIER_ISYNC 0x20 +#define BARRIER_EIEIO 0x40 +#define BARRIER_LWSYNC 0x60 +#define BARRIER_PTESYNC 0x80 + /* Cacheop values, ORed in with type */ #define CACHEOP_MASK 0x700 #define DCBST 0 @@ -83,7 +98,36 @@ struct instruction_op { int update_reg; /* For MFSPR */ int spr; + u32 ccval; + u32 xerval; }; -extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, +/* + * Decode an instruction, and return information about it in *op + * without changing *regs. + * + * Return value is 1 if the instruction can be emulated just by + * updating *regs with the information in *op, -1 if we need the + * GPRs but *regs doesn't contain the full register set, or 0 + * otherwise. + */ +extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, unsigned int instr); + +/* + * Emulate an instruction that can be executed just by updating + * fields in *regs. + */ +void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); + +/* + * Emulate instructions that cause a transfer of control, + * arithmetic/logical instructions, loads and stores, + * cache operations and barriers. + * + * Returns 1 if the instruction was emulated successfully, + * 0 if it could not be emulated, or -1 for an instruction that + * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). + */ +extern int emulate_step(struct pt_regs *regs, unsigned int instr); + diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a85b82c0e3f3..e190b8dc11a2 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -62,15 +62,17 @@ static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, /* * Determine whether a conditional branch instruction would branch. 
*/ -static nokprobe_inline int branch_taken(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline int branch_taken(unsigned int instr, + const struct pt_regs *regs, + struct instruction_op *op) { unsigned int bo = (instr >> 21) & 0x1f; unsigned int bi; if ((bo & 4) == 0) { /* decrement counter */ - --regs->ctr; - if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + op->type |= DECCTR; + if (((bo >> 1) & 1) ^ (regs->ctr == 1)) return 0; } if ((bo & 0x10) == 0) { @@ -92,7 +94,8 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, unsigned long ea, i /* * Calculate effective address for a D-form instruction */ -static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline unsigned long dform_ea(unsigned int instr, + const struct pt_regs *regs) { int ra; unsigned long ea; @@ -109,7 +112,8 @@ static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs /* * Calculate effective address for a DS-form instruction */ -static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline unsigned long dsform_ea(unsigned int instr, + const struct pt_regs *regs) { int ra; unsigned long ea; @@ -127,7 +131,7 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_reg * Calculate effective address for an X-form instruction */ static nokprobe_inline unsigned long xform_ea(unsigned int instr, - struct pt_regs *regs) + const struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -526,24 +530,27 @@ static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long), : "=r" (err) \ : "r" (addr), "i" (-EFAULT), "0" (err)) -static nokprobe_inline void set_cr0(struct pt_regs *regs, int rd) +static nokprobe_inline void set_cr0(const struct pt_regs *regs, + struct instruction_op *op, int rd) { long val = regs->gpr[rd]; - regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); + op->type |= SETCC; + op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); #ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; #endif if (val < 0) - regs->ccr |= 0x80000000; + op->ccval |= 0x80000000; else if (val > 0) - regs->ccr |= 0x40000000; + op->ccval |= 0x40000000; else - regs->ccr |= 0x20000000; + op->ccval |= 0x20000000; } -static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd, +static nokprobe_inline void add_with_carry(const struct pt_regs *regs, + struct instruction_op *op, int rd, unsigned long val1, unsigned long val2, unsigned long carry_in) { @@ -551,24 +558,29 @@ static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd, if (carry_in) ++val; - regs->gpr[rd] = val; + op->type = COMPUTE + SETREG + SETXER; + op->reg = rd; + op->val = val; #ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) { val = (unsigned int) val; val1 = (unsigned int) val1; } #endif + op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; } -static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2, - int crfld) +static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs, + struct instruction_op *op, + long v1, long v2, int crfld) { unsigned int crval, shift; + op->type = COMPUTE + SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -577,14 +589,17 @@ static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2 else crval 
|= 2; shift = (7 - crfld) * 4; - regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); + op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift); } -static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, - unsigned long v2, int crfld) +static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, + unsigned long v2, int crfld) { unsigned int crval, shift; + op->type = COMPUTE + SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -593,11 +608,12 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long else crval |= 2; shift = (7 - crfld) * 4; - regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); + op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift); } -static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1, - unsigned long v2, int rd) +static nokprobe_inline void do_cmpb(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, unsigned long v2) { unsigned long long out_val, mask; int i; @@ -608,16 +624,16 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1, if ((v1 & mask) == (v2 & mask)) out_val |= mask; } - - regs->gpr[rd] = out_val; + op->val = out_val; } /* * The size parameter is used to adjust the equivalent popcnt instruction. * popcntb = 8, popcntw = 32, popcntd = 64 */ -static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1, - int size, int ra) +static nokprobe_inline void do_popcnt(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, int size) { unsigned long long out = v1; @@ -626,23 +642,24 @@ static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1, out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f; if (size == 8) { /* popcntb */ - regs->gpr[ra] = out; + op->val = out; return; } out += out >> 8; out += out >> 16; if (size == 32) { /* popcntw */ - regs->gpr[ra] = out & 0x0000003f0000003f; + op->val = out & 0x0000003f0000003f; return; } out = (out + (out >> 32)) & 0x7f; - regs->gpr[ra] = out; /* popcntd */ + op->val = out; /* popcntd */ } #ifdef CONFIG_PPC64 -static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1, - unsigned long v2, int ra) +static nokprobe_inline void do_bpermd(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, unsigned long v2) { unsigned char perm, idx; unsigned int i; @@ -654,26 +671,27 @@ static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1, if (v2 & PPC_BIT(idx)) perm |= 1 << i; } - regs->gpr[ra] = perm; + op->val = perm; } #endif /* CONFIG_PPC64 */ /* * The size parameter adjusts the equivalent prty instruction. * prtyw = 32, prtyd = 64 */ -static nokprobe_inline void do_prty(struct pt_regs *regs, unsigned long v, - int size, int ra) +static nokprobe_inline void do_prty(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v, int size) { unsigned long long res = v ^ (v >> 8); res ^= res >> 16; if (size == 32) { /* prtyw */ - regs->gpr[ra] = res & 0x0000000100000001; + op->val = res & 0x0000000100000001; return; } res ^= res >> 32; - regs->gpr[ra] = res & 1; /*prtyd */ + op->val = res & 1; /*prtyd */ } static nokprobe_inline int trap_compare(long v1, long v2) @@ -709,14 +727,18 @@ static nokprobe_inline int trap_compare(long v1, long v2) #define ROTATE(x, n) ((n) ? 
(((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Decode an instruction, and execute it if that can be done just by - * modifying *regs (i.e. integer arithmetic and logical instructions, - * branches, and barrier instructions). - * Returns 1 if the instruction has been executed, or 0 if not. - * Sets *op to indicate what the instruction does. + * Decode an instruction, and return information about it in *op + * without changing *regs. + * Integer arithmetic and logical instructions, branches, and barrier + * instructions can be emulated just using the information in *op. + * + * Return value is 1 if the instruction can be emulated just by + * updating *regs with the information in *op, -1 if we need the + * GPRs but *regs doesn't contain the full register set, or 0 + * otherwise. */ -int analyse_instr(struct instruction_op *op, struct pt_regs *regs, - unsigned int instr) +int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; @@ -733,12 +755,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; - regs->nip += 4; - regs->nip = truncate_if_32bit(regs->msr, regs->nip); + op->val = truncate_if_32bit(regs->msr, imm); if (instr & 1) - regs->link = regs->nip; - if (branch_taken(instr, regs)) - regs->nip = truncate_if_32bit(regs->msr, imm); + op->type |= SETLK; + if (branch_taken(instr, regs, op)) + op->type |= BRTAKEN; return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ @@ -749,38 +770,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, return 0; #endif case 18: /* b */ - op->type = BRANCH; + op->type = BRANCH | BRTAKEN; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; if ((instr & 2) == 0) imm += regs->nip; + op->val = truncate_if_32bit(regs->msr, imm); if (instr & 1) - regs->link = truncate_if_32bit(regs->msr, regs->nip + 4); - imm = truncate_if_32bit(regs->msr, imm); - regs->nip = imm; + op->type |= SETLK; return 1; case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ + op->type = COMPUTE + SETCC; rd = 7 - ((instr >> 23) & 0x7); ra = 7 - ((instr >> 18) & 0x7); rd *= 4; ra *= 4; val = (regs->ccr >> ra) & 0xf; - regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); - goto instr_done; + op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + return 1; case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; imm = (instr & 0x400)? 
regs->ctr: regs->link; - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - imm = truncate_if_32bit(regs->msr, imm); + op->val = truncate_if_32bit(regs->msr, imm); if (instr & 1) - regs->link = regs->nip; - if (branch_taken(instr, regs)) - regs->nip = imm; + op->type |= SETLK; + if (branch_taken(instr, regs, op)) + op->type |= BRTAKEN; return 1; case 18: /* rfid, scary */ @@ -790,9 +810,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, return 0; case 150: /* isync */ - op->type = BARRIER; - isync(); - goto instr_done; + op->type = BARRIER | BARRIER_ISYNC; + return 1; case 33: /* crnor */ case 129: /* crandc */ @@ -802,45 +821,47 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 289: /* creqv */ case 417: /* crorc */ case 449: /* cror */ + op->type = COMPUTE + SETCC; ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; rd = (instr >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; val = (instr >> (6 + ra * 2 + rb)) & 1; - regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) | + op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); - goto instr_done; + return 1; + default: + op->type = UNKNOWN; + return 0; } break; case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ - op->type = BARRIER; + op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ - asm volatile("lwsync" : : : "memory"); - goto instr_done; + op->type = BARRIER + BARRIER_LWSYNC; + break; case 2: /* ptesync */ - asm volatile("ptesync" : : : "memory"); - goto instr_done; + op->type = BARRIER + BARRIER_PTESYNC; + break; } #endif - mb(); - goto instr_done; + return 1; case 854: /* eieio */ - op->type = BARRIER; - eieio(); - goto instr_done; + op->type = BARRIER + BARRIER_EIEIO; + return 1; } break; } /* Following cases refer to regs->gpr[], so we need all regs */ if (!FULL_REGS(regs)) - return 0; + return -1; rd = (instr >> 21) & 0x1f; ra = (instr >> 16) & 0x1f; @@ -851,21 +872,21 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 2: /* tdi */ if (rd & trap_compare(regs->gpr[ra], (short) instr)) goto trap; - goto instr_done; + return 1; #endif case 3: /* twi */ if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) goto trap; - goto instr_done; + return 1; case 7: /* mulli */ - regs->gpr[rd] = regs->gpr[ra] * (short) instr; - goto instr_done; + op->val = regs->gpr[ra] * (short) instr; + goto compute_done; case 8: /* subfic */ imm = (short) instr; - add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1); - goto instr_done; + add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); + return 1; case 10: /* cmpli */ imm = (unsigned short) instr; @@ -874,8 +895,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, if ((rd & 1) == 0) val = (unsigned int) val; #endif - do_cmp_unsigned(regs, val, imm, rd >> 2); - goto instr_done; + do_cmp_unsigned(regs, op, val, imm, rd >> 2); + return 1; case 11: /* cmpi */ imm = (short) instr; @@ -884,47 +905,47 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, if ((rd & 1) == 0) val = (int) val; #endif - do_cmp_signed(regs, val, imm, rd >> 2); - goto instr_done; + do_cmp_signed(regs, op, val, imm, rd >> 2); + return 1; case 12: /* addic */ imm = (short) instr; - add_with_carry(regs, rd, regs->gpr[ra], imm, 0); - goto instr_done; + add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); + return 1; case 13: /* addic. 
*/ imm = (short) instr; - add_with_carry(regs, rd, regs->gpr[ra], imm, 0); - set_cr0(regs, rd); - goto instr_done; + add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); + set_cr0(regs, op, rd); + return 1; case 14: /* addi */ imm = (short) instr; if (ra) imm += regs->gpr[ra]; - regs->gpr[rd] = imm; - goto instr_done; + op->val = imm; + goto compute_done; case 15: /* addis */ imm = ((short) instr) << 16; if (ra) imm += regs->gpr[ra]; - regs->gpr[rd] = imm; - goto instr_done; + op->val = imm; + goto compute_done; case 20: /* rlwimi */ mb = (instr >> 6) & 0x1f; me = (instr >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); - regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); + op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ mb = (instr >> 6) & 0x1f; me = (instr >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); - regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me); + op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ @@ -932,40 +953,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, me = (instr >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); - regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me); + op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] | imm; - goto instr_done; + op->val = regs->gpr[rd] | (unsigned short) instr; + goto logical_done_nocc; case 25: /* oris */ imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] | (imm << 16); - goto instr_done; + op->val = regs->gpr[rd] | (imm << 16); + goto logical_done_nocc; case 26: /* xori */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] ^ imm; - goto instr_done; + op->val = regs->gpr[rd] ^ (unsigned short) instr; + goto logical_done_nocc; case 27: /* xoris */ imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16); - goto instr_done; + op->val = regs->gpr[rd] ^ (imm << 16); + goto logical_done_nocc; case 28: /* andi. */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] & imm; - set_cr0(regs, ra); - goto instr_done; + op->val = regs->gpr[rd] & (unsigned short) instr; + set_cr0(regs, op, ra); + goto logical_done_nocc; case 29: /* andis. 
*/ imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] & (imm << 16); - set_cr0(regs, ra); - goto instr_done; + op->val = regs->gpr[rd] & (imm << 16); + set_cr0(regs, op, ra); + goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ @@ -976,34 +994,36 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, val = ROTATE(val, sh); switch ((instr >> 2) & 3) { case 0: /* rldicl */ - regs->gpr[ra] = val & MASK64_L(mb); - goto logical_done; + val &= MASK64_L(mb); + break; case 1: /* rldicr */ - regs->gpr[ra] = val & MASK64_R(mb); - goto logical_done; + val &= MASK64_R(mb); + break; case 2: /* rldic */ - regs->gpr[ra] = val & MASK64(mb, 63 - sh); - goto logical_done; + val &= MASK64(mb, 63 - sh); + break; case 3: /* rldimi */ imm = MASK64(mb, 63 - sh); - regs->gpr[ra] = (regs->gpr[ra] & ~imm) | + val = (regs->gpr[ra] & ~imm) | (val & imm); - goto logical_done; } + op->val = val; + goto logical_done; } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); switch ((instr >> 1) & 7) { case 0: /* rldcl */ - regs->gpr[ra] = val & MASK64_L(mb); + op->val = val & MASK64_L(mb); goto logical_done; case 1: /* rldcr */ - regs->gpr[ra] = val & MASK64_R(mb); + op->val = val & MASK64_R(mb); goto logical_done; } } #endif - break; /* illegal instruction */ + op->type = UNKNOWN; /* illegal instruction */ + return 0; case 31: switch ((instr >> 1) & 0x3ff) { @@ -1012,12 +1032,12 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, (rd & trap_compare((int)regs->gpr[ra], (int)regs->gpr[rb]))) goto trap; - goto instr_done; + return 1; #ifdef __powerpc64__ case 68: /* td */ if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) goto trap; - goto instr_done; + return 1; #endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) @@ -1046,74 +1066,50 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, #endif case 19: /* mfcr */ + imm = 0xffffffffUL; if ((instr >> 20) & 1) { imm = 0xf0000000UL; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) { - regs->gpr[rd] = regs->ccr & imm; + if (instr & (0x80000 >> sh)) break; - } imm >>= 4; } - - goto instr_done; } - - regs->gpr[rd] = regs->ccr; - regs->gpr[rd] &= 0xffffffffUL; - goto instr_done; + op->val = regs->ccr & imm; + goto compute_done; case 144: /* mtcrf */ + op->type = COMPUTE + SETCC; imm = 0xf0000000UL; val = regs->gpr[rd]; + op->val = regs->ccr; for (sh = 0; sh < 8; ++sh) { if (instr & (0x80000 >> sh)) - regs->ccr = (regs->ccr & ~imm) | + op->val = (op->val & ~imm) | (val & imm); imm >>= 4; } - goto instr_done; + return 1; case 339: /* mfspr */ spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); - switch (spr) { - case SPRN_XER: /* mfxer */ - regs->gpr[rd] = regs->xer; - regs->gpr[rd] &= 0xffffffffUL; - goto instr_done; - case SPRN_LR: /* mflr */ - regs->gpr[rd] = regs->link; - goto instr_done; - case SPRN_CTR: /* mfctr */ - regs->gpr[rd] = regs->ctr; - goto instr_done; - default: - op->type = MFSPR; - op->reg = rd; - op->spr = spr; - return 0; - } - break; + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + if (spr == SPRN_XER || spr == SPRN_LR || + spr == SPRN_CTR) + return 1; + return 0; case 467: /* mtspr */ spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); - switch (spr) { - case SPRN_XER: /* mtxer */ - regs->xer = (regs->gpr[rd] & 0xffffffffUL); - goto instr_done; - case SPRN_LR: /* mtlr */ - regs->link = regs->gpr[rd]; - goto instr_done; - case SPRN_CTR: /* mtctr */ - regs->ctr = regs->gpr[rd]; - goto instr_done; - default: - op->type = MTSPR; - op->val = regs->gpr[rd]; - 
op->spr = spr; - return 0; - } - break; + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + if (spr == SPRN_XER || spr == SPRN_LR || + spr == SPRN_CTR) + return 1; + return 0; /* * Compare instructions @@ -1128,8 +1124,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, val2 = (int) val2; } #endif - do_cmp_signed(regs, val, val2, rd >> 2); - goto instr_done; + do_cmp_signed(regs, op, val, val2, rd >> 2); + return 1; case 32: /* cmpl */ val = regs->gpr[ra]; @@ -1141,113 +1137,113 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, val2 = (unsigned int) val2; } #endif - do_cmp_unsigned(regs, val, val2, rd >> 2); - goto instr_done; + do_cmp_unsigned(regs, op, val, val2, rd >> 2); + return 1; case 508: /* cmpb */ - do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra); - goto instr_done; + do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]); + goto logical_done_nocc; /* * Arithmetic instructions */ case 8: /* subfc */ - add_with_carry(regs, rd, ~regs->gpr[ra], + add_with_carry(regs, op, rd, ~regs->gpr[ra], regs->gpr[rb], 1); goto arith_done; #ifdef __powerpc64__ case 9: /* mulhdu */ - asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhdu %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; #endif case 10: /* addc */ - add_with_carry(regs, rd, regs->gpr[ra], + add_with_carry(regs, op, rd, regs->gpr[ra], regs->gpr[rb], 0); goto arith_done; case 11: /* mulhwu */ - asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhwu %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; case 40: /* subf */ - regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra]; + op->val = regs->gpr[rb] - regs->gpr[ra]; goto arith_done; #ifdef __powerpc64__ case 73: /* mulhd */ - asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhd %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; #endif case 75: /* mulhw */ - asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhw %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; case 104: /* neg */ - regs->gpr[rd] = -regs->gpr[ra]; + op->val = -regs->gpr[ra]; goto arith_done; case 136: /* subfe */ - add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb], - regs->xer & XER_CA); + add_with_carry(regs, op, rd, ~regs->gpr[ra], + regs->gpr[rb], regs->xer & XER_CA); goto arith_done; case 138: /* adde */ - add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb], - regs->xer & XER_CA); + add_with_carry(regs, op, rd, regs->gpr[ra], + regs->gpr[rb], regs->xer & XER_CA); goto arith_done; case 200: /* subfze */ - add_with_carry(regs, rd, ~regs->gpr[ra], 0L, + add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L, regs->xer & XER_CA); goto arith_done; case 202: /* addze */ - add_with_carry(regs, rd, regs->gpr[ra], 0L, + add_with_carry(regs, op, rd, regs->gpr[ra], 0L, regs->xer & XER_CA); goto arith_done; case 232: /* subfme */ - add_with_carry(regs, rd, ~regs->gpr[ra], -1L, + add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L, regs->xer & XER_CA); goto arith_done; #ifdef __powerpc64__ case 233: /* mulld */ - regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb]; + op->val = regs->gpr[ra] * regs->gpr[rb]; goto arith_done; #endif case 234: /* addme */ - add_with_carry(regs, rd, regs->gpr[ra], -1L, + add_with_carry(regs, op, rd, regs->gpr[ra], -1L, regs->xer & XER_CA); goto arith_done; case 235: /* mullw */ - regs->gpr[rd] = (unsigned int) regs->gpr[ra] * + op->val = (unsigned int) regs->gpr[ra] * (unsigned int) 
regs->gpr[rb]; goto arith_done; case 266: /* add */ - regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb]; + op->val = regs->gpr[ra] + regs->gpr[rb]; goto arith_done; #ifdef __powerpc64__ case 457: /* divdu */ - regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb]; + op->val = regs->gpr[ra] / regs->gpr[rb]; goto arith_done; #endif case 459: /* divwu */ - regs->gpr[rd] = (unsigned int) regs->gpr[ra] / + op->val = (unsigned int) regs->gpr[ra] / (unsigned int) regs->gpr[rb]; goto arith_done; #ifdef __powerpc64__ case 489: /* divd */ - regs->gpr[rd] = (long int) regs->gpr[ra] / + op->val = (long int) regs->gpr[ra] / (long int) regs->gpr[rb]; goto arith_done; #endif case 491: /* divw */ - regs->gpr[rd] = (int) regs->gpr[ra] / + op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; @@ -1260,85 +1256,83 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, val = (regs->ccr >> (31 - mb)) & 1; val2 = (ra) ? regs->gpr[ra] : 0; - regs->gpr[rd] = (val) ? val2 : regs->gpr[rb]; - goto logical_done; + op->val = (val) ? val2 : regs->gpr[rb]; + goto compute_done; case 26: /* cntlzw */ - asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) : - "r" (regs->gpr[rd])); + op->val = __builtin_clz((unsigned int) regs->gpr[rd]); goto logical_done; #ifdef __powerpc64__ case 58: /* cntlzd */ - asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) : - "r" (regs->gpr[rd])); + op->val = __builtin_clzl(regs->gpr[rd]); goto logical_done; #endif case 28: /* and */ - regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb]; + op->val = regs->gpr[rd] & regs->gpr[rb]; goto logical_done; case 60: /* andc */ - regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb]; + op->val = regs->gpr[rd] & ~regs->gpr[rb]; goto logical_done; case 122: /* popcntb */ - do_popcnt(regs, regs->gpr[rd], 8, ra); + do_popcnt(regs, op, regs->gpr[rd], 8); goto logical_done; case 124: /* nor */ - regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]); + op->val = ~(regs->gpr[rd] | regs->gpr[rb]); goto logical_done; case 154: /* prtyw */ - do_prty(regs, regs->gpr[rd], 32, ra); + do_prty(regs, op, regs->gpr[rd], 32); goto logical_done; case 186: /* prtyd */ - do_prty(regs, regs->gpr[rd], 64, ra); + do_prty(regs, op, regs->gpr[rd], 64); goto logical_done; #ifdef CONFIG_PPC64 case 252: /* bpermd */ - do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra); + do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]); goto logical_done; #endif case 284: /* xor */ - regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]); + op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]); goto logical_done; case 316: /* xor */ - regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb]; + op->val = regs->gpr[rd] ^ regs->gpr[rb]; goto logical_done; case 378: /* popcntw */ - do_popcnt(regs, regs->gpr[rd], 32, ra); + do_popcnt(regs, op, regs->gpr[rd], 32); goto logical_done; case 412: /* orc */ - regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb]; + op->val = regs->gpr[rd] | ~regs->gpr[rb]; goto logical_done; case 444: /* or */ - regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb]; + op->val = regs->gpr[rd] | regs->gpr[rb]; goto logical_done; case 476: /* nand */ - regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]); + op->val = ~(regs->gpr[rd] & regs->gpr[rb]); goto logical_done; #ifdef CONFIG_PPC64 case 506: /* popcntd */ - do_popcnt(regs, regs->gpr[rd], 64, ra); + do_popcnt(regs, op, regs->gpr[rd], 64); goto logical_done; #endif case 922: /* extsh */ - regs->gpr[ra] = (signed short) regs->gpr[rd]; + op->val = (signed short) regs->gpr[rd]; goto logical_done; case 954: /* extsb */ - regs->gpr[ra] = (signed char) regs->gpr[rd]; + op->val = (signed char) 
regs->gpr[rd]; goto logical_done; #ifdef __powerpc64__ case 986: /* extsw */ - regs->gpr[ra] = (signed int) regs->gpr[rd]; + op->val = (signed int) regs->gpr[rd]; goto logical_done; #endif @@ -1348,75 +1342,83 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 24: /* slw */ sh = regs->gpr[rb] & 0x3f; if (sh < 32) - regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL; + op->val = (regs->gpr[rd] << sh) & 0xffffffffUL; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 536: /* srw */ sh = regs->gpr[rb] & 0x3f; if (sh < 32) - regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh; + op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 792: /* sraw */ + op->type = COMPUTE + SETREG + SETXER; sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; - regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); + op->val = ival >> (sh < 32 ? sh : 31); + op->xerval = regs->xer; if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; goto logical_done; case 824: /* srawi */ + op->type = COMPUTE + SETREG + SETXER; sh = rb; ival = (signed int) regs->gpr[rd]; - regs->gpr[ra] = ival >> sh; + op->val = ival >> sh; + op->xerval = regs->xer; if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; goto logical_done; #ifdef __powerpc64__ case 27: /* sld */ sh = regs->gpr[rb] & 0x7f; if (sh < 64) - regs->gpr[ra] = regs->gpr[rd] << sh; + op->val = regs->gpr[rd] << sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 539: /* srd */ sh = regs->gpr[rb] & 0x7f; if (sh < 64) - regs->gpr[ra] = regs->gpr[rd] >> sh; + op->val = regs->gpr[rd] >> sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 794: /* srad */ + op->type = COMPUTE + SETREG + SETXER; sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; - regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); + op->val = ival >> (sh < 64 ? sh : 63); + op->xerval = regs->xer; if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; goto logical_done; case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ + op->type = COMPUTE + SETREG + SETXER; sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; - regs->gpr[ra] = ival >> sh; + op->val = ival >> sh; + op->xerval = regs->xer; if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; goto logical_done; #endif /* __powerpc64__ */ @@ -1787,15 +1789,18 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, logical_done: if (instr & 1) - set_cr0(regs, ra); - goto instr_done; + set_cr0(regs, op, ra); + logical_done_nocc: + op->reg = ra; + op->type |= SETREG; + return 1; arith_done: if (instr & 1) - set_cr0(regs, rd); - - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + set_cr0(regs, op, rd); + compute_done: + op->reg = rd; + op->type |= SETREG; return 1; priv: @@ -1886,6 +1891,92 @@ static nokprobe_inline void do_byterev(unsigned long *valp, int size) } } +/* + * Emulate an instruction that can be executed just by updating + * fields in *regs. 
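+ * Which fields of *op are to be applied is encoded in op->type by
+ * the flag bits that analyse_instr() sets alongside the base type:
+ * SETREG selects op->val, SETCC op->ccval, SETXER op->xerval, and
+ * SETLK a link-register update.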
+ */ +void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) +{ + unsigned long next_pc; + + next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); + switch (op->type & INSTR_TYPE_MASK) { + case COMPUTE: + if (op->type & SETREG) + regs->gpr[op->reg] = op->val; + if (op->type & SETCC) + regs->ccr = op->ccval; + if (op->type & SETXER) + regs->xer = op->xerval; + break; + + case BRANCH: + if (op->type & SETLK) + regs->link = next_pc; + if (op->type & BRTAKEN) + next_pc = op->val; + if (op->type & DECCTR) + --regs->ctr; + break; + + case BARRIER: + switch (op->type & BARRIER_MASK) { + case BARRIER_SYNC: + mb(); + break; + case BARRIER_ISYNC: + isync(); + break; + case BARRIER_EIEIO: + eieio(); + break; + case BARRIER_LWSYNC: + asm volatile("lwsync" : : : "memory"); + break; + case BARRIER_PTESYNC: + asm volatile("ptesync" : : : "memory"); + break; + } + break; + + case MFSPR: + switch (op->spr) { + case SPRN_XER: + regs->gpr[op->reg] = regs->xer & 0xffffffffUL; + break; + case SPRN_LR: + regs->gpr[op->reg] = regs->link; + break; + case SPRN_CTR: + regs->gpr[op->reg] = regs->ctr; + break; + default: + WARN_ON_ONCE(1); + } + break; + + case MTSPR: + switch (op->spr) { + case SPRN_XER: + regs->xer = op->val & 0xffffffffUL; + break; + case SPRN_LR: + regs->link = op->val; + break; + case SPRN_CTR: + regs->ctr = op->val; + break; + default: + WARN_ON_ONCE(1); + } + break; + + default: + WARN_ON_ONCE(1); + } + regs->nip = next_pc; +} + /* * Emulate instructions that cause a transfer of control, * loads and stores, and a few other instructions. @@ -1902,8 +1993,12 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) int i, rd, nb; r = analyse_instr(&op, regs, instr); - if (r != 0) + if (r < 0) return r; + if (r > 0) { + emulate_update_regs(regs, &op); + return 1; + } err = 0; size = GETSIZE(op.type); -- cgit v1.2.3-58-ga151 From 350779a29f11f80ac66a8b38a7718ad30f003f18 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:27 +1000 Subject: powerpc: Handle most loads and stores in instruction emulation code This extends the instruction emulation infrastructure in sstep.c to handle all the load and store instructions defined in the Power ISA v3.0, except for the atomic memory operations, ldmx (which was never implemented), lfdp/stfdp, and the vector element load/stores. The instructions added are: Integer loads and stores: lbarx, lharx, lqarx, stbcx., sthcx., stqcx., lq, stq. VSX loads and stores: lxsiwzx, lxsiwax, stxsiwx, lxvx, lxvl, lxvll, lxvdsx, lxvwsx, stxvx, stxvl, stxvll, lxsspx, lxsdx, stxsspx, stxsdx, lxvw4x, lxsibzx, lxvh8x, lxsihzx, lxvb16x, stxvw4x, stxsibx, stxvh8x, stxsihx, stxvb16x, lxsd, lxssp, lxv, stxsd, stxssp, stxv. These instructions are handled both in the analyse_instr phase and in the emulate_step phase. The code for lxvd2ux and stxvd2ux has been taken out, as those instructions were never implemented in any processor and have been taken out of the architecture, and their opcodes have been reused for other instructions in POWER9 (lxvb16x and stxvb16x). The emulation for the VSX loads and stores uses helper functions which don't access registers or memory directly, which can hopefully be reused by KVM later. 
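As a rough sketch (not code from this patch, though all of the names are),
the intended calling pattern for a VSX load is: do the faultable memory
access first, into a plain byte buffer, and only then form and commit the
register image, so the helper in the middle touches neither registers nor
memory. The MSR facility check the real code performs is elided here:

	char mem[16];
	union vsx_reg buf;
	int size = GETSIZE(op.type);

	if (!address_ok(regs, op.ea, size) ||
	    __copy_from_user(mem, (void __user *)op.ea, size))
		return 0;		/* take the access fault normally */
	emulate_vsx_load(&op, &buf, mem);	/* pure: bytes -> register image */
	load_vsrn(op.reg, &buf);		/* only this step touches VSX state */

A store runs the same steps in reverse: store_vsrn() captures the register
into a vsx_reg image, emulate_vsx_store() serialises it into the byte
buffer, and the buffer is copied out to memory.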
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 8 + arch/powerpc/include/asm/sstep.h | 21 ++ arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/ldstfp.S | 70 ++-- arch/powerpc/lib/quad.S | 62 ++++ arch/powerpc/lib/sstep.c | 610 +++++++++++++++++++++++++++++++--- 6 files changed, 710 insertions(+), 62 deletions(-) create mode 100644 arch/powerpc/lib/quad.S (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3dd8a6e43bb2..ce0930d68857 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -205,6 +205,8 @@ #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 #define PPC_INST_STDCX 0x7c0001ad +#define PPC_INST_LQARX 0x7c000228 +#define PPC_INST_STQCX 0x7c00016d #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -403,12 +405,18 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) #define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 442e6363eb5a..980197024c0b 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -83,6 +83,12 @@ enum instruction_type { #define DCBT 0x300 #define ICBI 0x400 +/* VSX flags values */ +#define VSX_FPCONV 1 /* do floating point SP/DP conversion */ +#define VSX_SPLAT 2 /* store loaded value into all elements */ +#define VSX_LDLEFT 4 /* load VSX register from left */ +#define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ + /* Size field in type word */ #define SIZE(n) ((n) << 8) #define GETSIZE(w) ((w) >> 8) @@ -100,6 +106,17 @@ struct instruction_op { int spr; u32 ccval; u32 xerval; + u8 element_size; /* for VSX/VMX loads/stores */ + u8 vsx_flags; +}; + +union vsx_reg { + u8 b[16]; + u16 h[8]; + u32 w[4]; + unsigned long d[2]; + float fp[4]; + double dp[2]; }; /* @@ -131,3 +148,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); +extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem); +extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 3c3146ba62da..400778d7accc 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -32,6 +32,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o +obj64-$(CONFIG_PPC_EMULATE_SSTEP) += quad.o obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 
a58777c1b2cb..6840911e7a01 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -178,10 +178,10 @@ _GLOBAL(do_stfd) EX_TABLE(2b,3b) #ifdef CONFIG_ALTIVEC -/* Get the contents of vrN into v0; N is in r3. */ +/* Get the contents of vrN into v0; N is in r3. Doesn't touch r3 or r4. */ _GLOBAL(get_vr) mflr r0 - rlwinm r3,r3,3,0xf8 + rlwinm r6,r3,3,0xf8 bcl 20,31,1f blr /* v0 is already in v0 */ nop @@ -192,15 +192,15 @@ reg = 1 reg = reg + 1 .endr 1: mflr r5 - add r5,r3,r5 + add r5,r6,r5 mtctr r5 mtlr r0 bctr -/* Put the contents of v0 into vrN; N is in r3. */ +/* Put the contents of v0 into vrN; N is in r3. Doesn't touch r3 or r4. */ _GLOBAL(put_vr) mflr r0 - rlwinm r3,r3,3,0xf8 + rlwinm r6,r3,3,0xf8 bcl 20,31,1f blr /* v0 is already in v0 */ nop @@ -211,7 +211,7 @@ reg = 1 reg = reg + 1 .endr 1: mflr r5 - add r5,r3,r5 + add r5,r6,r5 mtctr r5 mtlr r0 bctr @@ -313,7 +313,7 @@ reg = reg + 1 bctr /* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */ -_GLOBAL(do_lxvd2x) +_GLOBAL(load_vsrn) PPC_STLU r1,-STKFRM(r1) mflr r0 PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) @@ -325,41 +325,38 @@ _GLOBAL(do_lxvd2x) isync beq cr7,1f STXVD2X(0,R1,R8) -1: li r9,-EFAULT -2: LXVD2X(0,R0,R4) - li r9,0 -3: beq cr7,4f +1: LXVD2X(0,R0,R4) +#ifdef __LITTLE_ENDIAN__ + XXSWAPD(0,0) +#endif + beq cr7,4f bl put_vsr LXVD2X(0,R1,R8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) isync - mr r3,r9 addi r1,r1,STKFRM blr - EX_TABLE(2b,3b) /* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */ -_GLOBAL(do_stxvd2x) +_GLOBAL(store_vsrn) PPC_STLU r1,-STKFRM(r1) mflr r0 PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) mfmsr r6 oris r7,r6,MSR_VSX@h - cmpwi cr7,r3,0 li r8,STKFRM-16 MTMSRD(r7) isync - beq cr7,1f STXVD2X(0,R1,R8) bl get_vsr -1: li r9,-EFAULT -2: STXVD2X(0,R0,R4) - li r9,0 -3: beq cr7,4f +#ifdef __LITTLE_ENDIAN__ + XXSWAPD(0,0) +#endif + STXVD2X(0,R0,R4) LXVD2X(0,R1,R8) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) + PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) isync @@ -367,7 +364,36 @@ _GLOBAL(do_stxvd2x) addi r1,r1,STKFRM blr EX_TABLE(2b,3b) - #endif /* CONFIG_VSX */ +/* Convert single-precision to double, without disturbing FPRs. */ +/* conv_sp_to_dp(float *sp, double *dp) */ +_GLOBAL(conv_sp_to_dp) + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync + stfd fr0, -16(r1) + lfs fr0, 0(r3) + stfd fr0, 0(r4) + lfd fr0, -16(r1) + MTMSRD(r6) + isync + blr + +/* Convert single-precision to double, without disturbing FPRs. */ +/* conv_sp_to_dp(double *dp, float *sp) */ +_GLOBAL(conv_dp_to_sp) + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync + stfd fr0, -16(r1) + lfd fr0, 0(r3) + stfs fr0, 0(r4) + lfd fr0, -16(r1) + MTMSRD(r6) + isync + blr + #endif /* CONFIG_PPC_FPU */ diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S new file mode 100644 index 000000000000..c4d12fae8724 --- /dev/null +++ b/arch/powerpc/lib/quad.S @@ -0,0 +1,62 @@ +/* + * Quadword loads and stores + * for use in instruction emulation. + * + * Copyright 2017 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <linux/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/export.h>
+
+/* do_lq(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lq)
+1:	lq	r6, 0(r3)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */
+_GLOBAL(do_stq)
+1:	stq	r4, 0(r3)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_lqarx(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lqarx)
+1:	PPC_LQARX(6, 0, 3, 0)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+	    unsigned int *crp) */
+
+_GLOBAL(do_stqcx)
+1:	PPC_STQCX(4, 0, 3)
+	mfcr	r5
+	stw	r5, 0(r6)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index b80ccff23657..b1ba74117ad5 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -42,8 +42,29 @@ extern int do_stfs(int rn, unsigned long ea);
 extern int do_stfd(int rn, unsigned long ea);
 extern int do_lvx(int rn, unsigned long ea);
 extern int do_stvx(int rn, unsigned long ea);
-extern int do_lxvd2x(int rn, unsigned long ea);
-extern int do_stxvd2x(int rn, unsigned long ea);
+extern void load_vsrn(int vsr, const void *p);
+extern void store_vsrn(int vsr, void *p);
+extern void conv_sp_to_dp(const float *sp, double *dp);
+extern void conv_dp_to_sp(const double *dp, float *sp);
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+		    unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE	1
+#define IS_BE	0
+#else
+#define IS_LE	0
+#define IS_BE	1
 #endif
 
 /*
@@ -125,6 +146,23 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
 	return truncate_if_32bit(regs->msr, ea);
 }
 
+/*
+ * Calculate effective address for a DQ-form instruction
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+					       const struct pt_regs *regs)
+{
+	int ra;
+	unsigned long ea;
+
+	ra = (instr >> 16) & 0x1f;
+	ea = (signed short) (instr & ~0xf);	/* sign-extend */
+	if (ra)
+		ea += regs->gpr[ra];
+
+	return truncate_if_32bit(regs->msr, ea);
+}
 #endif /* __powerpc64 */
 
 /*
@@ -454,43 +492,195 @@ static nokprobe_inline int do_vec_store(int rn, int (*func)(int, unsigned long),
 }
 #endif /* CONFIG_ALTIVEC */
 
-#ifdef CONFIG_VSX
-static nokprobe_inline int do_vsx_load(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, struct pt_regs *regs)
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+				      int reg)
 {
 	int err;
-	unsigned long val[2];
 
 	if (!address_ok(regs, ea, 16))
 		return -EFAULT;
-	if ((ea & 3) == 0)
-		return (*func)(rn, ea);
-	err = read_mem_unaligned(&val[0], ea, 8, regs);
-	if (!err)
-		err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
+	/* if aligned, should be atomic */
+	if ((ea & 0xf) == 0)
+		return do_lq(ea, &regs->gpr[reg]);
+
+	err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs);
 	if (!err)
-		err = (*func)(rn, (unsigned long) &val[0]);
+		err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs);
 	return err;
 }
 
-static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, struct pt_regs
*regs) +static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, + int reg) { int err; - unsigned long val[2]; if (!address_ok(regs, ea, 16)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - err = (*func)(rn, (unsigned long) &val[0]); - if (err) - return err; - err = write_mem_unaligned(val[0], ea, 8, regs); + /* if aligned, should be atomic */ + if ((ea & 0xf) == 0) + return do_stq(ea, regs->gpr[reg], regs->gpr[reg + 1]); + + err = write_mem(regs->gpr[reg + IS_LE], ea, 8, regs); if (!err) - err = write_mem_unaligned(val[1], ea + 8, 8, regs); + err = write_mem(regs->gpr[reg + IS_BE], ea + 8, 8, regs); return err; } +#endif /* __powerpc64 */ + +#ifdef CONFIG_VSX +void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem) +{ + int size, read_size; + int i, j; + const unsigned int *wp; + const unsigned short *hp; + const unsigned char *bp; + + size = GETSIZE(op->type); + reg->d[0] = reg->d[1] = 0; + + switch (op->element_size) { + case 16: + /* whole vector; lxv[x] or lxvl[l] */ + if (size == 0) + break; + memcpy(reg, mem, size); + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { + /* reverse 16 bytes */ + unsigned long tmp; + tmp = byterev_8(reg->d[0]); + reg->d[0] = byterev_8(reg->d[1]); + reg->d[1] = tmp; + } + break; + case 8: + /* scalar loads, lxvd2x, lxvdsx */ + read_size = (size >= 8) ? 8 : size; + i = IS_LE ? 8 : 8 - read_size; + memcpy(®->b[i], mem, read_size); + if (size < 8) { + if (op->type & SIGNEXT) { + /* size == 4 is the only case here */ + reg->d[IS_LE] = (signed int) reg->d[IS_LE]; + } else if (op->vsx_flags & VSX_FPCONV) { + preempt_disable(); + conv_sp_to_dp(®->fp[1 + IS_LE], + ®->dp[IS_LE]); + preempt_enable(); + } + } else { + if (size == 16) + reg->d[IS_BE] = *(unsigned long *)(mem + 8); + else if (op->vsx_flags & VSX_SPLAT) + reg->d[IS_BE] = reg->d[IS_LE]; + } + break; + case 4: + /* lxvw4x, lxvwsx */ + wp = mem; + for (j = 0; j < size / 4; ++j) { + i = IS_LE ? 3 - j : j; + reg->w[i] = *wp++; + } + if (op->vsx_flags & VSX_SPLAT) { + u32 val = reg->w[IS_LE ? 3 : 0]; + for (; j < 4; ++j) { + i = IS_LE ? 3 - j : j; + reg->w[i] = val; + } + } + break; + case 2: + /* lxvh8x */ + hp = mem; + for (j = 0; j < size / 2; ++j) { + i = IS_LE ? 7 - j : j; + reg->h[i] = *hp++; + } + break; + case 1: + /* lxvb16x */ + bp = mem; + for (j = 0; j < size; ++j) { + i = IS_LE ? 15 - j : j; + reg->b[i] = *bp++; + } + break; + } +} +EXPORT_SYMBOL_GPL(emulate_vsx_load); +NOKPROBE_SYMBOL(emulate_vsx_load); + +void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem) +{ + int size, write_size; + int i, j; + union vsx_reg buf; + unsigned int *wp; + unsigned short *hp; + unsigned char *bp; + + size = GETSIZE(op->type); + + switch (op->element_size) { + case 16: + /* stxv, stxvx, stxvl, stxvll */ + if (size == 0) + break; + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { + /* reverse 16 bytes */ + buf.d[0] = byterev_8(reg->d[1]); + buf.d[1] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; + case 8: + /* scalar stores, stxvd2x */ + write_size = (size >= 8) ? 8 : size; + i = IS_LE ? 8 : 8 - write_size; + if (size < 8 && op->vsx_flags & VSX_FPCONV) { + buf.d[0] = buf.d[1] = 0; + preempt_disable(); + conv_dp_to_sp(®->dp[IS_LE], &buf.fp[1 + IS_LE]); + preempt_enable(); + reg = &buf; + } + memcpy(mem, ®->b[i], write_size); + if (size == 16) + memcpy(mem + 8, ®->d[IS_BE], 8); + break; + case 4: + /* stxvw4x */ + wp = mem; + for (j = 0; j < size / 4; ++j) { + i = IS_LE ? 
3 - j : j; + *wp++ = reg->w[i]; + } + break; + case 2: + /* stxvh8x */ + hp = mem; + for (j = 0; j < size / 2; ++j) { + i = IS_LE ? 7 - j : j; + *hp++ = reg->h[i]; + } + break; + case 1: + /* stvxb16x */ + bp = mem; + for (j = 0; j < size; ++j) { + i = IS_LE ? 15 - j : j; + *bp++ = reg->b[i]; + } + break; + } +} +EXPORT_SYMBOL_GPL(emulate_vsx_store); +NOKPROBE_SYMBOL(emulate_vsx_store); #endif /* CONFIG_VSX */ #define __put_user_asmx(x, addr, err, op, cr) \ @@ -1455,14 +1645,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } - /* - * Loads and stores. - */ +/* + * Loads and stores. + */ op->type = UNKNOWN; op->update_reg = ra; op->reg = rd; op->val = regs->gpr[rd]; u = (instr >> 20) & UPDATE; + op->vsx_flags = 0; switch (opcode) { case 31: @@ -1486,9 +1677,30 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 8); break; - case 21: /* ldx */ - case 53: /* ldux */ - op->type = MKOP(LOAD, u, 8); + case 52: /* lbarx */ + op->type = MKOP(LARX, 0, 1); + break; + + case 694: /* stbcx. */ + op->type = MKOP(STCX, 0, 1); + break; + + case 116: /* lharx */ + op->type = MKOP(LARX, 0, 2); + break; + + case 726: /* sthcx. */ + op->type = MKOP(STCX, 0, 2); + break; + + case 276: /* lqarx */ + if (!((rd & 1) || rd == ra || rd == rb)) + op->type = MKOP(LARX, 0, 16); + break; + + case 182: /* stqcx. */ + if (!(rd & 1)) + op->type = MKOP(STCX, 0, 16); break; #endif @@ -1506,6 +1718,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 103: /* lvx */ case 359: /* lvxl */ op->type = MKOP(LOAD_VMX, 0, 16); + op->element_size = 16; break; case 231: /* stvx */ @@ -1515,6 +1728,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ + case 21: /* ldx */ + case 53: /* ldux */ + op->type = MKOP(LOAD, u, 8); + break; + case 149: /* stdx */ case 181: /* stdux */ op->type = MKOP(STORE, u, 8); @@ -1635,16 +1853,184 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; #ifdef CONFIG_VSX + case 12: /* lxsiwzx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 8; + break; + + case 76: /* lxsiwax */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, SIGNEXT, 4); + op->element_size = 8; + break; + + case 140: /* stxsiwx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 4); + op->element_size = 8; + break; + + case 268: /* lxvx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 269: /* lxvl */ + case 301: { /* lxvll */ + int nb; + op->reg = rd | ((instr & 1) << 5); + op->ea = ra ? regs->gpr[ra] : 0; + nb = regs->gpr[rb] & 0xff; + if (nb > 16) + nb = 16; + op->type = MKOP(LOAD_VSX, 0, nb); + op->element_size = 16; + op->vsx_flags = ((instr & 0x20) ? 
VSX_LDLEFT : 0) | + VSX_CHECK_VEC; + break; + } + case 332: /* lxvdsx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 8); + op->element_size = 8; + op->vsx_flags = VSX_SPLAT; + break; + + case 364: /* lxvwsx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 4; + op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; + break; + + case 396: /* stxvx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 397: /* stxvl */ + case 429: { /* stxvll */ + int nb; + op->reg = rd | ((instr & 1) << 5); + op->ea = ra ? regs->gpr[ra] : 0; + nb = regs->gpr[rb] & 0xff; + if (nb > 16) + nb = 16; + op->type = MKOP(STORE_VSX, 0, nb); + op->element_size = 16; + op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + VSX_CHECK_VEC; + break; + } + case 524: /* lxsspx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV; + break; + + case 588: /* lxsdx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 8); + op->element_size = 8; + break; + + case 652: /* stxsspx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV; + break; + + case 716: /* stxsdx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 8); + op->element_size = 8; + break; + + case 780: /* lxvw4x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 4; + break; + + case 781: /* lxsibzx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 1); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 812: /* lxvh8x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 2; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 813: /* lxsihzx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 2); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 844: /* lxvd2x */ - case 876: /* lxvd2ux */ op->reg = rd | ((instr & 1) << 5); - op->type = MKOP(LOAD_VSX, u, 16); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 8; + break; + + case 876: /* lxvb16x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 1; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 908: /* stxvw4x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 4; + break; + + case 909: /* stxsibx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 1); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 940: /* stxvh8x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 2; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 941: /* stxsihx */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 2); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; break; case 972: /* stxvd2x */ - case 1004: /* stxvd2ux */ op->reg = rd | ((instr & 1) << 5); - op->type = MKOP(STORE_VSX, u, 16); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 8; + break; + + case 1004: /* stxvb16x */ + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 1; + op->vsx_flags = VSX_CHECK_VEC; break; #endif /* CONFIG_VSX */ @@ -1731,6 +2117,34 @@ int analyse_instr(struct instruction_op *op, 
const struct pt_regs *regs,
 		break;
 #endif
+#ifdef __powerpc64__
+	case 56:	/* lq */
+		if (!((rd & 1) || (rd == ra)))
+			op->type = MKOP(LOAD, 0, 16);
+		op->ea = dqform_ea(instr, regs);
+		break;
+#endif
+
+#ifdef CONFIG_VSX
+	case 57:	/* lxsd, lxssp */
+		op->ea = dsform_ea(instr, regs);
+		switch (instr & 3) {
+		case 2:		/* lxsd */
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		case 3:		/* lxssp */
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
 #ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */
 		op->ea = dsform_ea(instr, regs);
@@ -1746,7 +2160,51 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			break;
 		}
 		break;
+#endif
+#ifdef CONFIG_VSX
+	case 61:	/* lxv, stxsd, stxssp, stxv */
+		switch (instr & 7) {
+		case 1:		/* lxv */
+			op->ea = dqform_ea(instr, regs);
+			if (instr & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 2:		/* stxsd with LSB of DS field = 0 */
+		case 6:		/* stxsd with LSB of DS field = 1 */
+			op->ea = dsform_ea(instr, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 3:		/* stxssp with LSB of DS field = 0 */
+		case 7:		/* stxssp with LSB of DS field = 1 */
+			op->ea = dsform_ea(instr, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+
+		case 5:		/* stxv */
+			op->ea = dqform_ea(instr, regs);
+			if (instr & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
 	case 62:	/* std[u] */
 		op->ea = dsform_ea(instr, regs);
 		switch (instr & 3) {
@@ -1756,6 +2214,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 		case 1:		/* stdu */
 			op->type = MKOP(STORE, UPDATE, 8);
 			break;
+		case 2:		/* stq */
+			if (!(rd & 1))
+				op->type = MKOP(STORE, 0, 16);
+			break;
 		}
 		break;
 #endif /* __powerpc64__ */
@@ -1994,6 +2456,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 			return 0;
 		err = 0;
 		switch (size) {
+#ifdef __powerpc64__
+		case 1:
+			__get_user_asmx(val, op.ea, err, "lbarx");
+			break;
+		case 2:
+			__get_user_asmx(val, op.ea, err, "lharx");
+			break;
+#endif
 		case 4:
 			__get_user_asmx(val, op.ea, err, "lwarx");
 			break;
@@ -2001,6 +2471,9 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 8:
 			__get_user_asmx(val, op.ea, err, "ldarx");
 			break;
+		case 16:
+			err = do_lqarx(op.ea, &regs->gpr[op.reg]);
+			goto ldst_done;
 #endif
 		default:
 			return 0;
@@ -2016,6 +2489,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 			return 0;
 		err = 0;
 		switch (size) {
+#ifdef __powerpc64__
+		case 1:
+			__put_user_asmx(op.val, op.ea, err, "stbcx.", cr);
+			break;
+		case 2:
+			__put_user_asmx(op.val, op.ea, err, "sthcx.", cr);
+			break;
+#endif
 		case 4:
 			__put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
 			break;
@@ -2023,6 +2504,10 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 8:
 			__put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
 			break;
+		case 16:
+			err = do_stqcx(op.ea, regs->gpr[op.reg],
+				       regs->gpr[op.reg + 1], &cr);
+			break;
 #endif
 		default:
 			return 0;
@@ -2034,6 +2519,12 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto ldst_done;
case LOAD: +#ifdef __powerpc64__ + if (size == 16) { + err = emulate_lq(regs, op.ea, op.reg); + goto ldst_done; + } +#endif err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -2057,15 +2548,31 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case LOAD_VMX: if (!(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + err = do_vec_load(op.reg, do_lvx, op.ea, regs); goto ldst_done; #endif #ifdef CONFIG_VSX - case LOAD_VSX: - if (!(regs->msr & MSR_VSX)) + case LOAD_VSX: { + char mem[16]; + union vsx_reg buf; + unsigned long msrbit = MSR_VSX; + + /* + * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX + * when the target of the instruction is a vector register. + */ + if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) + msrbit = MSR_VEC; + if (!(regs->msr & msrbit)) + return 0; + if (!address_ok(regs, op.ea, size) || + __copy_from_user(mem, (void __user *)op.ea, size)) return 0; - err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + + emulate_vsx_load(&op, &buf, mem); + load_vsrn(op.reg, &buf); goto ldst_done; + } #endif case LOAD_MULTI: if (regs->msr & MSR_LE) @@ -2086,6 +2593,12 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: +#ifdef __powerpc64__ + if (size == 16) { + err = emulate_stq(regs, op.ea, op.reg); + goto ldst_done; + } +#endif if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -2110,15 +2623,32 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE_VMX: if (!(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + err = do_vec_store(op.reg, do_stvx, op.ea, regs); goto ldst_done; #endif #ifdef CONFIG_VSX - case STORE_VSX: - if (!(regs->msr & MSR_VSX)) + case STORE_VSX: { + char mem[16]; + union vsx_reg buf; + unsigned long msrbit = MSR_VSX; + + /* + * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX + * when the target of the instruction is a vector register. + */ + if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) + msrbit = MSR_VEC; + if (!(regs->msr & msrbit)) + return 0; + if (!address_ok(regs, op.ea, size)) + return 0; + + store_vsrn(op.reg, &buf); + emulate_vsx_store(&op, &buf, mem); + if (__copy_to_user((void __user *)op.ea, mem, size)) return 0; - err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; + } #endif case STORE_MULTI: if (regs->msr & MSR_LE) -- cgit v1.2.3-58-ga151 From d120cdbce68c3739f94f733bec376460fb9cbc14 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:28 +1000 Subject: powerpc/64: Fix update forms of loads and stores to write 64-bit EA When a 64-bit processor is executing in 32-bit mode, the update forms of load and store instructions are required by the architecture to write the full 64-bit effective address into the RA register, though only the bottom 32 bits are used to address memory. Currently, the instruction emulation code writes the truncated address to the RA register. This fixes it by keeping the full 64-bit EA in the instruction_op structure, truncating the address in emulate_step() where it is used to address memory, rather than in the address computations in analyse_instr(). 
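In outline (a sketch of the pattern this patch adopts, using its own
names), emulate_step() now derives the truncated copy once, at the point
of use, instead of truncating inside the EA calculations:

	unsigned long ea = op.ea;	/* full 64-bit EA; update forms write this to RA */

	if (OP_IS_LOAD_STORE(type) || type == CACHEOP)
		ea = truncate_if_32bit(regs->msr, op.ea);	/* low 32 bits in 32-bit mode */

	/* memory accesses below use ea; op.ea itself stays untruncated */

so an lwzu executed in 32-bit mode, for example, still addresses memory
with the 32-bit EA while the full 64-bit EA is what lands in RA.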
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 4 +- arch/powerpc/lib/sstep.c | 109 ++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 55 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 980197024c0b..4fcc2c9a6ed5 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,7 +25,7 @@ struct pt_regs; enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. */ - LOAD, + LOAD, /* load and store types need to be contiguous */ LOAD_MULTI, LOAD_FP, LOAD_VMX, @@ -52,6 +52,8 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX) + /* Compute flags, ORed in with type */ #define SETREG 0x20 #define SETCC 0x40 diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b1ba74117ad5..89e5c26e2860 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -126,7 +126,7 @@ static nokprobe_inline unsigned long dform_ea(unsigned int instr, if (ra) ea += regs->gpr[ra]; - return truncate_if_32bit(regs->msr, ea); + return ea; } #ifdef __powerpc64__ @@ -144,7 +144,7 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, if (ra) ea += regs->gpr[ra]; - return truncate_if_32bit(regs->msr, ea); + return ea; } /* @@ -161,7 +161,7 @@ static nokprobe_inline unsigned long dqform_ea(unsigned int instr, if (ra) ea += regs->gpr[ra]; - return truncate_if_32bit(regs->msr, ea); + return ea; } #endif /* __powerpc64 */ @@ -180,7 +180,7 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr, if (ra) ea += regs->gpr[ra]; - return truncate_if_32bit(regs->msr, ea); + return ea; } /* @@ -1789,10 +1789,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (rb == 0) rb = 32; /* # bytes to load */ op->type = MKOP(LOAD_MULTI, 0, rb); - op->ea = 0; - if (ra) - op->ea = truncate_if_32bit(regs->msr, - regs->gpr[ra]); + op->ea = ra ? regs->gpr[ra] : 0; break; #ifdef CONFIG_PPC_FPU @@ -1837,10 +1834,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (rb == 0) rb = 32; /* # bytes to store */ op->type = MKOP(STORE_MULTI, 0, rb); - op->ea = 0; - if (ra) - op->ea = truncate_if_32bit(regs->msr, - regs->gpr[ra]); + op->ea = ra ? 
regs->gpr[ra] : 0;
 		break;
 
 	case 790:	/* lhbrx */
@@ -2407,10 +2401,11 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
 int emulate_step(struct pt_regs *regs, unsigned int instr)
 {
 	struct instruction_op op;
-	int r, err, size;
+	int r, err, size, type;
 	unsigned long val;
 	unsigned int cr;
 	int i, rd, nb;
+	unsigned long ea;
 
 	r = analyse_instr(&op, regs, instr);
 	if (r < 0)
@@ -2422,27 +2417,33 @@
 	err = 0;
 	size = GETSIZE(op.type);
-	switch (op.type & INSTR_TYPE_MASK) {
+	type = op.type & INSTR_TYPE_MASK;
+
+	ea = op.ea;
+	if (OP_IS_LOAD_STORE(type) || type == CACHEOP)
+		ea = truncate_if_32bit(regs->msr, op.ea);
+
+	switch (type) {
 	case CACHEOP:
-		if (!address_ok(regs, op.ea, 8))
+		if (!address_ok(regs, ea, 8))
 			return 0;
 		switch (op.type & CACHEOP_MASK) {
 		case DCBST:
-			__cacheop_user_asmx(op.ea, err, "dcbst");
+			__cacheop_user_asmx(ea, err, "dcbst");
 			break;
 		case DCBF:
-			__cacheop_user_asmx(op.ea, err, "dcbf");
+			__cacheop_user_asmx(ea, err, "dcbf");
 			break;
 		case DCBTST:
 			if (op.reg == 0)
-				prefetchw((void *) op.ea);
+				prefetchw((void *) ea);
 			break;
 		case DCBT:
 			if (op.reg == 0)
-				prefetch((void *) op.ea);
+				prefetch((void *) ea);
 			break;
 		case ICBI:
-			__cacheop_user_asmx(op.ea, err, "icbi");
+			__cacheop_user_asmx(ea, err, "icbi");
 			break;
 		}
 		if (err)
@@ -2450,29 +2451,29 @@
 	case LARX:
-		if (op.ea & (size - 1))
+		if (ea & (size - 1))
 			break;		/* can't handle misaligned */
-		if (!address_ok(regs, op.ea, size))
+		if (!address_ok(regs, ea, size))
 			return 0;
 		err = 0;
 		switch (size) {
 #ifdef __powerpc64__
 		case 1:
-			__get_user_asmx(val, op.ea, err, "lbarx");
+			__get_user_asmx(val, ea, err, "lbarx");
 			break;
 		case 2:
-			__get_user_asmx(val, op.ea, err, "lharx");
+			__get_user_asmx(val, ea, err, "lharx");
 			break;
 #endif
 		case 4:
-			__get_user_asmx(val, op.ea, err, "lwarx");
+			__get_user_asmx(val, ea, err, "lwarx");
 			break;
 #ifdef __powerpc64__
 		case 8:
-			__get_user_asmx(val, op.ea, err, "ldarx");
+			__get_user_asmx(val, ea, err, "ldarx");
 			break;
 		case 16:
-			err = do_lqarx(op.ea, &regs->gpr[op.reg]);
+			err = do_lqarx(ea, &regs->gpr[op.reg]);
 			goto ldst_done;
 #endif
 		default:
@@ -2483,29 +2484,29 @@
 	case STCX:
-		if (op.ea & (size - 1))
+		if (ea & (size - 1))
 			break;		/* can't handle misaligned */
-		if (!address_ok(regs, op.ea, size))
+		if (!address_ok(regs, ea, size))
 			return 0;
 		err = 0;
 		switch (size) {
 #ifdef __powerpc64__
 		case 1:
-			__put_user_asmx(op.val, op.ea, err, "stbcx.", cr);
+			__put_user_asmx(op.val, ea, err, "stbcx.", cr);
 			break;
 		case 2:
-			__put_user_asmx(op.val, op.ea, err, "sthcx.", cr);
+			__put_user_asmx(op.val, ea, err, "sthcx.", cr);
 			break;
 #endif
 		case 4:
-			__put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
+			__put_user_asmx(op.val, ea, err, "stwcx.", cr);
 			break;
 #ifdef __powerpc64__
 		case 8:
-			__put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
+			__put_user_asmx(op.val, ea, err, "stdcx.", cr);
 			break;
 		case 16:
-			err = do_stqcx(op.ea, regs->gpr[op.reg],
+			err = do_stqcx(ea, regs->gpr[op.reg],
 				       regs->gpr[op.reg + 1], &cr);
 			break;
 #endif
@@ -2521,11 +2522,11 @@
 	case LOAD:
 #ifdef __powerpc64__
 		if (size == 16) {
-			err = emulate_lq(regs, op.ea, op.reg);
+			err = emulate_lq(regs, ea, op.reg);
 			goto ldst_done;
 		}
 #endif
-		err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
+		err = read_mem(&regs->gpr[op.reg], ea, size, regs);
 		if (!err) {
 			if (op.type &
SIGNEXT) do_signext(®s->gpr[op.reg], size); @@ -2539,16 +2540,16 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) if (!(regs->msr & MSR_FP)) return 0; if (size == 4) - err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + err = do_fp_load(op.reg, do_lfs, ea, size, regs); else - err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + err = do_fp_load(op.reg, do_lfd, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (!(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, do_lvx, op.ea, regs); + err = do_vec_load(op.reg, do_lvx, ea, regs); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2565,8 +2566,8 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & msrbit)) return 0; - if (!address_ok(regs, op.ea, size) || - __copy_from_user(mem, (void __user *)op.ea, size)) + if (!address_ok(regs, ea, size) || + __copy_from_user(mem, (void __user *)ea, size)) return 0; emulate_vsx_load(&op, &buf, mem); @@ -2582,12 +2583,12 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) nb = size - i; if (nb > 4) nb = 4; - err = read_mem(®s->gpr[rd], op.ea, nb, regs); + err = read_mem(®s->gpr[rd], ea, nb, regs); if (err) return 0; if (nb < 4) /* left-justify last bytes */ regs->gpr[rd] <<= 32 - 8 * nb; - op.ea += 4; + ea += 4; ++rd; } goto instr_done; @@ -2595,18 +2596,18 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE: #ifdef __powerpc64__ if (size == 16) { - err = emulate_stq(regs, op.ea, op.reg); + err = emulate_stq(regs, ea, op.reg); goto ldst_done; } #endif if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && - op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { - err = handle_stack_update(op.ea, regs); + ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(ea, regs); goto ldst_done; } - err = write_mem(op.val, op.ea, size, regs); + err = write_mem(op.val, ea, size, regs); goto ldst_done; #ifdef CONFIG_PPC_FPU @@ -2614,16 +2615,16 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) if (!(regs->msr & MSR_FP)) return 0; if (size == 4) - err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + err = do_fp_store(op.reg, do_stfs, ea, size, regs); else - err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + err = do_fp_store(op.reg, do_stfd, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (!(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, do_stvx, op.ea, regs); + err = do_vec_store(op.reg, do_stvx, ea, regs); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2640,12 +2641,12 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & msrbit)) return 0; - if (!address_ok(regs, op.ea, size)) + if (!address_ok(regs, ea, size)) return 0; store_vsrn(op.reg, &buf); emulate_vsx_store(&op, &buf, mem); - if (__copy_to_user((void __user *)op.ea, mem, size)) + if (__copy_to_user((void __user *)ea, mem, size)) return 0; goto ldst_done; } @@ -2661,10 +2662,10 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) nb = 4; else val >>= 32 - 8 * nb; - err = write_mem(val, op.ea, nb, regs); + err = write_mem(val, ea, nb, regs); if (err) return 0; - op.ea += 4; + ea += 4; ++rd; } goto instr_done; -- cgit v1.2.3-58-ga151 From c22435a5f3d8f85ea162ae523a6ba60a58521ba5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:33 +1000 Subject: powerpc: Emulate FP/vector/VSX loads/stores correctly when regs not live At present, 
the analyse_instr/emulate_step code checks for the relevant MSR_FP/VEC/VSX bit being set when a FP/VMX/VSX load or store is decoded, but doesn't recheck the bit before reading or writing the relevant FP/VMX/VSX register in emulate_step(). Since we don't have preemption disabled, it is possible that we get preempted between checking the MSR bit and doing the register access. If that happened, then the registers would have been saved to the thread_struct for the current process. Accesses to the CPU registers would then potentially read stale values, or write values that would never be seen by the user process. Another way that the registers can become non-live is if a page fault occurs when accessing user memory, and the page fault code calls a copy routine that wants to use the VMX or VSX registers. To fix this, the code for all the FP/VMX/VSX loads gets restructured so that it forms an image in a local variable of the desired register contents, then disables preemption, checks the MSR bit and either sets the CPU register or writes the value to the thread struct. Similarly, the code for stores checks the MSR bit, copies either the CPU register or the thread struct to a local variable, then reenables preemption and then copies the register image to memory. If the instruction being emulated is in the kernel, then we must not use the register values in the thread_struct. In this case, if the relevant MSR enable bit is not set, then emulate_step refuses to emulate the instruction. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 1 + arch/powerpc/lib/ldstfp.S | 241 +++++++-------------------------------- arch/powerpc/lib/sstep.c | 228 +++++++++++++++++++++++++----------- 3 files changed, 203 insertions(+), 267 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 4fcc2c9a6ed5..474a99255689 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -119,6 +119,7 @@ union vsx_reg { unsigned long d[2]; float fp[4]; double dp[2]; + __vector128 v; }; /* diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 6840911e7a01..7b5cf5e421ab 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -21,27 +21,19 @@ #define STKFRM (PPC_MIN_STKFRM + 16) - .macro inst32 op -reg = 0 - .rept 32 -20: \op reg,0,r4 - b 3f - EX_TABLE(20b,99f) -reg = reg + 1 - .endr - .endm - -/* Get the contents of frN into fr0; N is in r3. */ +/* Get the contents of frN into *p; N is in r3 and p is in r4. */ _GLOBAL(get_fpr) mflr r0 + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* fr0 is already in fr0 */ - nop -reg = 1 - .rept 31 - fmr fr0,reg - blr +reg = 0 + .rept 32 + stfd reg, 0(r4) + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -49,18 +41,23 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr +2: MTMSRD(r6) + isync + blr -/* Put the contents of fr0 into frN; N is in r3. */ +/* Put the contents of *p into frN; N is in r3 and p is in r4. */ _GLOBAL(put_fpr) mflr r0 + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* fr0 is already in fr0 */ - nop -reg = 1 - .rept 31 - fmr reg,fr0 - blr +reg = 0 + .rept 32 + lfd reg, 0(r4) + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -68,127 +65,24 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr - -/* Load FP reg N from float at *p. N is in r3, p in r4. 
*/ -_GLOBAL(do_lfs) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) -1: li r9,-EFAULT -2: lfs fr0,0(r4) - li r9,0 -3: bl put_fpr - beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - EX_TABLE(2b,3b) - -/* Load FP reg N from double at *p. N is in r3, p in r4. */ -_GLOBAL(do_lfd) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) -1: li r9,-EFAULT -2: lfd fr0,0(r4) - li r9,0 -3: beq cr7,4f - bl put_fpr - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - EX_TABLE(2b,3b) - -/* Store FP reg N to float at *p. N is in r3, p in r4. */ -_GLOBAL(do_stfs) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) - bl get_fpr -1: li r9,-EFAULT -2: stfs fr0,0(r4) - li r9,0 -3: beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) +2: MTMSRD(r6) isync - mr r3,r9 - addi r1,r1,STKFRM blr - EX_TABLE(2b,3b) -/* Store FP reg N to double at *p. N is in r3, p in r4. */ -_GLOBAL(do_stfd) - PPC_STLU r1,-STKFRM(r1) +#ifdef CONFIG_ALTIVEC +/* Get the contents of vrN into *p; N is in r3 and p is in r4. */ +_GLOBAL(get_vr) mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 + oris r7, r6, MSR_VEC@h MTMSRD(r7) isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) - bl get_fpr -1: li r9,-EFAULT -2: stfd fr0,0(r4) - li r9,0 -3: beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - EX_TABLE(2b,3b) - -#ifdef CONFIG_ALTIVEC -/* Get the contents of vrN into v0; N is in r3. Doesn't touch r3 or r4. */ -_GLOBAL(get_vr) - mflr r0 rlwinm r6,r3,3,0xf8 bcl 20,31,1f - blr /* v0 is already in v0 */ - nop -reg = 1 - .rept 31 - vor v0,reg,reg /* assembler doesn't know vmr? */ - blr +reg = 0 + .rept 32 + stvx reg, 0, r4 + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -196,18 +90,23 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr +2: MTMSRD(r6) + isync + blr -/* Put the contents of v0 into vrN; N is in r3. Doesn't touch r3 or r4. */ +/* Put the contents of *p into vrN; N is in r3 and p is in r4. */ _GLOBAL(put_vr) mflr r0 + mfmsr r6 + oris r7, r6, MSR_VEC@h + MTMSRD(r7) + isync rlwinm r6,r3,3,0xf8 bcl 20,31,1f - blr /* v0 is already in v0 */ - nop -reg = 1 - .rept 31 - vor reg,v0,v0 - blr +reg = 0 + .rept 32 + lvx reg, 0, r4 + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -215,62 +114,9 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr - -/* Load vector reg N from *p. N is in r3, p in r4. */ -_GLOBAL(do_lvx) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - oris r7,r6,MSR_VEC@h - cmpwi cr7,r3,0 - li r8,STKFRM-16 - MTMSRD(r7) - isync - beq cr7,1f - stvx v0,r1,r8 -1: li r9,-EFAULT -2: lvx v0,0,r4 - li r9,0 -3: beq cr7,4f - bl put_vr - lvx v0,r1,r8 -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - EX_TABLE(2b,3b) - -/* Store vector reg N to *p. N is in r3, p in r4. 
*/ -_GLOBAL(do_stvx) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - oris r7,r6,MSR_VEC@h - cmpwi cr7,r3,0 - li r8,STKFRM-16 - MTMSRD(r7) - isync - beq cr7,1f - stvx v0,r1,r8 - bl get_vr -1: li r9,-EFAULT -2: stvx v0,0,r4 - li r9,0 -3: beq cr7,4f - lvx v0,r1,r8 -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) +2: MTMSRD(r6) isync - mr r3,r9 - addi r1,r1,STKFRM blr - EX_TABLE(2b,3b) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -363,7 +209,6 @@ _GLOBAL(store_vsrn) mr r3,r9 addi r1,r1,STKFRM blr - EX_TABLE(2b,3b) #endif /* CONFIG_VSX */ /* Convert single-precision to double, without disturbing FPRs. */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 25ee04321870..c4d6a02a9d1a 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -36,12 +36,10 @@ extern char system_call_common[]; /* * Functions in ldstfp.S */ -extern int do_lfs(int rn, unsigned long ea); -extern int do_lfd(int rn, unsigned long ea); -extern int do_stfs(int rn, unsigned long ea); -extern int do_stfd(int rn, unsigned long ea); -extern int do_lvx(int rn, unsigned long ea); -extern int do_stvx(int rn, unsigned long ea); +extern void get_fpr(int rn, double *p); +extern void put_fpr(int rn, const double *p); +extern void get_vr(int rn, __vector128 *p); +extern void put_vr(int rn, __vector128 *p); extern void load_vsrn(int vsr, const void *p); extern void store_vsrn(int vsr, void *p); extern void conv_sp_to_dp(const float *sp, double *dp); @@ -409,63 +407,108 @@ NOKPROBE_SYMBOL(write_mem); #ifdef CONFIG_PPC_FPU /* - * Check the address and alignment, and call func to do the actual - * load or store. + * These access either the real FP register or the image in the + * thread_struct, depending on regs->msr & MSR_FP. 
*/ -static int do_fp_load(int rn, int (*func)(int, unsigned long), - unsigned long ea, int nb, - struct pt_regs *regs) +static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) { int err; - u8 buf[sizeof(double)] __attribute__((aligned(sizeof(double)))); + union { + float f; + double d; + unsigned long l; + u8 b[sizeof(double)]; + } u; if (!address_ok(regs, ea, nb)) return -EFAULT; - if (ea & 3) { - err = copy_mem_in(buf, ea, nb); - if (err) - return err; - ea = (unsigned long) buf; - } - return (*func)(rn, ea); + err = copy_mem_in(u.b, ea, nb); + if (err) + return err; + preempt_disable(); + if (nb == 4) + conv_sp_to_dp(&u.f, &u.d); + if (regs->msr & MSR_FP) + put_fpr(rn, &u.d); + else + current->thread.TS_FPR(rn) = u.l; + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(do_fp_load); -static int do_fp_store(int rn, int (*func)(int, unsigned long), - unsigned long ea, int nb, - struct pt_regs *regs) +static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs) { - int err; - u8 buf[sizeof(double)] __attribute__((aligned(sizeof(double)))); + union { + float f; + double d; + unsigned long l; + u8 b[sizeof(double)]; + } u; if (!address_ok(regs, ea, nb)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - err = (*func)(rn, (unsigned long) buf); - if (!err) - err = copy_mem_out(buf, ea, nb); - return err; + preempt_disable(); + if (regs->msr & MSR_FP) + get_fpr(rn, &u.d); + else + u.l = current->thread.TS_FPR(rn); + if (nb == 4) + conv_dp_to_sp(&u.d, &u.f); + preempt_enable(); + return copy_mem_out(u.b, ea, nb); } NOKPROBE_SYMBOL(do_fp_store); #endif #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ -static nokprobe_inline int do_vec_load(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +static nokprobe_inline int do_vec_load(int rn, unsigned long ea, + int size, struct pt_regs *regs) { + int err; + union { + __vector128 v; + u8 b[sizeof(__vector128)]; + } u = {}; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; - return (*func)(rn, ea); + /* align to multiple of size */ + ea &= ~(size - 1); + err = copy_mem_in(u.b, ea, size); + if (err) + return err; + + preempt_disable(); + if (regs->msr & MSR_VEC) + put_vr(rn, &u.v); + else + current->thread.vr_state.vr[rn] = u.v; + preempt_enable(); + return 0; } -static nokprobe_inline int do_vec_store(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +static nokprobe_inline int do_vec_store(int rn, unsigned long ea, + int size, struct pt_regs *regs) { + union { + __vector128 v; + u8 b[sizeof(__vector128)]; + } u; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; - return (*func)(rn, ea); + /* align to multiple of size */ + ea &= ~(size - 1); + + preempt_disable(); + if (regs->msr & MSR_VEC) + get_vr(rn, &u.v); + else + u.v = current->thread.vr_state.vr[rn]; + preempt_enable(); + return copy_mem_out(u.b, ea, size); } #endif /* CONFIG_ALTIVEC */ @@ -658,6 +701,68 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, } EXPORT_SYMBOL_GPL(emulate_vsx_store); NOKPROBE_SYMBOL(emulate_vsx_store); + +static nokprobe_inline int do_vsx_load(struct instruction_op *op, + unsigned long ea, struct pt_regs *regs) +{ + int reg = op->reg; + u8 mem[16]; + union vsx_reg buf; + int size = GETSIZE(op->type); + + if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size)) + return -EFAULT; + + emulate_vsx_load(op, &buf, mem); + preempt_disable(); + if (reg < 32) { + /* FP regs + extensions 
*/ + if (regs->msr & MSR_FP) { + load_vsrn(reg, &buf); + } else { + current->thread.fp_state.fpr[reg][0] = buf.d[0]; + current->thread.fp_state.fpr[reg][1] = buf.d[1]; + } + } else { + if (regs->msr & MSR_VEC) + load_vsrn(reg, &buf); + else + current->thread.vr_state.vr[reg - 32] = buf.v; + } + preempt_enable(); + return 0; +} + +static nokprobe_inline int do_vsx_store(struct instruction_op *op, + unsigned long ea, struct pt_regs *regs) +{ + int reg = op->reg; + u8 mem[16]; + union vsx_reg buf; + int size = GETSIZE(op->type); + + if (!address_ok(regs, ea, size)) + return -EFAULT; + + preempt_disable(); + if (reg < 32) { + /* FP regs + extensions */ + if (regs->msr & MSR_FP) { + store_vsrn(reg, &buf); + } else { + buf.d[0] = current->thread.fp_state.fpr[reg][0]; + buf.d[1] = current->thread.fp_state.fpr[reg][1]; + } + } else { + if (regs->msr & MSR_VEC) + store_vsrn(reg, &buf); + else + buf.v = current->thread.vr_state.vr[reg - 32]; + } + preempt_enable(); + emulate_vsx_store(op, &buf, mem); + return copy_mem_out(mem, ea, size); +} #endif /* CONFIG_VSX */ #define __put_user_asmx(x, addr, err, op, cr) \ @@ -2524,25 +2629,26 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (!(regs->msr & MSR_FP)) + /* + * If the instruction is in userspace, we can emulate it even + * if the VMX state is not live, because we have the state + * stored in the thread_struct. If the instruction is in + * the kernel, we must not touch the state in the thread_struct. + */ + if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - if (size == 4) - err = do_fp_load(op.reg, do_lfs, ea, size, regs); - else - err = do_fp_load(op.reg, do_lfd, ea, size, regs); + err = do_fp_load(op.reg, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (!(regs->msr & MSR_VEC)) + if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, do_lvx, ea, regs); + err = do_vec_load(op.reg, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: { - u8 mem[16]; - union vsx_reg buf; unsigned long msrbit = MSR_VSX; /* @@ -2551,14 +2657,9 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) */ if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & msrbit)) + if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - if (!address_ok(regs, ea, size) || - copy_mem_in(mem, ea, size)) - return 0; - - emulate_vsx_load(&op, &buf, mem); - load_vsrn(op.reg, &buf); + err = do_vsx_load(&op, ea, regs); goto ldst_done; } #endif @@ -2599,25 +2700,20 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (!(regs->msr & MSR_FP)) + if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - if (size == 4) - err = do_fp_store(op.reg, do_stfs, ea, size, regs); - else - err = do_fp_store(op.reg, do_stfd, ea, size, regs); + err = do_fp_store(op.reg, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (!(regs->msr & MSR_VEC)) + if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, do_stvx, ea, regs); + err = do_vec_store(op.reg, ea, size, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: { - u8 mem[16]; - union vsx_reg buf; unsigned long msrbit = MSR_VSX; /* @@ -2626,15 +2722,9 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) */ if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & msrbit)) - return 0; 
- if (!address_ok(regs, ea, size)) - return 0; - - store_vsrn(op.reg, &buf); - emulate_vsx_store(&op, &buf, mem); - if (copy_mem_out(mem, ea, size)) + if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; + err = do_vsx_store(&op, ea, regs); goto ldst_done; } #endif -- cgit v1.2.3-58-ga151 From b2543f7b20bb2a551ed340447d7303f0ce4644f1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:36 +1000 Subject: powerpc: Emulate the dcbz instruction This adds code to analyse_instr() and emulate_step() to understand the dcbz (data cache block zero) instruction. The emulate_dcbz() function is made public so it can be used by the alignment handler in future. (The apparently unnecessary cropping of the address to 32 bits is there because it will be needed in that situation.) Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 2 ++ arch/powerpc/lib/sstep.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 474a99255689..793639a5aa5e 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -84,6 +84,7 @@ enum instruction_type { #define DCBTST 0x200 #define DCBT 0x300 #define ICBI 0x400 +#define DCBZ 0x500 /* VSX flags values */ #define VSX_FPCONV 1 /* do floating point SP/DP conversion */ @@ -155,3 +156,4 @@ extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, const void *mem); extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, void *mem); +extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 44a8ce062747..2d7e498b622b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -780,6 +780,30 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, } #endif /* CONFIG_VSX */ +int emulate_dcbz(unsigned long ea, struct pt_regs *regs) +{ + int err; + unsigned long i, size; + +#ifdef __powerpc64__ + size = ppc64_caches.l1d.block_size; + if (!(regs->msr & MSR_64BIT)) + ea &= 0xffffffffUL; +#else + size = L1_CACHE_BYTES; +#endif + ea &= ~(size - 1); + if (!address_ok(regs, ea, size)) + return -EFAULT; + for (i = 0; i < size; i += sizeof(long)) { + err = __put_user(0, (unsigned long __user *) (ea + i)); + if (err) + return err; + } + return 0; +} +NOKPROBE_SYMBOL(emulate_dcbz); + #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ "1: " op " %2,0,%3\n" \ @@ -1748,6 +1772,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(CACHEOP, ICBI, 0); op->ea = xform_ea(instr, regs); return 0; + + case 1014: /* dcbz */ + op->type = MKOP(CACHEOP, DCBZ, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } @@ -2607,6 +2636,9 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case ICBI: __cacheop_user_asmx(ea, err, "icbi"); break; + case DCBZ: + err = emulate_dcbz(ea, regs); + break; } if (err) return 0; -- cgit v1.2.3-58-ga151 From d955189ae42796621fb439e5e778ccaeebc2a1e7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:38 +1000 Subject: powerpc: Handle opposite-endian processes in emulation code This adds code to the load and store emulation code to byte-swap the data appropriately when the process being emulated is set to the opposite endianness to that of the kernel. 
This also enables the emulation for the multiple-register loads and stores (lmw, stmw, lswi, stswi, lswx, stswx) to work for little-endian. In little-endian mode, the partial word at the end of a transfer for lsw*/stsw* (when the byte count is not a multiple of 4) is loaded/stored at the least-significant end of the register. Additionally, this fixes a bug in the previous code in that it could call read_mem/write_mem with a byte count that was not 1, 2, 4 or 8. Note that this only works correctly on processors with "true" little-endian mode, such as IBM POWER processors from POWER6 on, not the so-called "PowerPC" little-endian mode that uses address swizzling as implemented on the old 32-bit 603, 604, 740/750, 74xx CPUs. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 7 +- arch/powerpc/lib/sstep.c | 184 +++++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 60 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 793639a5aa5e..958c2c55bcfe 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -153,7 +153,8 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); extern int emulate_step(struct pt_regs *regs, unsigned int instr); extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem); -extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem); + const void *mem, bool cross_endian); +extern void emulate_vsx_store(struct instruction_op *op, + const union vsx_reg *reg, void *mem, + bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 04ffab970aec..5d8284938898 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -217,6 +217,33 @@ static nokprobe_inline unsigned long byterev_8(unsigned long x) } #endif +static nokprobe_inline void do_byte_reverse(void *ptr, int nb) +{ + switch (nb) { + case 2: + *(u16 *)ptr = byterev_2(*(u16 *)ptr); + break; + case 4: + *(u32 *)ptr = byterev_4(*(u32 *)ptr); + break; +#ifdef __powerpc64__ + case 8: + *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr); + break; + case 16: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[1]); + up[1] = tmp; + break; + } +#endif + default: + WARN_ON_ONCE(1); + } +} + static nokprobe_inline int read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) @@ -430,7 +457,8 @@ NOKPROBE_SYMBOL(write_mem); * These access either the real FP register or the image in the * thread_struct, depending on regs->msr & MSR_FP. 
*/ -static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) +static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs, + bool cross_endian) { int err; union { @@ -445,6 +473,11 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) err = copy_mem_in(u.b, ea, nb, regs); if (err) return err; + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } preempt_disable(); if (nb == 4) conv_sp_to_dp(&u.f, &u.d[0]); @@ -465,7 +498,8 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_fp_load); -static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs) +static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs, + bool cross_endian) { union { float f; @@ -491,6 +525,11 @@ static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs) u.l[1] = current->thread.TS_FPR(rn); } preempt_enable(); + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } return copy_mem_out(u.b, ea, nb, regs); } NOKPROBE_SYMBOL(do_fp_store); @@ -499,7 +538,8 @@ NOKPROBE_SYMBOL(do_fp_store); #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, - int size, struct pt_regs *regs) + int size, struct pt_regs *regs, + bool cross_endian) { int err; union { @@ -514,7 +554,8 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs); if (err) return err; - + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], size); preempt_disable(); if (regs->msr & MSR_VEC) put_vr(rn, &u.v); @@ -525,7 +566,8 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, } static nokprobe_inline int do_vec_store(int rn, unsigned long ea, - int size, struct pt_regs *regs) + int size, struct pt_regs *regs, + bool cross_endian) { union { __vector128 v; @@ -543,49 +585,60 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, else u.v = current->thread.vr_state.vr[rn]; preempt_enable(); + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], size); return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea, - int reg) + int reg, bool cross_endian) { int err; if (!address_ok(regs, ea, 16)) return -EFAULT; /* if aligned, should be atomic */ - if ((ea & 0xf) == 0) - return do_lq(ea, ®s->gpr[reg]); - - err = read_mem(®s->gpr[reg + IS_LE], ea, 8, regs); - if (!err) - err = read_mem(®s->gpr[reg + IS_BE], ea + 8, 8, regs); + if ((ea & 0xf) == 0) { + err = do_lq(ea, ®s->gpr[reg]); + } else { + err = read_mem(®s->gpr[reg + IS_LE], ea, 8, regs); + if (!err) + err = read_mem(®s->gpr[reg + IS_BE], ea + 8, 8, regs); + } + if (!err && unlikely(cross_endian)) + do_byte_reverse(®s->gpr[reg], 16); return err; } static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, - int reg) + int reg, bool cross_endian) { int err; + unsigned long vals[2]; if (!address_ok(regs, ea, 16)) return -EFAULT; + vals[0] = regs->gpr[reg]; + vals[1] = regs->gpr[reg + 1]; + if (unlikely(cross_endian)) + do_byte_reverse(vals, 16); + /* if aligned, should be atomic */ if ((ea & 0xf) == 0) - return do_stq(ea, regs->gpr[reg], regs->gpr[reg + 1]); + return do_stq(ea, vals[0], vals[1]); 
- err = write_mem(regs->gpr[reg + IS_LE], ea, 8, regs); + err = write_mem(vals[IS_LE], ea, 8, regs); if (!err) - err = write_mem(regs->gpr[reg + IS_BE], ea + 8, 8, regs); + err = write_mem(vals[IS_BE], ea + 8, 8, regs); return err; } #endif /* __powerpc64 */ #ifdef CONFIG_VSX void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem) + const void *mem, bool rev) { int size, read_size; int i, j; @@ -602,19 +655,18 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (size == 0) break; memcpy(reg, mem, size); - if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { - /* reverse 16 bytes */ - unsigned long tmp; - tmp = byterev_8(reg->d[0]); - reg->d[0] = byterev_8(reg->d[1]); - reg->d[1] = tmp; - } + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) + do_byte_reverse(reg, 16); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ read_size = (size >= 8) ? 8 : size; i = IS_LE ? 8 : 8 - read_size; memcpy(®->b[i], mem, read_size); + if (rev) + do_byte_reverse(®->b[i], 8); if (size < 8) { if (op->type & SIGNEXT) { /* size == 4 is the only case here */ @@ -626,9 +678,10 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, preempt_enable(); } } else { - if (size == 16) - reg->d[IS_BE] = *(unsigned long *)(mem + 8); - else if (op->vsx_flags & VSX_SPLAT) + if (size == 16) { + unsigned long v = *(unsigned long *)(mem + 8); + reg->d[IS_BE] = !rev ? v : byterev_8(v); + } else if (op->vsx_flags & VSX_SPLAT) reg->d[IS_BE] = reg->d[IS_LE]; } break; @@ -637,7 +690,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, wp = mem; for (j = 0; j < size / 4; ++j) { i = IS_LE ? 3 - j : j; - reg->w[i] = *wp++; + reg->w[i] = !rev ? *wp++ : byterev_4(*wp++); } if (op->vsx_flags & VSX_SPLAT) { u32 val = reg->w[IS_LE ? 3 : 0]; @@ -652,7 +705,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, hp = mem; for (j = 0; j < size / 2; ++j) { i = IS_LE ? 7 - j : j; - reg->h[i] = *hp++; + reg->h[i] = !rev ? *hp++ : byterev_2(*hp++); } break; case 1: @@ -669,7 +722,7 @@ EXPORT_SYMBOL_GPL(emulate_vsx_load); NOKPROBE_SYMBOL(emulate_vsx_load); void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem) + void *mem, bool rev) { int size, write_size; int i, j; @@ -685,7 +738,9 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, /* stxv, stxvx, stxvl, stxvll */ if (size == 0) break; - if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) { /* reverse 16 bytes */ buf.d[0] = byterev_8(reg->d[1]); buf.d[1] = byterev_8(reg->d[0]); @@ -707,13 +762,18 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, memcpy(mem, ®->b[i], write_size); if (size == 16) memcpy(mem + 8, ®->d[IS_BE], 8); + if (unlikely(rev)) { + do_byte_reverse(mem, write_size); + if (size == 16) + do_byte_reverse(mem + 8, 8); + } break; case 4: /* stxvw4x */ wp = mem; for (j = 0; j < size / 4; ++j) { i = IS_LE ? 3 - j : j; - *wp++ = reg->w[i]; + *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]); } break; case 2: @@ -721,7 +781,7 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, hp = mem; for (j = 0; j < size / 2; ++j) { i = IS_LE ? 7 - j : j; - *hp++ = reg->h[i]; + *hp++ = !rev ? 
reg->h[i] : byterev_2(reg->h[i]); } break; case 1: @@ -738,7 +798,8 @@ EXPORT_SYMBOL_GPL(emulate_vsx_store); NOKPROBE_SYMBOL(emulate_vsx_store); static nokprobe_inline int do_vsx_load(struct instruction_op *op, - unsigned long ea, struct pt_regs *regs) + unsigned long ea, struct pt_regs *regs, + bool cross_endian) { int reg = op->reg; u8 mem[16]; @@ -748,7 +809,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem); + emulate_vsx_load(op, &buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ @@ -769,7 +830,8 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, } static nokprobe_inline int do_vsx_store(struct instruction_op *op, - unsigned long ea, struct pt_regs *regs) + unsigned long ea, struct pt_regs *regs, + bool cross_endian) { int reg = op->reg; u8 mem[16]; @@ -795,7 +857,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, buf.v = current->thread.vr_state.vr[reg - 32]; } preempt_enable(); - emulate_vsx_store(op, &buf, mem); + emulate_vsx_store(op, &buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ @@ -2619,6 +2681,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) unsigned int cr; int i, rd, nb; unsigned long ea; + bool cross_endian; r = analyse_instr(&op, regs, instr); if (r < 0) @@ -2631,6 +2694,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) err = 0; size = GETSIZE(op.type); type = op.type & INSTR_TYPE_MASK; + cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); ea = op.ea; if (OP_IS_LOAD_STORE(type) || type == CACHEOP) @@ -2746,7 +2810,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case LOAD: #ifdef __powerpc64__ if (size == 16) { - err = emulate_lq(regs, ea, op.reg); + err = emulate_lq(regs, ea, op.reg, cross_endian); goto ldst_done; } #endif @@ -2754,7 +2818,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) if (!err) { if (op.type & SIGNEXT) do_signext(®s->gpr[op.reg], size); - if (op.type & BYTEREV) + if ((op.type & BYTEREV) == (cross_endian ? 
0 : BYTEREV)) do_byterev(®s->gpr[op.reg], size); } goto ldst_done; @@ -2769,14 +2833,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) */ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_load(op.reg, ea, size, regs); + err = do_fp_load(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, ea, size, regs); + err = do_vec_load(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2791,23 +2855,26 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_load(&op, ea, regs); + err = do_vsx_load(&op, ea, regs, cross_endian); goto ldst_done; } #endif case LOAD_MULTI: - if (regs->msr & MSR_LE) - return 0; + if (!address_ok(regs, ea, size)) + return -EFAULT; rd = op.reg; for (i = 0; i < size; i += 4) { + unsigned int v32 = 0; + nb = size - i; if (nb > 4) nb = 4; - err = read_mem(®s->gpr[rd], ea, nb, regs); + err = copy_mem_in((u8 *) &v32, ea, nb, regs); if (err) return 0; - if (nb < 4) /* left-justify last bytes */ - regs->gpr[rd] <<= 32 - 8 * nb; + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + regs->gpr[rd] = v32; ea += 4; ++rd; } @@ -2816,7 +2883,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE: #ifdef __powerpc64__ if (size == 16) { - err = emulate_stq(regs, ea, op.reg); + err = emulate_stq(regs, ea, op.reg, cross_endian); goto ldst_done; } #endif @@ -2827,6 +2894,8 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) err = handle_stack_update(ea, regs); goto ldst_done; } + if (unlikely(cross_endian)) + do_byterev(&op.val, size); err = write_mem(op.val, ea, size, regs); goto ldst_done; @@ -2834,14 +2903,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE_FP: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_store(op.reg, ea, size, regs); + err = do_fp_store(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, ea, size, regs); + err = do_vec_store(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2856,22 +2925,23 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_store(&op, ea, regs); + err = do_vsx_store(&op, ea, regs, cross_endian); goto ldst_done; } #endif case STORE_MULTI: - if (regs->msr & MSR_LE) - return 0; + if (!address_ok(regs, ea, size)) + return -EFAULT; rd = op.reg; for (i = 0; i < size; i += 4) { - val = regs->gpr[rd]; + unsigned int v32 = regs->gpr[rd]; + nb = size - i; if (nb > 4) nb = 4; - else - val >>= 32 - 8 * nb; - err = write_mem(val, ea, nb, regs); + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + err = copy_mem_out((u8 *) &v32, ea, nb, regs); if (err) return 0; ea += 4; -- cgit v1.2.3-58-ga151 From a53d5182e24c22986ad0e99e52f8fe343ee7d7ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:39 +1000 Subject: powerpc: Separate out load/store emulation into its own function This moves the parts of emulate_step() that deal with emulating load and store instructions into a new function called emulate_loadstore(). 
This is to make it possible to reuse this code in the alignment handler. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 9 ++ arch/powerpc/lib/sstep.c | 258 ++++++++++++++++++++++----------------- 2 files changed, 154 insertions(+), 113 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 958c2c55bcfe..309d1c5de143 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -152,6 +152,15 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); +/* + * Emulate a load or store instruction by reading/writing the + * memory of the current process. FP/VMX/VSX registers are assumed + * to hold live values if the appropriate enable bit in regs->msr is + * set; otherwise this will use the saved values in the thread struct + * for user-mode accesses. + */ +extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op); + extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, const void *mem, bool cross_endian); extern void emulate_vsx_store(struct instruction_op *op, diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5d8284938898..423815599063 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2667,76 +2667,35 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) } /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Emulate a previously-analysed load or store instruction. 
+ * Return values are: + * 0 = instruction emulated successfully + * -EFAULT = address out of range or access faulted (regs->dar + * contains the faulting address) + * -EACCES = misaligned access, instruction requires alignment + * -EINVAL = unknown operation in *op */ -int emulate_step(struct pt_regs *regs, unsigned int instr) +int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) { - struct instruction_op op; - int r, err, size, type; - unsigned long val; - unsigned int cr; + int err, size, type; int i, rd, nb; + unsigned int cr; + unsigned long val; unsigned long ea; bool cross_endian; - r = analyse_instr(&op, regs, instr); - if (r < 0) - return r; - if (r > 0) { - emulate_update_regs(regs, &op); - return 1; - } - err = 0; - size = GETSIZE(op.type); - type = op.type & INSTR_TYPE_MASK; + size = GETSIZE(op->type); + type = op->type & INSTR_TYPE_MASK; cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); - - ea = op.ea; - if (OP_IS_LOAD_STORE(type) || type == CACHEOP) - ea = truncate_if_32bit(regs->msr, op.ea); + ea = truncate_if_32bit(regs->msr, op->ea); switch (type) { - case CACHEOP: - if (!address_ok(regs, ea, 8)) - return 0; - switch (op.type & CACHEOP_MASK) { - case DCBST: - __cacheop_user_asmx(ea, err, "dcbst"); - break; - case DCBF: - __cacheop_user_asmx(ea, err, "dcbf"); - break; - case DCBTST: - if (op.reg == 0) - prefetchw((void *) ea); - break; - case DCBT: - if (op.reg == 0) - prefetch((void *) ea); - break; - case ICBI: - __cacheop_user_asmx(ea, err, "icbi"); - break; - case DCBZ: - err = emulate_dcbz(ea, regs); - break; - } - if (err) { - regs->dar = ea; - return 0; - } - goto instr_done; - case LARX: if (ea & (size - 1)) - break; /* can't handle misaligned */ + return -EACCES; /* can't handle misaligned */ if (!address_ok(regs, ea, size)) - return 0; + return -EFAULT; err = 0; switch (size) { #ifdef __powerpc64__ @@ -2755,49 +2714,49 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) __get_user_asmx(val, ea, err, "ldarx"); break; case 16: - err = do_lqarx(ea, ®s->gpr[op.reg]); + err = do_lqarx(ea, ®s->gpr[op->reg]); break; #endif default: - return 0; + return -EINVAL; } if (err) { regs->dar = ea; - return 0; + break; } if (size < 16) - regs->gpr[op.reg] = val; - goto ldst_done; + regs->gpr[op->reg] = val; + break; case STCX: if (ea & (size - 1)) - break; /* can't handle misaligned */ + return -EACCES; /* can't handle misaligned */ if (!address_ok(regs, ea, size)) - return 0; + return -EFAULT; err = 0; switch (size) { #ifdef __powerpc64__ case 1: - __put_user_asmx(op.val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op.val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; #endif case 4: - __put_user_asmx(op.val, ea, err, "stwcx.", cr); + __put_user_asmx(op->val, ea, err, "stwcx.", cr); break; #ifdef __powerpc64__ case 8: - __put_user_asmx(op.val, ea, err, "stdcx.", cr); + __put_user_asmx(op->val, ea, err, "stdcx.", cr); break; case 16: - err = do_stqcx(ea, regs->gpr[op.reg], - regs->gpr[op.reg + 1], &cr); + err = do_stqcx(ea, regs->gpr[op->reg], + regs->gpr[op->reg + 1], &cr); break; #endif default: - return 0; + return -EINVAL; } if (!err) regs->ccr = (regs->ccr & 0x0fffffff) | @@ -2805,23 +2764,23 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) ((regs->xer >> 3) & 0x10000000); else regs->dar = ea; - goto ldst_done; + break; case LOAD: #ifdef __powerpc64__ if (size == 16) { - err = emulate_lq(regs, ea, op.reg, cross_endian); - 
goto ldst_done; + err = emulate_lq(regs, ea, op->reg, cross_endian); + break; } #endif - err = read_mem(®s->gpr[op.reg], ea, size, regs); + err = read_mem(®s->gpr[op->reg], ea, size, regs); if (!err) { - if (op.type & SIGNEXT) - do_signext(®s->gpr[op.reg], size); - if ((op.type & BYTEREV) == (cross_endian ? 0 : BYTEREV)) - do_byterev(®s->gpr[op.reg], size); + if (op->type & SIGNEXT) + do_signext(®s->gpr[op->reg], size); + if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV)) + do_byterev(®s->gpr[op->reg], size); } - goto ldst_done; + break; #ifdef CONFIG_PPC_FPU case LOAD_FP: @@ -2833,15 +2792,15 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) */ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_load(op.reg, ea, size, regs, cross_endian); - goto ldst_done; + err = do_fp_load(op->reg, ea, size, regs, cross_endian); + break; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, ea, size, regs, cross_endian); - goto ldst_done; + err = do_vec_load(op->reg, ea, size, regs, cross_endian); + break; #endif #ifdef CONFIG_VSX case LOAD_VSX: { @@ -2851,18 +2810,18 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX * when the target of the instruction is a vector register. */ - if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) + if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_load(&op, ea, regs, cross_endian); - goto ldst_done; + err = do_vsx_load(op, ea, regs, cross_endian); + break; } #endif case LOAD_MULTI: if (!address_ok(regs, ea, size)) return -EFAULT; - rd = op.reg; + rd = op->reg; for (i = 0; i < size; i += 4) { unsigned int v32 = 0; @@ -2871,47 +2830,47 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) nb = 4; err = copy_mem_in((u8 *) &v32, ea, nb, regs); if (err) - return 0; + break; if (unlikely(cross_endian)) v32 = byterev_4(v32); regs->gpr[rd] = v32; ea += 4; ++rd; } - goto instr_done; + break; case STORE: #ifdef __powerpc64__ if (size == 16) { - err = emulate_stq(regs, ea, op.reg, cross_endian); - goto ldst_done; + err = emulate_stq(regs, ea, op->reg, cross_endian); + break; } #endif - if ((op.type & UPDATE) && size == sizeof(long) && - op.reg == 1 && op.update_reg == 1 && + if ((op->type & UPDATE) && size == sizeof(long) && + op->reg == 1 && op->update_reg == 1 && !(regs->msr & MSR_PR) && ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { err = handle_stack_update(ea, regs); - goto ldst_done; + break; } if (unlikely(cross_endian)) - do_byterev(&op.val, size); - err = write_mem(op.val, ea, size, regs); - goto ldst_done; + do_byterev(&op->val, size); + err = write_mem(op->val, ea, size, regs); + break; #ifdef CONFIG_PPC_FPU case STORE_FP: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_store(op.reg, ea, size, regs, cross_endian); - goto ldst_done; + err = do_fp_store(op->reg, ea, size, regs, cross_endian); + break; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, ea, size, regs, cross_endian); - goto ldst_done; + err = do_vec_store(op->reg, ea, size, regs, cross_endian); + break; #endif #ifdef CONFIG_VSX case STORE_VSX: { @@ -2921,18 +2880,18 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX * 
when the target of the instruction is a vector register. */ - if (op.reg >= 32 && (op.vsx_flags & VSX_CHECK_VEC)) + if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_store(&op, ea, regs, cross_endian); - goto ldst_done; + err = do_vsx_store(op, ea, regs, cross_endian); + break; } #endif case STORE_MULTI: if (!address_ok(regs, ea, size)) return -EFAULT; - rd = op.reg; + rd = op->reg; for (i = 0; i < size; i += 4) { unsigned int v32 = regs->gpr[rd]; @@ -2943,10 +2902,89 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) v32 = byterev_4(v32); err = copy_mem_out((u8 *) &v32, ea, nb, regs); if (err) - return 0; + break; ea += 4; ++rd; } + break; + + default: + return -EINVAL; + } + + if (err) + return err; + + if (op->type & UPDATE) + regs->gpr[op->update_reg] = op->ea; + + return 0; +} +NOKPROBE_SYMBOL(emulate_loadstore); + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, type; + unsigned long val; + unsigned long ea; + + r = analyse_instr(&op, regs, instr); + if (r < 0) + return r; + if (r > 0) { + emulate_update_regs(regs, &op); + return 1; + } + + err = 0; + type = op.type & INSTR_TYPE_MASK; + + if (OP_IS_LOAD_STORE(type)) { + err = emulate_loadstore(regs, &op); + if (err) + return 0; + goto instr_done; + } + + switch (type) { + case CACHEOP: + ea = truncate_if_32bit(regs->msr, op.ea); + if (!address_ok(regs, ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) ea); + break; + case ICBI: + __cacheop_user_asmx(ea, err, "icbi"); + break; + case DCBZ: + err = emulate_dcbz(ea, regs); + break; + } + if (err) { + regs->dar = ea; + return 0; + } goto instr_done; case MFMSR: @@ -2989,12 +3027,6 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) } return 0; - ldst_done: - if (err) - return 0; - if (op.type & UPDATE) - regs->gpr[op.update_reg] = op.ea; - instr_done: regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); return 1; -- cgit v1.2.3-58-ga151 From d2b65ac6526a82965212b632d42687251e122a36 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 16:34:09 +1000 Subject: powerpc: Emulate load/store floating point as integer word instructions This adds emulation for the lfiwax, lfiwzx and stfiwx instructions. This necessitated adding a new flag to indicate whether a floating point or an integer conversion was needed for LOAD_FP and STORE_FP, so this moves the size field in op->type up 4 bits. 
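To make the two conversions concrete, here is how a 32-bit word read from memory ends up in the 64-bit FPR image for the two integer-word load variants (a self-contained user-space sketch of the ISA semantics, not the kernel code; the sign-extending case corresponds to the SIGNEXT flag in the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t w = 0x80000001u;	/* 32-bit word read from memory */
	/* lfiwax: algebraic, i.e. sign-extending load */
	uint64_t ax = (uint64_t)(int64_t)(int32_t)w;
	/* lfiwzx: zero-extending load */
	uint64_t zx = (uint64_t)w;

	printf("lfiwax image: %016llx\n", (unsigned long long)ax);
	printf("lfiwzx image: %016llx\n", (unsigned long long)zx);
	return 0;	/* prints ffffffff80000001, then 0000000080000001 */
}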
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 5 ++-- arch/powerpc/lib/sstep.c | 60 ++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 309d1c5de143..ab9d849644d0 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -68,6 +68,7 @@ enum instruction_type { #define SIGNEXT 0x20 #define UPDATE 0x40 /* matches bit in opcode 31 instructions */ #define BYTEREV 0x80 +#define FPCONV 0x100 /* Barrier type field, ORed in with type */ #define BARRIER_MASK 0xe0 @@ -93,8 +94,8 @@ enum instruction_type { #define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ /* Size field in type word */ -#define SIZE(n) ((n) << 8) -#define GETSIZE(w) ((w) >> 8) +#define SIZE(n) ((n) << 12) +#define GETSIZE(w) ((w) >> 12) #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 423815599063..f168ea006242 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -457,19 +457,23 @@ NOKPROBE_SYMBOL(write_mem); * These access either the real FP register or the image in the * thread_struct, depending on regs->msr & MSR_FP. */ -static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs, - bool cross_endian) +static int do_fp_load(struct instruction_op *op, unsigned long ea, + struct pt_regs *regs, bool cross_endian) { - int err; + int err, rn, nb; union { + int i; + unsigned int u; float f; double d[2]; unsigned long l[2]; u8 b[2 * sizeof(double)]; } u; + nb = GETSIZE(op->type); if (!address_ok(regs, ea, nb)) return -EFAULT; + rn = op->reg; err = copy_mem_in(u.b, ea, nb, regs); if (err) return err; @@ -479,8 +483,14 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs, do_byte_reverse(&u.b[8], 8); } preempt_disable(); - if (nb == 4) - conv_sp_to_dp(&u.f, &u.d[0]); + if (nb == 4) { + if (op->type & FPCONV) + conv_sp_to_dp(&u.f, &u.d[0]); + else if (op->type & SIGNEXT) + u.l[0] = u.i; + else + u.l[0] = u.u; + } if (regs->msr & MSR_FP) put_fpr(rn, &u.d[0]); else @@ -498,25 +508,33 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs, } NOKPROBE_SYMBOL(do_fp_load); -static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs, - bool cross_endian) +static int do_fp_store(struct instruction_op *op, unsigned long ea, + struct pt_regs *regs, bool cross_endian) { + int rn, nb; union { + unsigned int u; float f; double d[2]; unsigned long l[2]; u8 b[2 * sizeof(double)]; } u; + nb = GETSIZE(op->type); if (!address_ok(regs, ea, nb)) return -EFAULT; + rn = op->reg; preempt_disable(); if (regs->msr & MSR_FP) get_fpr(rn, &u.d[0]); else u.l[0] = current->thread.TS_FPR(rn); - if (nb == 4) - conv_dp_to_sp(&u.d[0], &u.f); + if (nb == 4) { + if (op->type & FPCONV) + conv_dp_to_sp(&u.d[0], &u.f); + else + u.u = u.l[0]; + } if (nb == 16) { rn |= 1; if (regs->msr & MSR_FP) @@ -2049,7 +2067,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ - op->type = MKOP(LOAD_FP, u, 4); + op->type = MKOP(LOAD_FP, u | FPCONV, 4); break; case 599: /* lfdx */ @@ -2059,7 +2077,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 663: /* stfsx */ case 695: /* stfsux */ - op->type = MKOP(STORE_FP, u, 4); + op->type = 
MKOP(STORE_FP, u | FPCONV, 4); break; case 727: /* stfdx */ @@ -2072,9 +2090,21 @@ op->type = MKOP(LOAD_FP, 0, 16); break; + case 855: /* lfiwax */ + op->type = MKOP(LOAD_FP, SIGNEXT, 4); + break; + + case 887: /* lfiwzx */ + op->type = MKOP(LOAD_FP, 0, 4); + break; + case 919: /* stfdpx */ op->type = MKOP(STORE_FP, 0, 16); break; + + case 983: /* stfiwx */ + op->type = MKOP(STORE_FP, 0, 4); + break; #endif /* __powerpc64 */ #endif /* CONFIG_PPC_FPU */ @@ -2352,7 +2382,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ - op->type = MKOP(LOAD_FP, u, 4); + op->type = MKOP(LOAD_FP, u | FPCONV, 4); op->ea = dform_ea(instr, regs); break; @@ -2364,7 +2394,7 @@ case 52: /* stfs */ case 53: /* stfsu */ - op->type = MKOP(STORE_FP, u, 4); + op->type = MKOP(STORE_FP, u | FPCONV, 4); op->ea = dform_ea(instr, regs); break; @@ -2792,7 +2822,7 @@ */ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_load(op->reg, ea, size, regs, cross_endian); + err = do_fp_load(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC @@ -2862,7 +2892,7 @@ case STORE_FP: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_store(op->reg, ea, size, regs, cross_endian); + err = do_fp_store(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC -- cgit v1.2.3-58-ga151 From da74f659205ea08cb0fd0b3050637b0e0eb31520 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Aug 2017 16:54:32 +0200 Subject: powerpc/32: add memset16() Commit 694fc88ce271f ("powerpc/string: Implement optimized memset variants") added memset16(), memset32() and memset64() for 64-bit PPC. On 32-bit, memset64() is not relevant, and as shown below, the generic version of memset32() generates good code, so only memset16() is a candidate for an optimised version. 000009c0 : 9c0: 2c 05 00 00 cmpwi r5,0 9c4: 39 23 ff fc addi r9,r3,-4 9c8: 4d 82 00 20 beqlr 9cc: 7c a9 03 a6 mtctr r5 9d0: 94 89 00 04 stwu r4,4(r9) 9d4: 42 00 ff fc bdnz 9d0 9d8: 4e 80 00 20 blr The last part of memset(), which handles lengths that are not a multiple of 4 bytes, operates on individual bytes, making it unsuitable for storing halfwords without modification. As that would increase memset() complexity, it is better to implement memset16() from scratch. In addition, this allows a more optimised memset16() than we would get by reusing memset().
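For reference, the semantics the new 32-bit assembly implements can be modelled in portable C roughly as follows (an illustrative sketch, not the kernel code; like the assembly, it works two halfwords at a time and finishes an odd count with a single halfword store):

#include <stddef.h>
#include <stdint.h>

/* Sketch: fill n halfwords at p with v. */
static void *memset16_model(uint16_t *p, uint16_t v, size_t n)
{
	uint16_t *ret = p;

	for (; n >= 2; n -= 2) {	/* main loop: one 32-bit store (stwu) */
		p[0] = v;
		p[1] = v;
		p += 2;
	}
	if (n)				/* trailing halfword (sth) for odd n */
		*p = v;
	return ret;
}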
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/string.h | 4 +++- arch/powerpc/lib/copy_32.S | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b0e82466d4e8..cc9addefb51c 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -10,6 +10,7 @@ #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_MEMCMP #define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMSET16 extern char * strcpy(char *,const char *); extern char * strncpy(char *,const char *, __kernel_size_t); @@ -24,7 +25,6 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #ifdef CONFIG_PPC64 -#define __HAVE_ARCH_MEMSET16 #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 @@ -46,6 +46,8 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) { return __memset64(p, v, n * 8); } +#else +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 8aedbb5f4b86..a14d4df2ebc9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -67,6 +67,20 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) +_GLOBAL(memset16) + rlwinm. r0 ,r5, 31, 1, 31 + addi r6, r3, -4 + beq- 2f + rlwimi r4 ,r4 ,16 ,0 ,15 + mtctr r0 +1: stwu r4, 4(r6) + bdnz 1b +2: andi. r0, r5, 1 + beqlr + sth r4, 4(r6) + blr +EXPORT_SYMBOL(memset16) + /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination -- cgit v1.2.3-58-ga151 From 146e9f1b65478643f2729a97ccb8be60bb4492e5 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 31 Aug 2017 00:18:18 -0700 Subject: crypto/nx: Add P9 NX specific error codes for 842 engine This patch adds checks for P9-specific 842 engine error codes. These errors are reported in the coprocessor status block (CSB) on failures.
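Schematically, the new completion codes map onto errnos the same way the existing ones do in wait_for_csb(); a condensed sketch follows (the helper name is made up for illustration, while the code values and errno choices are those of the patch):

#include <errno.h>

#define CSB_CC_EXCEED_BYTE_COUNT	19	/* P9 or later */
#define CSB_CC_INVALID_CRB		21	/* P9 or later */
#define CSB_CC_INVALID_DDE		30	/* P9 or later */
#define CSB_CC_HW_EXPIRED_TIMER		224	/* P9 or later */

static int p9_csb_cc_to_errno(int cc)
{
	switch (cc) {
	case CSB_CC_EXCEED_BYTE_COUNT:	/* DDE byte count over the HW limit */
	case CSB_CC_INVALID_CRB:	/* malformed CRB; should not happen */
	case CSB_CC_INVALID_DDE:	/* malformed DDE; should not happen */
		return -EINVAL;
	case CSB_CC_HW_EXPIRED_TIMER:	/* job exceeded the allowed time */
		return -EPROTO;
	default:
		return 0;		/* other codes handled as before */
	}
}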
Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/icswx.h | 3 +++ drivers/crypto/nx/nx-842-powernv.c | 18 ++++++++++++++++++ drivers/crypto/nx/nx-842.h | 8 ++++++++ 3 files changed, 29 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 27e588f6c72e..6a2c87577541 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -69,7 +69,10 @@ struct coprocessor_completion_block { #define CSB_CC_WR_PROTECTION (16) #define CSB_CC_UNKNOWN_CODE (17) #define CSB_CC_ABORT (18) +#define CSB_CC_EXCEED_BYTE_COUNT (19) /* P9 or later */ #define CSB_CC_TRANSPORT (20) +#define CSB_CC_INVALID_CRB (21) /* P9 or later */ +#define CSB_CC_INVALID_DDE (30) /* P9 or later */ #define CSB_CC_SEGMENTED_DDL (31) #define CSB_CC_PROGRESS_POINT (32) #define CSB_CC_DDE_OVERFLOW (33) diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 829b5cad0043..c0dd4c7e17d3 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -243,6 +243,13 @@ static int wait_for_csb(struct nx842_workmem *wmem, case CSB_CC_TEMPL_OVERFLOW: CSB_ERR(csb, "Compressed data template shows data past end"); return -EINVAL; + case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ + /* + * DDE byte count exceeds the limit specified in Maximum + * byte count register. + */ + CSB_ERR(csb, "DDE byte count exceeds the limit"); + return -EINVAL; /* these should not happen */ case CSB_CC_INVALID_ALIGN: @@ -284,9 +291,17 @@ static int wait_for_csb(struct nx842_workmem *wmem, CSB_ERR(csb, "Too many DDEs in DDL"); return -EINVAL; case CSB_CC_TRANSPORT: + case CSB_CC_INVALID_CRB: /* P9 or later */ /* shouldn't happen, we setup CRB correctly */ CSB_ERR(csb, "Invalid CRB"); return -EINVAL; + case CSB_CC_INVALID_DDE: /* P9 or later */ + /* + * shouldn't happen, setup_direct/indirect_dde creates + * DDE right + */ + CSB_ERR(csb, "Invalid DDE"); + return -EINVAL; case CSB_CC_SEGMENTED_DDL: /* shouldn't happen, setup_ddl creates DDL right */ CSB_ERR(csb, "Segmented DDL error"); @@ -330,6 +345,9 @@ static int wait_for_csb(struct nx842_workmem *wmem, case CSB_CC_HW: CSB_ERR(csb, "Correctable hardware error"); return -EPROTO; + case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ + CSB_ERR(csb, "Job did not finish within allowed time"); + return -EPROTO; default: CSB_ERR(csb, "Invalid CC %d", csb->cc); diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index 30929bd7d1a9..bb2f31792683 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -76,9 +76,17 @@ #define CSB_CC_DECRYPT_OVERFLOW (64) /* asym crypt codes */ #define CSB_CC_MINV_OVERFLOW (128) +/* + * HW error - Job did not finish in the maximum time allowed. + * Job terminated. + */ +#define CSB_CC_HW_EXPIRED_TIMER (224) /* These are reserved for hypervisor use */ #define CSB_CC_HYP_RESERVE_START (240) #define CSB_CC_HYP_RESERVE_END (253) +#define CSB_CC_HYP_RESERVE_P9_END (251) +/* No valid interrupt server (P9 or later). 
*/ +#define CSB_CC_HYP_RESERVE_NO_INTR_SERVER (252) #define CSB_CC_HYP_NO_HW (254) #define CSB_CC_HYP_HANG_ABORTED (255) -- cgit v1.2.3-58-ga151 From eac1e731b59ee3b5f5e641a7765c7ed41ed26226 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:11 +0200 Subject: powerpc/xive: guest exploitation of the XIVE interrupt controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the framework for using XIVE in a PowerVM guest. The support is very similar to the native one in a much simpler form. Each source is associated with an Event State Buffer (ESB). This is a two-bit state machine which is used to trigger events. The bits are named "P" (pending) and "Q" (queued) and can be controlled by MMIO. The guest OS registers event (or notification) queues on which the HW will post event data for a target to notify. Instead of OPAL calls, a set of hypervisor calls is used to configure the interrupt sources and the event/notification queues of the guest: - H_INT_GET_SOURCE_INFO is used to obtain the address of the MMIO page of the Event State Buffer (PQ bits) entry associated with the source. - H_INT_SET_SOURCE_CONFIG assigns a source to a "target". - H_INT_GET_SOURCE_CONFIG determines which "target" and "priority" a source is assigned to. - H_INT_GET_QUEUE_INFO returns the address of the notification management page associated with the specified "target" and "priority". - H_INT_SET_QUEUE_CONFIG sets or resets the event queue for a given "target" and "priority". It is also used to set the notification config associated with the queue, only unconditional notification for the moment. Reset is performed with a queue size of 0, and queueing is disabled in that case. - H_INT_GET_QUEUE_CONFIG returns the queue settings for a given "target" and "priority". - H_INT_RESET resets all of the partition's interrupt exploitation structures to their initial state, losing all configuration set via the hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG. - H_INT_SYNC issues a synchronisation on a source to make sure all notifications have reached their queue. As for XICS, the XIVE interface for the guest is described in the device tree under the "interrupt-controller" node. A couple of new properties are specific to XIVE: - "reg" contains the base address and size of the thread interrupt management areas (TIMA), also called rings, for the User level and for the Guest OS level. Only the Guest OS level is taken into account today. - "ibm,xive-eq-sizes" the size of the event queues. One cell per size supported, contains log2 of size, in ascending order. - "ibm,xive-lisn-ranges" the interrupt number ranges assigned to the guest. These are allocated using a simple bitmap. and also: - "/ibm,plat-res-int-priorities" contains a list of priorities that the hypervisor has reserved for its own use. Tested with a QEMU XIVE model for pseries and with the Power hypervisor.
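As an illustration of how a guest consumes the "ibm,xive-eq-sizes" property, picking a queue size amounts to scanning the ascending log2 values for the first one that is large enough (an illustrative sketch assuming the cells encode log2 of the queue size in bytes; the real lookup lives in the new spapr.c backend):

#include <stddef.h>

/* Sketch: sizes[] holds log2 of each supported EQ size, ascending. */
static int pick_eq_order(const unsigned int *sizes, size_t n,
			 unsigned long wanted_bytes)
{
	size_t i;

	for (i = 0; i < n; i++)
		if ((1UL << sizes[i]) >= wanted_bytes)
			return (int)sizes[i];
	return -1;	/* no supported queue size is big enough */
}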
Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 13 +- arch/powerpc/include/asm/xive.h | 3 + arch/powerpc/platforms/pseries/Kconfig | 1 + arch/powerpc/platforms/pseries/hotplug-cpu.c | 11 +- arch/powerpc/platforms/pseries/kexec.c | 6 +- arch/powerpc/platforms/pseries/setup.c | 8 +- arch/powerpc/platforms/pseries/smp.c | 27 +- arch/powerpc/sysdev/xive/Kconfig | 5 + arch/powerpc/sysdev/xive/Makefile | 1 + arch/powerpc/sysdev/xive/common.c | 13 + arch/powerpc/sysdev/xive/spapr.c | 618 +++++++++++++++++++++++++++ 11 files changed, 698 insertions(+), 8 deletions(-) create mode 100644 arch/powerpc/sysdev/xive/spapr.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 57d38b504ff7..3d34dc0869f6 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -280,7 +280,18 @@ #define H_RESIZE_HPT_COMMIT 0x370 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 -#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET +#define H_INT_GET_SOURCE_INFO 0x3A8 +#define H_INT_SET_SOURCE_CONFIG 0x3AC +#define H_INT_GET_SOURCE_CONFIG 0x3B0 +#define H_INT_GET_QUEUE_INFO 0x3B4 +#define H_INT_SET_QUEUE_CONFIG 0x3B8 +#define H_INT_GET_QUEUE_CONFIG 0x3BC +#define H_INT_SET_OS_REPORTING_LINE 0x3C0 +#define H_INT_GET_OS_REPORTING_LINE 0x3C4 +#define H_INT_ESB 0x3C8 +#define H_INT_SYNC 0x3CC +#define H_INT_RESET 0x3D0 +#define MAX_HCALL_OPCODE H_INT_RESET /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index c23ff4389ca2..473f133a8555 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -110,11 +110,13 @@ extern bool __xive_enabled; static inline bool xive_enabled(void) { return __xive_enabled; } +extern bool xive_spapr_init(void); extern bool xive_native_init(void); extern void xive_smp_probe(void); extern int xive_smp_prepare_cpu(unsigned int cpu); extern void xive_smp_setup_cpu(void); extern void xive_smp_disable_cpu(void); +extern void xive_teardown_cpu(void); extern void xive_kexec_teardown_cpu(int secondary); extern void xive_shutdown(void); extern void xive_flush_interrupt(void); @@ -147,6 +149,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) static inline bool xive_enabled(void) { return false; } +static inline bool xive_spapr_init(void) { return false; } static inline bool xive_native_init(void) { return false; } static inline void xive_smp_probe(void) { } extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 3a6dfd14f64b..71dd69d9ec64 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -7,6 +7,7 @@ config PPC_PSERIES select PCI select PCI_MSI select PPC_XICS + select PPC_XIVE_SPAPR select PPC_ICP_NATIVE select PPC_ICP_HV select PPC_ICS_RTAS diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index b357f1ae0b0a..fc0d8f97c03a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "pseries.h" @@ -109,7 +110,10 @@ static void pseries_mach_cpu_die(void) local_irq_disable(); idle_task_exit(); - xics_teardown_cpu(); + if 
(xive_enabled()) + xive_teardown_cpu(); + else + xics_teardown_cpu(); if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { set_cpu_current_state(cpu, CPU_STATE_INACTIVE); @@ -174,7 +178,10 @@ static int pseries_cpu_disable(void) boot_cpuid = cpumask_any(cpu_online_mask); /* FIXME: abstract this to not be platform specific later on */ - xics_migrate_irqs_away(); + if (xive_enabled()) + xive_smp_disable_cpu(); + else + xics_migrate_irqs_away(); return 0; } diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 6681ac97fb18..eeb13429d685 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -51,5 +52,8 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } } - xics_kexec_teardown_cpu(secondary); + if (xive_enabled()) + xive_kexec_teardown_cpu(secondary); + else + xics_kexec_teardown_cpu(secondary); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b5b650910cf5..5f1beb8367ac 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -176,8 +177,11 @@ static void __init pseries_setup_i8259_cascade(void) static void __init pseries_init_irq(void) { - xics_init(); - pseries_setup_i8259_cascade(); + /* Try using a XIVE if available, otherwise use a XICS */ + if (!xive_spapr_init()) { + xics_init(); + pseries_setup_i8259_cascade(); + } } static void pseries_lpar_enable_pmcs(void) diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 24785f63fb40..2e184829e5d4 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,9 @@ out: static void smp_setup_cpu(int cpu) { - if (cpu != boot_cpuid) + if (xive_enabled()) + xive_smp_setup_cpu(); + else if (cpu != boot_cpuid) xics_setup_cpu(); if (firmware_has_feature(FW_FEATURE_SPLPAR)) @@ -181,6 +184,13 @@ static int smp_pSeries_kick_cpu(int nr) return 0; } +static int pseries_smp_prepare_cpu(int cpu) +{ + if (xive_enabled()) + return xive_smp_prepare_cpu(cpu); + return 0; +} + static void smp_pseries_cause_ipi(int cpu) { /* POWER9 should not use this handler */ @@ -211,7 +221,7 @@ static int pseries_cause_nmi_ipi(int cpu) return 0; } -static __init void pSeries_smp_probe(void) +static __init void pSeries_smp_probe_xics(void) { xics_smp_probe(); @@ -221,11 +231,24 @@ static __init void pSeries_smp_probe(void) smp_ops->cause_ipi = icp_ops->cause_ipi; } +static __init void pSeries_smp_probe(void) +{ + if (xive_enabled()) + /* + * Don't use P9 doorbells when XIVE is enabled. 
IPIs + * using MMIOs should be faster + */ + xive_smp_probe(); + else + pSeries_smp_probe_xics(); +} + static struct smp_ops_t pseries_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */ .cause_nmi_ipi = pseries_cause_nmi_ipi, .probe = pSeries_smp_probe, + .prepare_cpu = pseries_smp_prepare_cpu, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_setup_cpu, .cpu_bootable = smp_generic_cpu_bootable, diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig index 12ccd7373d2f..3e3e25b5e30d 100644 --- a/arch/powerpc/sysdev/xive/Kconfig +++ b/arch/powerpc/sysdev/xive/Kconfig @@ -9,3 +9,8 @@ config PPC_XIVE_NATIVE default n select PPC_XIVE depends on PPC_POWERNV + +config PPC_XIVE_SPAPR + bool + default n + select PPC_XIVE diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile index 3fab303fc169..536d6e5706e3 100644 --- a/arch/powerpc/sysdev/xive/Makefile +++ b/arch/powerpc/sysdev/xive/Makefile @@ -2,3 +2,4 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-y += common.o obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o +obj-$(CONFIG_PPC_XIVE_SPAPR) += spapr.o diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a30d6d12b92b..f1e0a3326d01 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1372,6 +1372,19 @@ void xive_flush_interrupt(void) #endif /* CONFIG_SMP */ +void xive_teardown_cpu(void) +{ + struct xive_cpu *xc = __this_cpu_read(xive_cpu); + unsigned int cpu = smp_processor_id(); + + /* Set CPPR to 0 to disable flow of interrupts */ + xc->cppr = 0; + out_8(xive_tima + xive_tima_offset + TM_CPPR, 0); + + if (xive_ops->teardown_cpu) + xive_ops->teardown_cpu(cpu, xc); +} + void xive_kexec_teardown_cpu(int secondary) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c new file mode 100644 index 000000000000..797bb0636ab7 --- /dev/null +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -0,0 +1,618 @@ +/* + * Copyright 2016,2017 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "xive: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xive-internal.h" + +static u32 xive_queue_shift; + +struct xive_irq_bitmap { + unsigned long *bitmap; + unsigned int base; + unsigned int count; + spinlock_t lock; + struct list_head list; +}; + +static LIST_HEAD(xive_irq_bitmaps); + +static int xive_irq_bitmap_add(int base, int count) +{ + struct xive_irq_bitmap *xibm; + + xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC); + if (!xibm) + return -ENOMEM; + + spin_lock_init(&xibm->lock); + xibm->base = base; + xibm->count = count; + xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + list_add(&xibm->list, &xive_irq_bitmaps); + + pr_info("Using IRQ range [%x-%x]", xibm->base, + xibm->base + xibm->count - 1); + return 0; +} + +static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) +{ + int irq; + + irq = find_first_zero_bit(xibm->bitmap, xibm->count); + if (irq != xibm->count) { + set_bit(irq, xibm->bitmap); + irq += xibm->base; + } else { + irq = -ENOMEM; + } + + return irq; +} + +static int xive_irq_bitmap_alloc(void) +{ + struct xive_irq_bitmap *xibm; + unsigned long flags; + int irq = -ENOENT; + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + spin_lock_irqsave(&xibm->lock, flags); + irq = __xive_irq_bitmap_alloc(xibm); + spin_unlock_irqrestore(&xibm->lock, flags); + if (irq >= 0) + break; + } + return irq; +} + +static void xive_irq_bitmap_free(int irq) +{ + unsigned long flags; + struct xive_irq_bitmap *xibm; + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) { + spin_lock_irqsave(&xibm->lock, flags); + clear_bit(irq - xibm->base, xibm->bitmap); + spin_unlock_irqrestore(&xibm->lock, flags); + break; + } + } +} + +static long plpar_int_get_source_info(unsigned long flags, + unsigned long lisn, + unsigned long *src_flags, + unsigned long *eoi_page, + unsigned long *trig_page, + unsigned long *esb_shift) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn); + if (rc) { + pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); + return rc; + } + + *src_flags = retbuf[0]; + *eoi_page = retbuf[1]; + *trig_page = retbuf[2]; + *esb_shift = retbuf[3]; + + pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", + retbuf[0], retbuf[1], retbuf[2], retbuf[3]); + + return 0; +} + +#define XIVE_SRC_SET_EISN (1ull << (63 - 62)) +#define XIVE_SRC_MASK (1ull << (63 - 63)) /* unused */ + +static long plpar_int_set_source_config(unsigned long flags, + unsigned long lisn, + unsigned long target, + unsigned long prio, + unsigned long sw_irq) +{ + long rc; + + + pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", + flags, lisn, target, prio, sw_irq); + + + rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn, + target, prio, sw_irq); + if (rc) { + pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", + lisn, target, prio, rc); + return rc; + } + + return 0; +} + +static long plpar_int_get_queue_info(unsigned long flags, + unsigned long target, + unsigned long priority, + unsigned long *esn_page, + unsigned long *esn_size) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority); + if (rc) { + 
pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n", + target, priority, rc); + return rc; + } + + *esn_page = retbuf[0]; + *esn_size = retbuf[1]; + + pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", + retbuf[0], retbuf[1]); + + return 0; +} + +#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63)) + +static long plpar_int_set_queue_config(unsigned long flags, + unsigned long target, + unsigned long priority, + unsigned long qpage, + unsigned long qsize) +{ + long rc; + + pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", + flags, target, priority, qpage, qsize); + + rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, + priority, qpage, qsize); + if (rc) { + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", + target, priority, qpage, rc); + return rc; + } + + return 0; +} + +static long plpar_int_sync(unsigned long flags, unsigned long lisn) +{ + long rc; + + rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn); + if (rc) { + pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); + return rc; + } + + return 0; +} + +#define XIVE_SRC_H_INT_ESB (1ull << (63 - 60)) /* TODO */ +#define XIVE_SRC_LSI (1ull << (63 - 61)) +#define XIVE_SRC_TRIGGER (1ull << (63 - 62)) +#define XIVE_SRC_STORE_EOI (1ull << (63 - 63)) + +static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) +{ + long rc; + unsigned long flags; + unsigned long eoi_page; + unsigned long trig_page; + unsigned long esb_shift; + + memset(data, 0, sizeof(*data)); + + rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page, + &esb_shift); + if (rc) + return -EINVAL; + + if (flags & XIVE_SRC_STORE_EOI) + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; + if (flags & XIVE_SRC_LSI) + data->flags |= XIVE_IRQ_FLAG_LSI; + data->eoi_page = eoi_page; + data->esb_shift = esb_shift; + data->trig_page = trig_page; + + /* + * No chip-id for the sPAPR backend. This has an impact how we + * pick a target. See xive_pick_irq_target(). + */ + data->src_chip = XIVE_INVALID_CHIP_ID; + + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); + if (!data->eoi_mmio) { + pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); + return -ENOMEM; + } + + /* Full function page supports trigger */ + if (flags & XIVE_SRC_TRIGGER) { + data->trig_mmio = data->eoi_mmio; + return 0; + } + + data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); + if (!data->trig_mmio) { + pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); + return -ENOMEM; + } + return 0; +} + +static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) +{ + long rc; + + rc = plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target, + prio, sw_irq); + + return rc == 0 ? 0 : -ENXIO; +} + +/* This can be called multiple time to change a queue configuration */ +static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, + __be32 *qpage, u32 order) +{ + s64 rc = 0; + unsigned long esn_page; + unsigned long esn_size; + u64 flags, qpage_phys; + + /* If there's an actual queue page, clean it */ + if (order) { + if (WARN_ON(!qpage)) + return -EINVAL; + qpage_phys = __pa(qpage); + } else { + qpage_phys = 0; + } + + /* Initialize the rest of the fields */ + q->msk = order ? 
((1u << (order - 2)) - 1) : 0; + q->idx = 0; + q->toggle = 0; + + rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); + if (rc) { + pr_err("Error %lld getting queue info prio %d\n", rc, prio); + rc = -EIO; + goto fail; + } + + /* TODO: add support for the notification page */ + q->eoi_phys = esn_page; + + /* Default is to always notify */ + flags = XIVE_EQ_ALWAYS_NOTIFY; + + /* Configure and enable the queue in HW */ + rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); + if (rc) { + pr_err("Error %lld setting queue for prio %d\n", rc, prio); + rc = -EIO; + } else { + q->qpage = qpage; + } +fail: + return rc; +} + +static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc, + u8 prio) +{ + struct xive_q *q = &xc->queue[prio]; + __be32 *qpage; + + qpage = xive_queue_page_alloc(cpu, xive_queue_shift); + if (IS_ERR(qpage)) + return PTR_ERR(qpage); + + return xive_spapr_configure_queue(cpu, q, prio, qpage, + xive_queue_shift); +} + +static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, + u8 prio) +{ + struct xive_q *q = &xc->queue[prio]; + unsigned int alloc_order; + long rc; + + rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); + if (rc) + pr_err("Error %ld setting queue for prio %d\n", rc, prio); + + alloc_order = xive_alloc_order(xive_queue_shift); + free_pages((unsigned long)q->qpage, alloc_order); + q->qpage = NULL; +} + +static bool xive_spapr_match(struct device_node *node) +{ + /* Ignore cascaded controllers for the moment */ + return 1; +} + +#ifdef CONFIG_SMP +static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) +{ + int irq = xive_irq_bitmap_alloc(); + + if (irq < 0) { + pr_err("Failed to allocate IPI on CPU %d\n", cpu); + return -ENXIO; + } + + xc->hw_ipi = irq; + return 0; +} + +static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) +{ + xive_irq_bitmap_free(xc->hw_ipi); +} +#endif /* CONFIG_SMP */ + +static void xive_spapr_shutdown(void) +{ + long rc; + + rc = plpar_hcall_norets(H_INT_RESET, 0); + if (rc) + pr_err("H_INT_RESET failed %ld\n", rc); +} + +/* + * Perform an "ack" cycle on the current thread. Grab the pending + * active priorities and update the CPPR to the most favored one. + */ +static void xive_spapr_update_pending(struct xive_cpu *xc) +{ + u8 nsr, cppr; + u16 ack; + + /* + * Perform the "Acknowledge O/S to Register" cycle. + * + * Let's speedup the access to the TIMA using the raw I/O + * accessor as we don't need the synchronisation routine of + * the higher level ones + */ + ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG)); + + /* Synchronize subsequent queue accesses */ + mb(); + + /* + * Grab the CPPR and the "NSR" field which indicates the source + * of the interrupt (if any) + */ + cppr = ack & 0xff; + nsr = ack >> 8; + + if (nsr & TM_QW1_NSR_EO) { + if (cppr == 0xff) + return; + /* Mark the priority pending */ + xc->pending_prio |= 1 << cppr; + + /* + * A new interrupt should never have a CPPR less favored + * than our current one. 
+ */ + if (cppr >= xc->cppr) + pr_err("CPU %d odd ack CPPR, got %d at %d\n", + smp_processor_id(), cppr, xc->cppr); + + /* Update our idea of what the CPPR is */ + xc->cppr = cppr; + } +} + +static void xive_spapr_eoi(u32 hw_irq) +{ + /* Not used */; +} + +static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) +{ + /* Only some debug on the TIMA settings */ + pr_debug("(HW value: %08x %08x %08x)\n", + in_be32(xive_tima + TM_QW1_OS + TM_WORD0), + in_be32(xive_tima + TM_QW1_OS + TM_WORD1), + in_be32(xive_tima + TM_QW1_OS + TM_WORD2)); +} + +static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc) +{ + /* Nothing to do */; +} + +static void xive_spapr_sync_source(u32 hw_irq) +{ + /* Specs are unclear on what this is doing */ + plpar_int_sync(0, hw_irq); +} + +static const struct xive_ops xive_spapr_ops = { + .populate_irq_data = xive_spapr_populate_irq_data, + .configure_irq = xive_spapr_configure_irq, + .setup_queue = xive_spapr_setup_queue, + .cleanup_queue = xive_spapr_cleanup_queue, + .match = xive_spapr_match, + .shutdown = xive_spapr_shutdown, + .update_pending = xive_spapr_update_pending, + .eoi = xive_spapr_eoi, + .setup_cpu = xive_spapr_setup_cpu, + .teardown_cpu = xive_spapr_teardown_cpu, + .sync_source = xive_spapr_sync_source, +#ifdef CONFIG_SMP + .get_ipi = xive_spapr_get_ipi, + .put_ipi = xive_spapr_put_ipi, +#endif /* CONFIG_SMP */ + .name = "spapr", +}; + +/* + * get max priority from "/ibm,plat-res-int-priorities" + */ +static bool xive_get_max_prio(u8 *max_prio) +{ + struct device_node *rootdn; + const __be32 *reg; + u32 len; + int prio, found; + + rootdn = of_find_node_by_path("/"); + if (!rootdn) { + pr_err("not root node found !\n"); + return false; + } + + reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + if (!reg) { + pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); + return false; + } + + if (len % (2 * sizeof(u32)) != 0) { + pr_err("invalid 'ibm,plat-res-int-priorities' property\n"); + return false; + } + + /* HW supports priorities in the range [0-7] and 0xFF is a + * wildcard priority used to mask. We scan the ranges reserved + * by the hypervisor to find the lowest priority we can use. 
+ */ + found = 0xFF; + for (prio = 0; prio < 8; prio++) { + int reserved = 0; + int i; + + for (i = 0; i < len / (2 * sizeof(u32)); i++) { + int base = be32_to_cpu(reg[2 * i]); + int range = be32_to_cpu(reg[2 * i + 1]); + + if (prio >= base && prio < base + range) + reserved++; + } + + if (!reserved) + found = prio; + } + + if (found == 0xFF) { + pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n"); + return false; + } + + *max_prio = found; + return true; +} + +bool xive_spapr_init(void) +{ + struct device_node *np; + struct resource r; + void __iomem *tima; + struct property *prop; + u8 max_prio; + u32 val; + u32 len; + const __be32 *reg; + int i; + + if (xive_cmdline_disabled) + return false; + + pr_devel("%s()\n", __func__); + np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe"); + if (!np) { + pr_devel("not found !\n"); + return false; + } + pr_devel("Found %s\n", np->full_name); + + /* Resource 1 is the OS ring TIMA */ + if (of_address_to_resource(np, 1, &r)) { + pr_err("Failed to get thread mgmnt area resource\n"); + return false; + } + tima = ioremap(r.start, resource_size(&r)); + if (!tima) { + pr_err("Failed to map thread mgmnt area\n"); + return false; + } + + if (!xive_get_max_prio(&max_prio)) + return false; + + /* Feed the IRQ number allocator with the ranges given in the DT */ + reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); + if (!reg) { + pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); + return false; + } + + if (len % (2 * sizeof(u32)) != 0) { + pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); + return false; + } + + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) + xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + + /* Iterate the EQ sizes and pick one */ + of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { + xive_queue_shift = val; + if (val == PAGE_SHIFT) + break; + } + + /* Initialize XIVE core with our backend */ + if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + return false; + + pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); + return true; +} -- cgit v1.2.3-58-ga151 From c58a14a9ccf0a79bbdafc106a95c080340c00f49 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:14 +0200 Subject: powerpc/xive: add the HW IRQ number under xive_irq_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be required later by the H_INT_ESB hcall. 
Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/sysdev/xive/native.c | 2 ++ arch/powerpc/sysdev/xive/spapr.c | 2 ++ 3 files changed, 5 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 473f133a8555..64ec9bbcf03e 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -45,6 +45,7 @@ struct xive_irq_data { void __iomem *trig_mmio; u32 esb_shift; int src_chip; + u32 hw_irq; /* Setup/used by frontend */ int target; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index ace3d7aedfb7..44f3a25ca630 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -82,6 +82,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) return -ENOMEM; } + data->hw_irq = hw_irq; + if (!data->trig_page) return 0; if (data->trig_page == data->eoi_page) { diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 797bb0636ab7..0fcae7504353 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -264,6 +264,8 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) return -ENOMEM; } + data->hw_irq = hw_irq; + /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; -- cgit v1.2.3-58-ga151 From bed81ee181dd6b21171cffbb80472cc5b774c24d Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:15 +0200 Subject: powerpc/xive: introduce H_INT_ESB hcall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The H_INT_ESB hcall() is used to issue a load or store to the ESB page instead of using the MMIO pages. This can be used as a workaround on some HW issues. The OS knows that this hcall should be used on an interrupt source when the ESB hcall flag is set to 1 in the hcall H_INT_GET_SOURCE_INFO. To maintain the frontier between the xive frontend and backend, we introduce a new xive operation 'esb_rw' to be used in the routines doing memory accesses on the ESBs. 
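To restate the dispatch idea in isolation: the frontend keeps a single accessor and the backend decides whether the access goes through MMIO or the hcall. The fragment below mirrors, in simplified form, what the xive_esb_read() hunk further down does; the write side is symmetrical, and error handling and the shift-bug quirk are omitted.

/* Simplified restatement of the read-side dispatch added by this patch. */
static u8 esb_read(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); /* load via H_INT_ESB */
	else
		val = in_be64(xd->eoi_mmio + offset); /* direct MMIO load */

	return (u8)val;
}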
Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/sysdev/xive/common.c | 10 ++++++-- arch/powerpc/sysdev/xive/spapr.c | 44 +++++++++++++++++++++++++++++++- arch/powerpc/sysdev/xive/xive-internal.h | 1 + 4 files changed, 53 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 64ec9bbcf03e..371fbebf1ec9 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -56,6 +56,7 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 #define XIVE_IRQ_FLAG_MASK_FW 0x08 #define XIVE_IRQ_FLAG_EOI_FW 0x10 +#define XIVE_IRQ_FLAG_H_INT_ESB 0x20 #define XIVE_INVALID_CHIP_ID -1 diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 9ac9626c0fa6..7014ca6da4f6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -198,7 +198,10 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; - val = in_be64(xd->eoi_mmio + offset); + if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) + val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); + else + val = in_be64(xd->eoi_mmio + offset); return (u8)val; } @@ -209,7 +212,10 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; - out_be64(xd->eoi_mmio + offset, data); + if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) + xive_ops->esb_rw(xd->hw_irq, offset, data, 1); + else + out_be64(xd->eoi_mmio + offset, data); } #ifdef CONFIG_XMON diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 0fcae7504353..43e9eeb0d39f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -224,7 +224,46 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn) return 0; } -#define XIVE_SRC_H_INT_ESB (1ull << (63 - 60)) /* TODO */ +#define XIVE_ESB_FLAG_STORE (1ull << (63 - 63)) + +static long plpar_int_esb(unsigned long flags, + unsigned long lisn, + unsigned long offset, + unsigned long in_data, + unsigned long *out_data) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", + flags, lisn, offset, in_data); + + rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, in_data); + if (rc) { + pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", + lisn, offset, rc); + return rc; + } + + *out_data = retbuf[0]; + + return 0; +} + +static u64 xive_spapr_esb_rw(u32 lisn, u32 offset, u64 data, bool write) +{ + unsigned long read_data; + long rc; + + rc = plpar_int_esb(write ? XIVE_ESB_FLAG_STORE : 0, + lisn, offset, data, &read_data); + if (rc) + return -1; + + return write ? 
0 : read_data; +} + +#define XIVE_SRC_H_INT_ESB (1ull << (63 - 60)) #define XIVE_SRC_LSI (1ull << (63 - 61)) #define XIVE_SRC_TRIGGER (1ull << (63 - 62)) #define XIVE_SRC_STORE_EOI (1ull << (63 - 63)) @@ -244,6 +283,8 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) if (rc) return -EINVAL; + if (flags & XIVE_SRC_H_INT_ESB) + data->flags |= XIVE_IRQ_FLAG_H_INT_ESB; if (flags & XIVE_SRC_STORE_EOI) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (flags & XIVE_SRC_LSI) @@ -487,6 +528,7 @@ static const struct xive_ops xive_spapr_ops = { .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, + .esb_rw = xive_spapr_esb_rw, #ifdef CONFIG_SMP .get_ipi = xive_spapr_get_ipi, .put_ipi = xive_spapr_put_ipi, diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index dd1e2022cce4..f34abed0c05f 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -47,6 +47,7 @@ struct xive_ops { void (*update_pending)(struct xive_cpu *xc); void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); + u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc); void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); -- cgit v1.2.3-58-ga151 From ac5e5a5402d64acd48af3287718e24ff8ba9fa21 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:16 +0200 Subject: powerpc/xive: add XIVE Exploitation Mode to CAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On POWER9, the Client Architecture Support (CAS) negotiation process determines whether the guest operates in XIVE Legacy compatibility or in XIVE exploitation mode. Now that we have initial guest support for the XIVE interrupt controller, let's inform the hypervisor what we can do. 
The platform advertises the XIVE Exploitation Mode support using the property "ibm,arch-vec-5-platform-support", byte 23 bits 0-1: - 0b00 XIVE legacy mode Only - 0b01 XIVE exploitation mode Only - 0b10 XIVE legacy or exploitation mode The OS asks for XIVE Exploitation Mode support using the property "ibm,architecture-vec-5", byte 23 bits 0-1: - 0b00 XIVE legacy mode Only - 0b01 XIVE exploitation mode Only (A short sketch of how these OV5 byte/bit values are decoded follows after this commit.) Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 5 ++++- arch/powerpc/kernel/prom_init.c | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 35c00d7a0cf8..825bd5998701 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -159,7 +159,10 @@ struct of_drconf_cell { #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ -#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ +#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */ +#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */ +#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */ +#define OV5_XIVE_EITHER 0x1780 /* XIVE legacy or exploitation mode */ /* MMU Base Architecture */ #define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ #define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 613f79f03877..02190e90c7ae 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -177,6 +177,7 @@ struct platform_support { bool hash_mmu; bool radix_mmu; bool radix_gtse; + bool xive; }; /* Platforms codes are now obsolete in the kernel. 
Now only used within this @@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val, } } +static void __init prom_parse_xive_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */ + prom_debug("XIVE - either mode supported\n"); + support->xive = true; + break; + case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */ + prom_debug("XIVE - exploitation mode supported\n"); + support->xive = true; + break; + case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */ + prom_debug("XIVE - legacy mode supported\n"); + break; + default: + prom_debug("Unknown xive support option: 0x%x\n", val); + break; + } +} + static void __init prom_parse_platform_support(u8 index, u8 val, struct platform_support *support) { @@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val, support->radix_gtse = true; } break; + case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */ + prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT), + support); + break; } } @@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void) struct platform_support supported = { .hash_mmu = false, .radix_mmu = false, - .radix_gtse = false + .radix_gtse = false, + .xive = false }; int prop_len = prom_getproplen(prom.chosen, "ibm,arch-vec-5-platform-support"); @@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void) /* We're probably on a legacy hypervisor */ prom_debug("Assuming legacy hash support\n"); } + + if (supported.xive) { + prom_debug("Asking for XIVE\n"); + ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT); + } } static void __init prom_send_capabilities(void) -- cgit v1.2.3-58-ga151
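A closing note on the OV5 encoding used in the last patch above: each OV5_* constant packs the option vector 5 byte index in its high byte and the feature bit mask in its low byte, which is why OV5_XIVE_SUPPORT is 0x17C0 (byte 23, bits 0-1 in IBM bit numbering, i.e. mask 0xC0). The sketch below assumes the usual OV5_INDX()/OV5_FEAT() split; the demo function is invented for illustration.

/* Sketch: how an OV5_* constant splits into byte index and bit mask (assumed helpers). */
#define OV5_INDX(x) ((x) >> 8)   /* option vector 5 byte index */
#define OV5_FEAT(x) ((x) & 0xff) /* feature bits within that byte */

static void ov5_xive_demo(void)
{
	unsigned int byte = OV5_INDX(0x17C0); /* 0x17 == byte 23 */
	unsigned int mask = OV5_FEAT(0x17C0); /* 0xC0 == IBM bits 0-1 */

	/*
	 * prom_parse_platform_support() keys off the byte index and then
	 * masks the advertised value with OV5_FEAT(OV5_XIVE_SUPPORT)
	 * before parsing it, as in the hunk above.
	 */
	(void)byte;
	(void)mask;
}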