diff options
Diffstat (limited to 'arch/powerpc/platforms')
45 files changed, 1832 insertions, 707 deletions
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 8c0d324f657e..3823df235f25 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, if (ret) break; /* fall through and return the timeout */ + fallthrough; case WDIOC_GETTIMEOUT: /* we need to round here as to avoid e.g. the following diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 87f524e4b09c..8a7e55acf090 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 7a5e8f4541e3..e02d29a9d12f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/book3s/Kconfig" +source "arch/powerpc/platforms/microwatt/Kconfig" config KVM_GUEST bool "KVM Guest support" @@ -49,6 +51,7 @@ config PPC_NATIVE config PPC_OF_BOOT_TRAMPOLINE bool "Support booting from Open Firmware or yaboot" depends on PPC_BOOK3S_32 || PPC64 + select RELOCATABLE if PPC64 default y help Support from booting from Open Firmware or yaboot using an diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7d271de8fcbd..0e1bb1cedd13 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,6 +61,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT + select PPC_HAVE_KUEP endchoice @@ -390,7 +391,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -402,7 +403,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Access Protection (KUAP) @@ -425,10 +426,6 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S -config PPC_MMU_NOHASH_32 - def_bool y - depends on PPC_MMU_NOHASH && PPC32 - config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E @@ -477,9 +474,9 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-8192)" - range 2 8192 - depends on SMP + int "Maximum number of CPUs (2-8192)" if SMP + range 2 8192 if SMP + default "1" if !SMP default "32" if PPC64 default "4" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 143d4417f6cc..94470fb27c99 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_BOOK3S) += book3s/ +obj-$(CONFIG_PPC_MICROWATT) += microwatt/ diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig new file mode 100644 index 000000000000..34c931592ef0 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_VAS + bool "IBM Virtual Accelerator Switchboard (VAS)" + depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES + default y + help + This enables support for IBM Virtual Accelerator Switchboard (VAS). + + VAS devices are found in POWER9-based and later systems, they + provide access to accelerator coprocessors such as NX-GZIP and + NX-842. This config allows the kernel to use NX-842 accelerators, + and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV + and POWER10 PowerVM platforms. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile new file mode 100644 index 000000000000..e790f1910f61 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC_VAS) += vas-api.o diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c new file mode 100644 index 000000000000..30172e52e16b --- /dev/null +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <linux/io.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. + */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; + const struct vas_user_win_ops *vops; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +/* + * Take reference to pid and mm + */ +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) +{ + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + task_ref->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + task_ref->mm = get_task_mm(current); + if (!task_ref->mm) { + put_pid(task_ref->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + return -EPERM; + } + + mmgrab(task_ref->mm); + mmput(task_ref->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. So takes tgid + * reference until window closed to make sure tgid is not + * reused. + */ + task_ref->tgid = find_get_pid(task_tgid_vnr(current)); + + return 0; +} + +/* + * Successful return must release the task reference with + * put_task_struct + */ +static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, + struct task_struct **tskp, struct pid **pidp) +{ + struct task_struct *tsk; + struct pid *pid; + + pid = task_ref->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) { + pid = task_ref->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return false; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return false; + } + + *tskp = tsk; + *pidp = pid; + + return true; +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!task_ref->mm)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + + if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) + return; + + kthread_use_mm(task_ref->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(task_ref->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +void vas_dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid window open API version\n"); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) { + pr_err("VAS API is not registered\n"); + return -EACCES; + } + + txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags, + cp_inst->coproc->cop_type); + if (IS_ERR(txwin)) { + pr_err("%s() VAS window open failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + int rc; + + if (cp_inst->txwin) { + if (cp_inst->coproc->vops && + cp_inst->coproc->vops->close_win) { + rc = cp_inst->coproc->vops->close_win(cp_inst->txwin); + if (rc) + return rc; + } + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) { + pr_err("%s(): VAS API is not registered\n", __func__); + return -EACCES; + } + + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (!paste_addr) { + pr_err("%s(): Window paste address failed\n", __func__); + return -EINVAL; + } + + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name, + const struct vas_user_win_ops *vops) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + coproc_device.vops = vops; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 93ea41680f54..a1c293f42a1f 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -25,10 +25,9 @@ struct spiderpci_iowa_private { static void spiderpci_io_flush(struct iowa_bus *bus) { struct spiderpci_iowa_private *priv; - u32 val; priv = bus->private; - val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); iosync(); } diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index d56b4e3241cd..b41e81b22fdc 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_problem __iomem *prob = spu->problem; - u32 dummy = 0; /* Restore, Step 66: * If CSA.MB_Stat[P]=0 (mailbox empty) then * read from the PPU_MB register. */ if ((csa->prob.mb_stat_R & 0xFF) == 0) { - dummy = in_be32(&prob->pu_mb_R); + in_be32(&prob->pu_mb_R); eieio(); } } @@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 dummy = 0UL; /* Restore, Step 66: * If CSA.MB_Stat[I]=0 (mailbox empty) then * read from the PPUINT_MB register. */ if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { - dummy = in_be64(&priv2->puint_mb_R); + in_be64(&priv2->puint_mb_R); eieio(); spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); eieio(); diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 53065d564161..85521b3e7098 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 5565647dc879..d8da6a483e59 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig new file mode 100644 index 000000000000..8f6a81978461 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MICROWATT + depends on PPC_BOOK3S_64 && !SMP + bool "Microwatt SoC platform" + select PPC_XICS + select PPC_ICS_NATIVE + select PPC_ICP_NATIVE + select PPC_NATIVE + select PPC_UDBG_16550 + select ARCH_RANDOM + help + This option enables support for FPGA-based Microwatt implementations. + diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile new file mode 100644 index 000000000000..116d6d3ad3f0 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -0,0 +1 @@ +obj-y += setup.o rng.o diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c new file mode 100644 index 000000000000..3d8ee6eb7dad --- /dev/null +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/powerpc/platforms/powernv/rng.c, which is: + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "microwatt-rng: " fmt + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <asm/archrandom.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +int microwatt_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static __init int rng_init(void) +{ + unsigned long val; + int i; + + for (i = 0; i < 10; i++) { + if (microwatt_get_random_darn(&val)) { + ppc_md.get_random_seed = microwatt_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} +machine_subsys_initcall(, rng_init); diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c new file mode 100644 index 000000000000..0b02603bdb74 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -0,0 +1,41 @@ +/* + * Microwatt FPGA-based SoC platform setup code. + * + * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/xics.h> +#include <asm/udbg.h> + +static void __init microwatt_init_IRQ(void) +{ + xics_init(); +} + +static int __init microwatt_probe(void) +{ + return of_machine_is_compatible("microwatt-soc"); +} + +static int __init microwatt_populate(void) +{ + return of_platform_default_populate(NULL, NULL, NULL); +} +machine_arch_initcall(microwatt, microwatt_populate); + +define_machine(microwatt) { + .name = "microwatt", + .probe = microwatt_probe, + .init_IRQ = microwatt_init_IRQ, + .progress = udbg_progress, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 1c954c90b0f4..9b88e3cded7d 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) */ if (regs->msr & SRR1_WAKEMASK) - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: @@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) restore_astate(hard_smp_processor_id()); /* everything handled */ - regs->msr |= MSR_RI; + regs_set_return_msr(regs, regs->msr | MSR_RI); return 1; } diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 9d4ecd292255..d20ef35e6d9d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi) bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase); bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase); bootx_printf(" (log)"); - bootx_printf("\nklimit : 0x%x",(unsigned long)klimit); + bootx_printf("\nklimit : 0x%x",(unsigned long)_end); bootx_printf("\nboot_info at : 0x%x", bi); __asm__ __volatile__ ("mfmsr %0" : "=r" (flags)); bootx_printf("\nMSR : 0x%x", flags); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index adae2a6712e1..bdfea6d6ab69 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 619b093a0657..043eefbbdd28 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -33,20 +33,6 @@ config PPC_MEMTRACE Enabling this option allows for runtime allocation of memory (RAM) for hardware tracing. -config PPC_VAS - bool "IBM Virtual Accelerator Switchboard (VAS)" - depends on PPC_POWERNV && PPC_64K_PAGES - default y - help - This enables support for IBM Virtual Accelerator Switchboard (VAS). - - VAS allows accelerators in co-processors like NX-GZIP and NX-842 - to be accessible to kernel subsystems and user processes. - - VAS adapters are found in POWER9 based systems. - - If unsure, say N. - config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" depends on DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index be2546b96816..dc7b37c23b60 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 01401e3da7ca..f812c74c61e5 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/percpu.h> #include <linux/jump_label.h> +#include <asm/interrupt.h> #include <asm/opal-api.h> #include <asm/trace.h> #include <asm/asm-prototypes.h> @@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, bool mmu = (msr & (MSR_IR|MSR_DR)); int64_t ret; + /* OPAL call / firmware may use SRR and/or HSRR */ + srr_regs_clobbered(); + msr &= ~MSR_EE; if (unlikely(!mmu)) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 303d7c775740..2835376e61a4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs) * Setup regs->nip to rfi into fixup address. */ if (recover_addr) - regs->nip = recover_addr; + regs_set_return_ip(regs, recover_addr); out: return !!recover_addr; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b18468dc31ff..6bb3c52633fb 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, return PCIBIOS_SUCCESSFUL; } -#if CONFIG_EEH +#ifdef CONFIG_EEH static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 73207b53dc2b..7e98b00ea2e8 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0) } } +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} + static void unsplit_core(void) { u64 hid0, mask; diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c deleted file mode 100644 index 98ed5d8c5441..000000000000 --- a/arch/powerpc/platforms/powernv/vas-api.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * VAS user space API for its accelerators (Only NX-GZIP is supported now) - * Copyright (C) 2019 Haren Myneni, IBM Corp - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <asm/vas.h> -#include <uapi/asm/vas-api.h> -#include "vas.h" - -/* - * The driver creates the device node that can be used as follows: - * For NX-GZIP - * - * fd = open("/dev/crypto/nx-gzip", O_RDWR); - * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); - * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). - * vas_copy(&crb, 0, 1); - * vas_paste(paste_addr, 0, 1); - * close(fd) or exit process to close window. - * - * where "vas_copy" and "vas_paste" are defined in copy-paste.h. - * copy/paste returns to the user space directly. So refer NX hardware - * documententation for exact copy/paste usage and completion / error - * conditions. - */ - -/* - * Wrapper object for the nx-gzip device - there is just one instance of - * this node for the whole system. - */ -static struct coproc_dev { - struct cdev cdev; - struct device *device; - char *name; - dev_t devt; - struct class *class; - enum vas_cop_type cop_type; -} coproc_device; - -struct coproc_instance { - struct coproc_dev *coproc; - struct vas_window *txwin; -}; - -static char *coproc_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); -} - -static int coproc_open(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst; - - cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); - if (!cp_inst) - return -ENOMEM; - - cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, - cdev); - fp->private_data = cp_inst; - - return 0; -} - -static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) -{ - void __user *uptr = (void __user *)arg; - struct vas_tx_win_attr txattr = {}; - struct vas_tx_win_open_attr uattr; - struct coproc_instance *cp_inst; - struct vas_window *txwin; - int rc, vasid; - - cp_inst = fp->private_data; - - /* - * One window for file descriptor - */ - if (cp_inst->txwin) - return -EEXIST; - - rc = copy_from_user(&uattr, uptr, sizeof(uattr)); - if (rc) { - pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); - return -EFAULT; - } - - if (uattr.version != 1) { - pr_err("Invalid version\n"); - return -EINVAL; - } - - vasid = uattr.vas_id; - - vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); - - txattr.lpid = mfspr(SPRN_LPID); - txattr.pidr = mfspr(SPRN_PID); - txattr.user_win = true; - txattr.rsvd_txbuf_count = false; - txattr.pswid = false; - - pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, - mfspr(SPRN_PID)); - - txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); - if (IS_ERR(txwin)) { - pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, - PTR_ERR(txwin)); - return PTR_ERR(txwin); - } - - cp_inst->txwin = txwin; - - return 0; -} - -static int coproc_release(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst = fp->private_data; - - if (cp_inst->txwin) { - vas_win_close(cp_inst->txwin); - cp_inst->txwin = NULL; - } - - kfree(cp_inst); - fp->private_data = NULL; - - /* - * We don't know here if user has other receive windows - * open, so we can't really call clear_thread_tidr(). - * So, once the process calls set_thread_tidr(), the - * TIDR value sticks around until process exits, resulting - * in an extra copy in restore_sprs(). - */ - - return 0; -} - -static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct coproc_instance *cp_inst = fp->private_data; - struct vas_window *txwin; - unsigned long pfn; - u64 paste_addr; - pgprot_t prot; - int rc; - - txwin = cp_inst->txwin; - - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, - (vma->vm_end - vma->vm_start), PAGE_SIZE); - return -EINVAL; - } - - /* Ensure instance has an open send window */ - if (!txwin) { - pr_err("%s(): No send window open?\n", __func__); - return -EINVAL; - } - - vas_win_paste_addr(txwin, &paste_addr, NULL); - pfn = paste_addr >> PAGE_SHIFT; - - /* flags, page_prot from cxl_mmap(), except we want cachable */ - vma->vm_flags |= VM_IO | VM_PFNMAP; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - - prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); - - rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, prot); - - pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, - paste_addr, vma->vm_start, rc); - - return rc; -} - -static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case VAS_TX_WIN_OPEN: - return coproc_ioc_tx_win_open(fp, arg); - default: - return -EINVAL; - } -} - -static struct file_operations coproc_fops = { - .open = coproc_open, - .release = coproc_release, - .mmap = coproc_mmap, - .unlocked_ioctl = coproc_ioctl, -}; - -/* - * Supporting only nx-gzip coprocessor type now, but this API code - * extended to other coprocessor types later. - */ -int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name) -{ - int rc = -EINVAL; - dev_t devno; - - rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); - if (rc) { - pr_err("Unable to allocate coproc major number: %i\n", rc); - return rc; - } - - pr_devel("%s device allocated, dev [%i,%i]\n", name, - MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); - - coproc_device.class = class_create(mod, name); - if (IS_ERR(coproc_device.class)) { - rc = PTR_ERR(coproc_device.class); - pr_err("Unable to create %s class %d\n", name, rc); - goto err_class; - } - coproc_device.class->devnode = coproc_devnode; - coproc_device.cop_type = cop_type; - - coproc_fops.owner = mod; - cdev_init(&coproc_device.cdev, &coproc_fops); - - devno = MKDEV(MAJOR(coproc_device.devt), 0); - rc = cdev_add(&coproc_device.cdev, devno, 1); - if (rc) { - pr_err("cdev_add() failed %d\n", rc); - goto err_cdev; - } - - coproc_device.device = device_create(coproc_device.class, NULL, - devno, NULL, name, MINOR(devno)); - if (IS_ERR(coproc_device.device)) { - rc = PTR_ERR(coproc_device.device); - pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); - goto err; - } - - pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), - MINOR(devno)); - - return 0; - -err: - cdev_del(&coproc_device.cdev); -err_cdev: - class_destroy(coproc_device.class); -err_class: - unregister_chrdev_region(coproc_device.devt, 1); - return rc; -} -EXPORT_SYMBOL_GPL(vas_register_coproc_api); - -void vas_unregister_coproc_api(void) -{ - dev_t devno; - - cdev_del(&coproc_device.cdev); - devno = MKDEV(MAJOR(coproc_device.devt), 0); - device_destroy(coproc_device.class, devno); - - class_destroy(coproc_device.class); - unregister_chrdev_region(coproc_device.devt, 1); -} -EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 41fa90d2f4ab..3ce89a4b54be 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <asm/vas.h> #include "vas.h" static struct dentry *vas_debugfs; @@ -28,7 +29,7 @@ static char *cop_to_str(int cop) static int info_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private) if (!window->hvwc_map) goto unlock; - seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", vas_window_pid(window)); + seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win)); unlock: mutex_unlock(&vas_mutex); @@ -47,7 +48,7 @@ unlock: DEFINE_SHOW_ATTRIBUTE(info); -static inline void print_reg(struct seq_file *s, struct vas_window *win, +static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win, char *name, u32 reg) { seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); @@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win, static int hvwc_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -103,8 +104,10 @@ unlock: DEFINE_SHOW_ATTRIBUTE(hvwc); -void vas_window_free_dbgdir(struct vas_window *window) +void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win) { + struct vas_window *window = &pnv_win->vas_win; + if (window->dbgdir) { debugfs_remove_recursive(window->dbgdir); kfree(window->dbgname); @@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window) } } -void vas_window_init_dbgdir(struct vas_window *window) +void vas_window_init_dbgdir(struct pnv_vas_window *window) { struct dentry *d; if (!window->vinst->dbgdir) return; - window->dbgname = kzalloc(16, GFP_KERNEL); - if (!window->dbgname) + window->vas_win.dbgname = kzalloc(16, GFP_KERNEL); + if (!window->vas_win.dbgname) return; - snprintf(window->dbgname, 16, "w%d", window->winid); + snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid); - d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - window->dbgdir = d; + d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir); + window->vas_win.dbgdir = d; debugfs_create_file("info", 0444, d, window, &info_fops); debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 3d21fce254b7..a7aabc18039e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -26,150 +26,6 @@ */ #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) -static void dump_crb(struct coprocessor_request_block *crb) -{ - struct data_descriptor_entry *dde; - struct nx_fault_stamp *nx; - - dde = &crb->source; - pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - dde = &crb->target; - pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - nx = &crb->stamp.nx; - pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", - be32_to_cpu(nx->pswid), - be64_to_cpu(crb->stamp.nx.fault_storage_addr), - nx->flags, nx->fault_status); -} - -/* - * Update the CSB to indicate a translation error. - * - * User space will be polling on CSB after the request is issued. - * If NX can handle the request without any issues, it updates CSB. - * Whereas if NX encounters page fault, the kernel will handle the - * fault and update CSB with translation error. - * - * If we are unable to update the CSB means copy_to_user failed due to - * invalid csb_addr, send a signal to the process. - */ -static void update_csb(struct vas_window *window, - struct coprocessor_request_block *crb) -{ - struct coprocessor_status_block csb; - struct kernel_siginfo info; - struct task_struct *tsk; - void __user *csb_addr; - struct pid *pid; - int rc; - - /* - * NX user space windows can not be opened for task->mm=NULL - * and faults will not be generated for kernel requests. - */ - if (WARN_ON_ONCE(!window->mm || !window->user_win)) - return; - - csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); - - memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_FAULT_ADDRESS; - csb.ce = CSB_CE_TERMINATION; - csb.cs = 0; - csb.count = 0; - - /* - * NX operates and returns in BE format as defined CRB struct. - * So saves fault_storage_addr in BE as NX pastes in FIFO and - * expects user space to convert to CPU format. - */ - csb.address = crb->stamp.nx.fault_storage_addr; - csb.flags = 0; - - pid = window->pid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Process closes send window after all pending NX requests are - * completed. In multi-thread applications, a child thread can - * open a window and can exit without closing it. May be some - * requests are pending or this window can be used by other - * threads later. We should handle faults if NX encounters - * pages faults on these requests. Update CSB with translation - * error and fault address. If csb_addr passed by user space is - * invalid, send SEGV signal to pid saved in window. If the - * child thread is not running, send the signal to tgid. - * Parent thread (tgid) will close this window upon its exit. - * - * pid and mm references are taken when window is opened by - * process (pid). So tgid is used only when child thread opens - * a window and exits without closing it. - */ - if (!tsk) { - pid = window->tgid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Parent thread (tgid) will be closing window when it - * exits. So should not get here. - */ - if (WARN_ON_ONCE(!tsk)) - return; - } - - /* Return if the task is exiting. */ - if (tsk->flags & PF_EXITING) { - put_task_struct(tsk); - return; - } - - kthread_use_mm(window->mm); - rc = copy_to_user(csb_addr, &csb, sizeof(csb)); - /* - * User space polls on csb.flags (first byte). So add barrier - * then copy first byte with csb flags update. - */ - if (!rc) { - csb.flags = CSB_V; - /* Make sure update to csb.flags is visible now */ - smp_mb(); - rc = copy_to_user(csb_addr, &csb, sizeof(u8)); - } - kthread_unuse_mm(window->mm); - put_task_struct(tsk); - - /* Success */ - if (!rc) - return; - - pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", - csb_addr, pid_vnr(pid)); - - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = EFAULT; - info.si_code = SEGV_MAPERR; - info.si_addr = csb_addr; - - /* - * process will be polling on csb.flags after request is sent to - * NX. So generally CSB update should not fail except when an - * application passes invalid csb_addr. So an error message will - * be displayed and leave it to user space whether to ignore or - * handle this signal. - */ - rcu_read_lock(); - rc = kill_pid_info(SIGSEGV, &info, pid); - rcu_read_unlock(); - - pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, - pid_vnr(pid), rc); -} - static void dump_fifo(struct vas_instance *vinst, void *entry) { unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; @@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) struct vas_instance *vinst = data; struct coprocessor_request_block *crb, *entry; struct coprocessor_request_block buf; - struct vas_window *window; + struct pnv_vas_window *window; unsigned long flags; void *fifo; @@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) vinst->vas_id, vinst->fault_fifo, fifo, vinst->fault_crbs); - dump_crb(crb); + vas_dump_crb(crb); window = vas_pswid_to_window(vinst, be32_to_cpu(crb->stamp.nx.pswid)); @@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) WARN_ON_ONCE(1); } else { - update_csb(window, crb); + /* + * NX sees faults only with user space windows. + */ + if (window->user_win) + vas_update_csb(crb, &window->vas_win.task_ref); + else + WARN_ON_ONCE(!window->user_win); + /* * Return credit for send window after processing * fault CRB. @@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id) int vas_setup_fault_window(struct vas_instance *vinst) { struct vas_rx_win_attr attr; + struct vas_window *win; vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); @@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst) attr.lnotify_pid = mfspr(SPRN_PID); attr.lnotify_tid = mfspr(SPRN_PID); - vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, - &attr); - - if (IS_ERR(vinst->fault_win)) { - pr_err("VAS: Error %ld opening FaultWin\n", - PTR_ERR(vinst->fault_win)); + win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr); + if (IS_ERR(win)) { + pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win)); kfree(vinst->fault_fifo); - return PTR_ERR(vinst->fault_win); + return PTR_ERR(win); } + vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win); + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", - vinst->fault_win->winid, attr.lnotify_lpid, + vinst->fault_win->vas_win.winid, attr.lnotify_lpid, attr.lnotify_pid, attr.lnotify_tid); return 0; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h index a449b9f0c12e..ca2e08f2ddc0 100644 --- a/arch/powerpc/platforms/powernv/vas-trace.h +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -80,7 +80,7 @@ TRACE_EVENT( vas_tx_win_open, TRACE_EVENT( vas_paste_crb, TP_PROTO(struct task_struct *tsk, - struct vas_window *win), + struct pnv_vas_window *win), TP_ARGS(tsk, win), @@ -96,7 +96,7 @@ TRACE_EVENT( vas_paste_crb, TP_fast_assign( __entry->pid = tsk->pid; __entry->vasid = win->vinst->vas_id; - __entry->winid = win->winid; + __entry->winid = win->vas_win.winid; __entry->paste_kaddr = (unsigned long)win->paste_kaddr ), diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 5f5fe63a3d1c..0f8d39fbf2b2 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> +#include <asm/vas.h> #include "vas.h" #include "copy-paste.h" @@ -26,14 +27,14 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. */ -void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; base = window->vinst->paste_base_addr; shift = window->vinst->paste_win_id_shift; - winid = window->winid; + winid = window->vas_win.winid; *addr = base + (winid << shift); if (len) @@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -static inline void get_hvwc_mmio_bar(struct vas_window *window, +static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->hvwc_bar_start; - *start = pbaddr + window->winid * VAS_HVWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE; *len = VAS_HVWC_SIZE; } -static inline void get_uwc_mmio_bar(struct vas_window *window, +static inline void get_uwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->uwc_bar_start; - *start = pbaddr + window->winid * VAS_UWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE; *len = VAS_UWC_SIZE; } @@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window, * space. Unlike MMIO regions (map_mmio_region() below), paste region must * be mapped cache-able and is only applicable to send windows. */ -static void *map_paste_region(struct vas_window *txwin) +static void *map_paste_region(struct pnv_vas_window *txwin) { int len; void *map; @@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin) u64 start; name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, - txwin->winid); + txwin->vas_win.winid); if (!name) goto free_name; @@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len) /* * Unmap the paste address region for a window. */ -static void unmap_paste_region(struct vas_window *window) +static void unmap_paste_region(struct pnv_vas_window *window) { int len; u64 busaddr_start; @@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window) * path, just minimize the time we hold the mutex for now. We can add * a per-instance mutex later if necessary. */ -static void unmap_winctx_mmio_bars(struct vas_window *window) +static void unmap_winctx_mmio_bars(struct pnv_vas_window *window) { int len; void *uwc_map; @@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -static int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct pnv_vas_window *window) { int len; u64 start; @@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -static void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct pnv_vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window) * want to add fields to vas_winctx and move the initialization to * init_vas_winctx_regs(). */ -static void init_xlate_regs(struct vas_window *window, bool user_win) +static void init_xlate_regs(struct pnv_vas_window *window, bool user_win) { u64 lpcr, val; @@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win) * * TODO: Reserved (aka dedicated) send buffers are not supported yet. */ -static void init_rsvd_tx_buf_count(struct vas_window *txwin, +static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin, struct vas_winctx *winctx) { write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL); @@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. */ -static void init_winctx_regs(struct vas_window *window, +static void init_winctx_regs(struct pnv_vas_window *window, struct vas_winctx *winctx) { u64 val; @@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida) return winid; } -static void vas_window_free(struct vas_window *window) +static void vas_window_free(struct pnv_vas_window *window) { - int winid = window->winid; struct vas_instance *vinst = window->vinst; + int winid = window->vas_win.winid; unmap_winctx_mmio_bars(window); @@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window) vas_release_window_id(&vinst->ida, winid); } -static struct vas_window *vas_window_alloc(struct vas_instance *vinst) +static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst) { int winid; - struct vas_window *window; + struct pnv_vas_window *window; winid = vas_assign_window_id(&vinst->ida); if (winid < 0) @@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst) goto out_free; window->vinst = vinst; - window->winid = winid; + window->vas_win.winid = winid; if (map_winctx_mmio_bars(window)) goto out_free; @@ -562,7 +563,7 @@ out_free: return ERR_PTR(-ENOMEM); } -static void put_rx_win(struct vas_window *rxwin) +static void put_rx_win(struct pnv_vas_window *rxwin) { /* Better not be a send window! */ WARN_ON_ONCE(rxwin->tx_win); @@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin) * * NOTE: We access ->windows[] table and assume that vinst->mutex is held. */ -static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) +static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst, + u32 pswid) { int vasid, winid; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; decode_pswid(pswid, &vasid, &winid); @@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) rxwin = vinst->windows[winid]; - if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW) + if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW) return ERR_PTR(-EINVAL); return rxwin; @@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) * * See also function header of set_vinst_win(). */ -static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, +static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst, enum vas_cop_type cop, u32 pswid) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; mutex_lock(&vinst->mutex); @@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, * window, we also save the window in the ->rxwin[] table. */ static void set_vinst_win(struct vas_instance *vinst, - struct vas_window *window) + struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; mutex_lock(&vinst->mutex); @@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst, * unless its a user (FTW) window. */ if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = window; + WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = window; } WARN_ON_ONCE(vinst->windows[id] != NULL); @@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst, * Clear this window from the table(s) of windows for this VAS instance. * See also function header of set_vinst_win(). */ -static void clear_vinst_win(struct vas_window *window) +static void clear_vinst_win(struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; struct vas_instance *vinst = window->vinst; mutex_lock(&vinst->mutex); if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(!vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = NULL; + WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = NULL; } WARN_ON_ONCE(vinst->windows[id] != window); @@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window) mutex_unlock(&vinst->mutex); } -static void init_winctx_for_rxwin(struct vas_window *rxwin, +static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, struct vas_rx_win_attr *rxattr, struct vas_winctx *winctx) { @@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->rx_fifo = rxattr->rx_fifo; winctx->rx_fifo_size = rxattr->rx_fifo_size; - winctx->wcreds_max = rxwin->wcreds_max; + winctx->wcreds_max = rxwin->vas_win.wcreds_max; winctx->pin_win = rxattr->pin_win; winctx->nx_win = rxattr->nx_win; @@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *rxattr) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin = vas_window_alloc(vinst); if (IS_ERR(rxwin)) { pr_devel("Unable to allocate memory for Rx window\n"); - return rxwin; + return (struct vas_window *)rxwin; } rxwin->tx_win = false; rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; - rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max; + rxwin->vas_win.cop = cop; + rxwin->vas_win.wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); set_vinst_win(vinst, rxwin); - return rxwin; + return &rxwin->vas_win; } EXPORT_SYMBOL_GPL(vas_rx_win_open); @@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) } EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); -static void init_winctx_for_txwin(struct vas_window *txwin, +static void init_winctx_for_txwin(struct pnv_vas_window *txwin, struct vas_tx_win_attr *txattr, struct vas_winctx *winctx) { @@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin, */ memset(winctx, 0, sizeof(struct vas_winctx)); - winctx->wcreds_max = txwin->wcreds_max; + winctx->wcreds_max = txwin->vas_win.wcreds_max; winctx->user_win = txattr->user_win; winctx->nx_win = txwin->rxwin->nx_win; @@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; - winctx->rx_win_id = txwin->rxwin->winid; + winctx->rx_win_id = txwin->rxwin->vas_win.winid; /* * IRQ and fault window setup is successful. Set fault window * for the send window so that ready to handle faults. */ if (txwin->vinst->virq) - winctx->fault_win_id = txwin->vinst->fault_win->winid; + winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; @@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->irq_port = txwin->vinst->irq_port; winctx->pswid = txattr->pswid ? txattr->pswid : - encode_pswid(txwin->vinst->vas_id, txwin->winid); + encode_pswid(txwin->vinst->vas_id, + txwin->vas_win.winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_tx_win_attr *attr) { int rc; - struct vas_window *txwin; - struct vas_window *rxwin; + struct pnv_vas_window *txwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); if (IS_ERR(rxwin)) { pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); - return rxwin; + return (struct vas_window *)rxwin; } txwin = vas_window_alloc(vinst); @@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, goto put_rxwin; } - txwin->cop = cop; + txwin->vas_win.cop = cop; txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; txwin->user_win = attr->user_win; - txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; + txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; init_winctx_for_txwin(txwin, attr, &winctx); @@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = -ENODEV; goto free_window; } - - /* - * Window opened by a child thread may not be closed when - * it exits. So take reference to its pid and release it - * when the window is free by parent thread. - * Acquire a reference to the task's pid to make sure - * pid will not be re-used - needed only for multithread - * applications. - */ - txwin->pid = get_task_pid(current, PIDTYPE_PID); - /* - * Acquire a reference to the task's mm. - */ - txwin->mm = get_task_mm(current); - - if (!txwin->mm) { - put_pid(txwin->pid); - pr_err("VAS: pid(%d): mm_struct is not found\n", - current->pid); - rc = -EPERM; + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) goto free_window; - } - mmgrab(txwin->mm); - mmput(txwin->mm); - mm_context_add_vas_window(txwin->mm); - /* - * Process closes window during exit. In the case of - * multithread application, the child thread can open - * window and can exit without closing it. Expects parent - * thread to use and close the window. So do not need - * to take pid reference for parent thread. - */ - txwin->tgid = find_get_pid(task_tgid_vnr(current)); - /* - * Even a process that has no foreign real address mapping can - * use an unpaired COPY instruction (to no real effect). Issue - * CP_ABORT to clear any pending COPY and prevent a covert - * channel. - * - * __switch_to() will issue CP_ABORT on future context switches - * if process / thread has any open VAS window (Use - * current->mm->context.vas_windows). - */ - asm volatile(PPC_CP_ABORT); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); } set_vinst_win(vinst, txwin); - return txwin; + return &txwin->vas_win; free_window: vas_window_free(txwin); @@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset) EXPORT_SYMBOL_GPL(vas_copy_crb); #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) -int vas_paste_crb(struct vas_window *txwin, int offset, bool re) +int vas_paste_crb(struct vas_window *vwin, int offset, bool re) { + struct pnv_vas_window *txwin; int rc; void *addr; uint64_t val; + txwin = container_of(vwin, struct pnv_vas_window, vas_win); trace_vas_paste_crb(current, txwin); /* @@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) else rc = -EINVAL; - pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid, + pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid, read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); return rc; @@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb); * user space. (NX-842 driver waits for CSB and Fast thread-wakeup * doesn't use credit checking). */ -static void poll_window_credits(struct vas_window *window) +static void poll_window_credits(struct pnv_vas_window *window) { u64 val; int creds, mode; @@ -1217,7 +1182,7 @@ retry: * and issue CRB Kill to stop all pending requests. Need only * if there is a bug in NX or fault handling in kernel. */ - if (creds < window->wcreds_max) { + if (creds < window->vas_win.wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); @@ -1228,7 +1193,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", - vas_window_pid(window), window->winid, + vas_window_pid(&window->vas_win), + window->vas_win.winid, creds, count); goto retry; @@ -1240,7 +1206,7 @@ retry: * short time to queue a CRB, so window should not be busy for too long. * Trying 5ms intervals. */ -static void poll_window_busy_state(struct vas_window *window) +static void poll_window_busy_state(struct pnv_vas_window *window) { int busy; u64 val; @@ -1260,7 +1226,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", - vas_window_pid(window), window->winid, count); + vas_window_pid(&window->vas_win), + window->vas_win.winid, count); goto retry; } @@ -1282,7 +1249,7 @@ retry: * casting out becomes necessary we should consider offloading the * job to a worker thread, so the window close can proceed quickly. */ -static void poll_window_castout(struct vas_window *window) +static void poll_window_castout(struct pnv_vas_window *window) { /* stub for now */ } @@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window) * Unpin and close a window so no new requests are accepted and the * hardware can evict this window from cache if necessary. */ -static void unpin_close_window(struct vas_window *window) +static void unpin_close_window(struct pnv_vas_window *window) { u64 val; @@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window) * * Besides the hardware, kernel has some bookkeeping of course. */ -int vas_win_close(struct vas_window *window) +int vas_win_close(struct vas_window *vwin) { - if (!window) + struct pnv_vas_window *window; + + if (!vwin) return 0; + window = container_of(vwin, struct pnv_vas_window, vas_win); + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { pr_devel("Attempting to close an active Rx window!\n"); WARN_ON_ONCE(1); @@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window) /* if send window, drop reference to matching receive window */ if (window->tx_win) { if (window->user_win) { - /* Drop references to pid and mm */ - put_pid(window->pid); - if (window->mm) { - mm_context_remove_vas_window(window->mm); - mmdrop(window->mm); - } + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); } put_rx_win(window->rxwin); } @@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close); * - The kernel with return credit on fault window after reading entry * from fault FIFO. */ -void vas_return_credit(struct vas_window *window, bool tx) +void vas_return_credit(struct pnv_vas_window *window, bool tx) { uint64_t val; @@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx) } } -struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid) { - struct vas_window *window; + struct pnv_vas_window *window; int winid; if (!pswid) { @@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, * by NX). */ if (!window->tx_win || !window->user_win || !window->nx_win || - window->cop == VAS_COP_TYPE_FAULT || - window->cop == VAS_COP_TYPE_FTW) { + window->vas_win.cop == VAS_COP_TYPE_FAULT || + window->vas_win.cop == VAS_COP_TYPE_FTW) { pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", winid, window->tx_win, window->user_win, - window->nx_win, window->cop); + window->nx_win, window->vas_win.cop); WARN_ON(1); } return window; } + +static struct vas_window *vas_user_win_open(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + struct vas_tx_win_attr txattr = {}; + + vas_init_tx_win_attr(&txattr, cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + return vas_tx_win_open(vas_id, cop_type, &txattr); +} + +static u64 vas_user_win_paste_addr(struct vas_window *txwin) +{ + struct pnv_vas_window *win; + u64 paste_addr; + + win = container_of(txwin, struct pnv_vas_window, vas_win); + vas_win_paste_addr(win, &paste_addr, NULL); + + return paste_addr; +} + +static int vas_user_win_close(struct vas_window *txwin) +{ + vas_win_close(txwin); + + return 0; +} + +static const struct vas_user_win_ops vops = { + .open_win = vas_user_win_open, + .paste_addr = vas_user_win_paste_addr, + .close_win = vas_user_win_close, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + + return vas_register_coproc_api(mod, cop_type, name, &vops); +} +EXPORT_SYMBOL_GPL(vas_register_api_powernv); + +void vas_unregister_api_powernv(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_powernv); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index c7db3190baca..8bb08e395de0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -334,11 +334,11 @@ struct vas_instance { int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ spinlock_t fault_lock; /* Protects fifo_in_progress update */ void *fault_fifo; - struct vas_window *fault_win; /* Fault window */ + struct pnv_vas_window *fault_win; /* Fault window */ struct mutex mutex; - struct vas_window *rxwin[VAS_COP_TYPE_MAX]; - struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP]; char *name; char *dbgname; @@ -346,32 +346,24 @@ struct vas_instance { }; /* - * In-kernel state a VAS window. One per window. + * In-kernel state a VAS window on PowerNV. One per window. */ -struct vas_window { +struct pnv_vas_window { + struct vas_window vas_win; /* Fields common to send and receive windows */ struct vas_instance *vinst; - int winid; bool tx_win; /* True if send window */ bool nx_win; /* True if NX window */ bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - struct pid *pid; /* Linux process id of owner */ - struct pid *tgid; /* Thread group ID of owner */ - struct mm_struct *mm; /* Linux process mm_struct */ - int wcreds_max; /* Window credits */ - - char *dbgname; - struct dentry *dbgdir; /* Fields applicable only to send windows */ void *paste_kaddr; char *paste_addr_name; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; - /* Feilds applicable only to receive windows */ - enum vas_cop_type cop; + /* Fields applicable only to receive windows */ atomic_t num_txwins; }; @@ -430,32 +422,32 @@ extern struct mutex vas_mutex; extern struct vas_instance *find_vas_instance(int vasid); extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); -extern void vas_window_init_dbgdir(struct vas_window *win); -extern void vas_window_free_dbgdir(struct vas_window *win); +extern void vas_window_init_dbgdir(struct pnv_vas_window *win); +extern void vas_window_free_dbgdir(struct pnv_vas_window *win); extern int vas_setup_fault_window(struct vas_instance *vinst); extern irqreturn_t vas_fault_thread_fn(int irq, void *data); extern irqreturn_t vas_fault_handler(int irq, void *dev_id); -extern void vas_return_credit(struct vas_window *window, bool tx); -extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +extern void vas_return_credit(struct pnv_vas_window *window, bool tx); +extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid); -extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, - int *len); +extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, + int *len); static inline int vas_window_pid(struct vas_window *window) { - return pid_vnr(window->pid); + return pid_vnr(window->task_ref.pid); } -static inline void vas_log_write(struct vas_window *win, char *name, +static inline void vas_log_write(struct pnv_vas_window *win, char *name, void *regptr, u64 val) { if (val) pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", - win->tx_win ? "Tx" : "Rx", win->winid, name, - regptr, val); + win->tx_win ? "Tx" : "Rx", win->vas_win.winid, + name, regptr, val); } -static inline void write_uwc_reg(struct vas_window *win, char *name, +static inline void write_uwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline void write_hvwc_reg(struct vas_window *win, char *name, +static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline u64 read_hvwc_reg(struct vas_window *win, +static inline u64 read_hvwc_reg(struct pnv_vas_window *win, char *name __maybe_unused, s32 reg) { return in_be64(win->hvwc_map+reg); diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 4d0535cc7946..a4048b8c8c50 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -86,6 +86,15 @@ config PS3_SYS_MANAGER This support is required for PS3 system control. In general, all users will say Y or M. +config PS3_VERBOSE_RESULT + bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables more verbose log mesages for LV1 hypercall results. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + config PS3_REPOSITORY_WRITE bool "PS3 Repository write support" if PS3_ADVANCED depends on PPC_PS3 diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index d094321964fb..a81eac35d900 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/memblock.h> @@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : ALIGN(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index e9ae5dd03593..3de9145c20bc 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex); EXPORT_SYMBOL_GPL(ps3_gpu_mutex); static union ps3_firmware_version ps3_firmware_version; +static char ps3_firmware_version_str[16]; void ps3_get_firmware_version(union ps3_firmware_version *v) { @@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) return lv1_set_dabr(dabr, dabrx) ? -1 : 0; } +static ssize_t ps3_fw_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s", ps3_firmware_version_str); +} + +static int __init ps3_setup_sysfs(void) +{ + static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO, + ps3_fw_version_show, NULL); + static struct kobject *kobj; + int result; + + kobj = kobject_create_and_add("ps3", firmware_kobj); + + if (!kobj) { + pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__, + __LINE__); + return -ENOMEM; + } + + result = sysfs_create_file(kobj, &attr.attr); + + if (result) { + pr_warn("%s:%d: sysfs_create_file failed.\n", __func__, + __LINE__); + kobject_put(kobj); + return -ENOMEM; + } + + return 0; +} +core_initcall(ps3_setup_sysfs); + static void __init ps3_setup_arch(void) { u64 tmp; @@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void) lv1_get_version_info(&ps3_firmware_version.raw, &tmp); - printk(KERN_INFO "PS3 firmware version %u.%u.%u\n", - ps3_firmware_version.major, ps3_firmware_version.minor, - ps3_firmware_version.rev); + snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str), + "%u.%u.%u", ps3_firmware_version.major, + ps3_firmware_version.minor, ps3_firmware_version.rev); + + printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str); ps3_spu_set_platform(); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b431f41c6cb5..1a5665875165 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) result = lv1_open_device(dev->bus_id, dev->dev_id, 0); if (result) { - pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__, - __LINE__, ps3_result(result)); - result = -EPERM; + pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n", + __func__, __LINE__, dev->match_id, dev->match_sub_id, + dev_name(&dev->core), ps3_result(result)); + result = -EPERM; } done: @@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) result = lv1_gpu_open(0); if (result) { - pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__, + pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__, __LINE__, ps3_result(result)); result = -EPERM; } diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c8a2b0b05ac0..4cda0ef87be0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 3ac70790ec7a..b1f01ac0c29e 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_UNUSABLE) return -1; @@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; @@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8377f1f7c78e..377d852f5a9a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; #ifdef CONFIG_FA_DUMP @@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { struct drmem_lmb *lmb; - int lmbs_removed = 0; + int lmbs_reserved = 0; int lmbs_available = 0; int rc; @@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) */ drmem_mark_lmb_reserved(lmb); - lmbs_removed++; - if (lmbs_removed == lmbs_to_remove) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) break; } - if (lmbs_removed != lmbs_to_remove) { + if (lmbs_reserved != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); for_each_drmem_lmb(lmb) { @@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->drc_index); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = -EINVAL; @@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->base_addr); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", @@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) return -EINVAL; - /* Validate that there are enough LMBs to satisfy the request */ + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason. + * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_remove) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; @@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (!drmem_lmb_reserved(lmb)) continue; + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", @@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } -static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) -{ - return -EOPNOTSUPP; -} static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; @@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) { struct drmem_lmb *lmb; int lmbs_available = 0; - int lmbs_added = 0; + int lmbs_reserved = 0; int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); @@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) /* Validate that there are enough LMBs to satisfy the request */ for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) + continue; + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; @@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) * requested LMBs cannot be added. */ drmem_mark_lmb_reserved(lmb); - - lmbs_added++; - if (lmbs_added == lmbs_to_add) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_add) break; } - if (lmbs_added != lmbs_to_add) { + if (lmbs_reserved != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); for_each_drmem_lmb(lmb) { @@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) dlpar_release_drc(lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = -EINVAL; } else { @@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) pr_debug("Memory at %llx (drc index %x) was hot-added\n", lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index) static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", @@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) /* Validate that the LMBs in this range are not reserved */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + /* Fail immediately if the whole range can't be hot-added */ + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_add) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 8a2b8d64265b..ab9fc6506861 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace) mfcr r0 stw r0,8(r1) HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets) HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -129,6 +136,10 @@ plpar_hcall_norets_trace: HCALL_INST_PRECALL(R4) HVSC HCALL_INST_POSTCALL_NORETS + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr @@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -188,6 +202,9 @@ plpar_hcall_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -300,6 +323,9 @@ plpar_hcall9_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ef26fe40efb0..f48e87ac89c9 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -18,6 +18,7 @@ #include <asm/plpar_wrappers.h> #include <asm/papr_pdsm.h> #include <asm/mce.h> +#include <asm/unaligned.h> #define BIND_ANY_ADDR (~0ul) @@ -114,6 +115,9 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + /* Holds the last known dirty shutdown counter value */ + u64 dirty_shutdown_counter; + /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; }; @@ -260,7 +264,7 @@ err_out: * Query the Dimm performance stats from PHYP and copy them (if returned) to * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast * (num_stats + header) bytes. - * - If buff_stats == NULL the return value is the size in byes of the buffer + * - If buff_stats == NULL the return value is the size in bytes of the buffer * needed to hold all supported performance-statistics. * - If buff_stats != NULL and num_stats == 0 then we copy all known * performance-statistics to 'buff_stat' and expect to be large enough to @@ -310,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; } else if (rc != H_SUCCESS) { dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); @@ -596,6 +607,16 @@ free_stats: return rc; } +/* Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_dsc(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; + payload->health.dimm_dsc = p->dirty_shutdown_counter; + + return sizeof(struct nd_papr_pdsm_health); +} + /* Fetch the DIMM health info and populate it in provided package. */ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -639,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* Populate the fuel gauge meter in the payload */ papr_pdsm_fuel_gauge(p, payload); + /* Populate the dirty-shutdown-counter field */ + papr_pdsm_dsc(p, payload); rc = sizeof(struct nd_papr_pdsm_health); @@ -900,15 +923,41 @@ static ssize_t flags_show(struct device *dev, } DEVICE_ATTR_RO(flags); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); +} +DEVICE_ATTR_RO(dirty_shutdown); + +static umode_t papr_nd_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); + + /* For if perf-stats not available remove perf_stats sysfs */ + if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) + return 0; + + return attr->mode; +} + /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, + &dev_attr_dirty_shutdown.attr, NULL, }; static struct attribute_group papr_nd_attribute_group = { .name = "papr", + .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, }; @@ -924,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; - ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -1009,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) list_add_tail(&p->region_list, &papr_nd_regions); mutex_unlock(&papr_ndr_lock); - /* Try retriving the stat buffer and see if its supported */ - stat_size = drc_pmem_query_stats(p, NULL, 0); - if (stat_size > 0) { - p->stat_buffer_len = stat_size; - dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", - p->stat_buffer_len); - } else { - dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); - } - return 0; err: nvdimm_bus_unregister(p->bus); @@ -1094,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev) u32 drc_index, metadata_size; u64 blocks, block_size; struct papr_scm_priv *p; + u8 uuid_raw[UUID_SIZE]; const char *uuid_str; - u64 uuid[2]; + ssize_t stat_size; + uuid_t uuid; int rc; /* check we have all the required DT properties */ @@ -1137,17 +1177,28 @@ static int papr_scm_probe(struct platform_device *pdev) p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); + if (of_property_read_u64(dn, "ibm,persistence-failed-count", + &p->dirty_shutdown_counter)) + p->dirty_shutdown_counter = 0; + /* We just need to ensure that set cookies are unique across */ - uuid_parse(uuid_str, (uuid_t *) uuid); + uuid_parse(uuid_str, &uuid); + /* - * cookie1 and cookie2 are not really little endian - * we store a little endian representation of the - * uuid str so that we can compare this with the label - * area cookie irrespective of the endian config with which - * the kernel is built. + * The cookie1 and cookie2 are not really little endian. + * We store a raw buffer representation of the + * uuid string so that we can compare this with the label + * area cookie irrespective of the endian configuration + * with which the kernel is built. + * + * Historically we stored the cookie in the below format. + * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa + * cookie1 was 0xfd423b0b671b5172 + * cookie2 was 0xaabce8cae35b1d8d */ - p->nd_set.cookie1 = cpu_to_le64(uuid[0]); - p->nd_set.cookie2 = cpu_to_le64(uuid[1]); + export_uuid(uuid_raw, &uuid); + p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); + p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); /* might be zero */ p->metadata_size = metadata_size; @@ -1172,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev) p->res.name = pdev->name; p->res.flags = IORESOURCE_MEM; + /* Try retrieving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } + rc = papr_scm_nvdimm_init(p); if (rc) goto err2; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9d4ef65da7f3..167f2e1b8d39 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs) if ((be64_to_cpu(regs->msr) & (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { - regs->nip = be64_to_cpu((__be64)regs->nip); - regs->msr = 0; + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); } #endif @@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 754e493b7c05..631a0d57b6cd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c70b4be9f0a5..096629f54576 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (check_kvm_guest()) { + check_kvm_guest(); + + if (is_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..b5c1cf1bc64d --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#include <asm/vas.h> +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", + hcall, rc, query_type, result); + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + txwin->win_type = cop_feat_caps->win_type; + mutex_lock(&vas_pseries_mutex); + list_add(&txwin->win_list, &caps->list); + mutex_unlock(&vas_pseries_mutex); + + return &txwin->vas_win; + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); +out: + atomic_dec(&cop_feat_caps->used_lpar_creds); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + + list_del(&win->win_list); + atomic_dec(&caps->used_lpar_creds); + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc; + + if (!copypaste_feat) + return -ENOTSUPP; + + rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->target_lpar_creds, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + copypaste_feat = true; + + return 0; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_cop_feat_caps *hv_cop_caps; + struct hv_vas_all_caps *hv_caps; + int rc; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); + if (!hv_cop_caps) { + rc = -ENOMEM; + goto out; + } + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + + if (rc) + goto out_cop; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); + if (rc) + goto out_cop; + } + + pr_info("GZIP feature is available\n"); + +out_cop: + kfree(hv_cop_caps); +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 000000000000..4ecb3fcabd10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include <asm/vas.h> +#include <linux/mutex.h> +#include <linux/stringify.h> + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t target_lpar_creds; + atomic_t used_lpar_creds; /* Used credits so far */ + u16 avail_lpar_creds; /* Remaining available credits */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ +}; + +/* + * To get window information from the hypervisor. + */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; +}; +#endif /* _VAS_H */ |