summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Makefile33
-rw-r--r--arch/powerpc/configs/le.config1
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_le_defconfig319
-rw-r--r--arch/powerpc/include/asm/cputable.h10
-rw-r--r--arch/powerpc/include/asm/device.h3
-rw-r--r--arch/powerpc/include/asm/iommu.h119
-rw-r--r--arch/powerpc/include/asm/machdep.h25
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h3
-rw-r--r--arch/powerpc/include/asm/mmu_context.h18
-rw-r--r--arch/powerpc/include/asm/opal-api.h21
-rw-r--r--arch/powerpc/include/asm/opal.h1
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h8
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h26
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h2
-rw-r--r--arch/powerpc/include/asm/processor.h9
-rw-r--r--arch/powerpc/include/asm/pte-book3e.h1
-rw-r--r--arch/powerpc/include/asm/pte-common.h2
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h1
-rw-r--r--arch/powerpc/include/asm/trace.h20
-rw-r--r--arch/powerpc/include/asm/uaccess.h8
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/include/uapi/asm/cputable.h1
-rw-r--r--arch/powerpc/include/uapi/asm/opal-prd.h58
-rw-r--r--arch/powerpc/include/uapi/asm/tm.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/cputable.c4
-rw-r--r--arch/powerpc/kernel/eeh.c6
-rw-r--r--arch/powerpc/kernel/entry_64.S37
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S16
-rw-r--r--arch/powerpc/kernel/iommu.c245
-rw-r--r--arch/powerpc/kernel/pci-common.c11
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c5
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c3
-rw-r--r--arch/powerpc/kernel/sysfs.c38
-rw-r--r--arch/powerpc/kernel/tm.S4
-rw-r--r--arch/powerpc/kernel/traps.c45
-rw-r--r--arch/powerpc/kernel/vio.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/copro_fault.c9
-rw-r--r--arch/powerpc/mm/hash_native_64.c2
-rw-r--r--arch/powerpc/mm/hash_utils_64.c4
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c6
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c316
-rw-r--r--arch/powerpc/perf/core-book3s.c11
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c2
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c2
-rw-r--r--arch/powerpc/platforms/cell/iommu.c8
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c2
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c7
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig7
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/idle.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c449
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c12
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S1
-rw-r--r--arch/powerpc/platforms/powernv/opal.c31
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c767
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c35
-rw-r--r--arch/powerpc/platforms/powernv/pci.c178
-rw-r--r--arch/powerpc/platforms/powernv/pci.h25
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c179
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c12
-rw-r--r--arch/powerpc/sysdev/uic.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c14
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c2
71 files changed, 2295 insertions, 919 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a314cb024c8b..05f464eb6952 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -66,7 +66,10 @@ endif
UTS_MACHINE := $(OLDARCH)
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC += -mlittle-endian -mno-strict-align
+override CC += -mlittle-endian
+ifneq ($(COMPILER),clang)
+override CC += -mno-strict-align
+endif
override AS += -mlittle-endian
override LD += -EL
override CROSS32CC += -mlittle-endian
@@ -113,14 +116,14 @@ else
endif
endif
-CFLAGS-$(CONFIG_PPC64) := -mtraceback=no
+CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
else
-CFLAGS-$(CONFIG_PPC64) += -mcall-aixdesc
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
endif
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
@@ -160,7 +163,8 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr)
KBUILD_AFLAGS += -Iarch/$(ARCH) $(AFLAGS-y)
-KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
+KBUILD_CFLAGS += $(call cc-option,-msoft-float)
+KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)
CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__
@@ -192,7 +196,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm)
# Never use string load/store instructions as they are
# often slow when they are implemented at all
-KBUILD_CFLAGS += -mno-string
+KBUILD_CFLAGS += $(call cc-option,-mno-string)
ifeq ($(CONFIG_6xx),y)
KBUILD_CFLAGS += -mcpu=powerpc
@@ -269,6 +273,21 @@ bootwrapper_install:
%.dtb: scripts
$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+# Used to create 'merged defconfigs'
+# To use it $(call) it with the first argument as the base defconfig
+# and the second argument as a space separated list of .config files to merge,
+# without the .config suffix.
+define merge_into_defconfig
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
+ -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
+ $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+ +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
+endef
+
+PHONY += pseries_le_defconfig
+pseries_le_defconfig:
+ $(call merge_into_defconfig,pseries_defconfig,le)
+
define archhelp
@echo '* zImage - Build default images selected by kernel config'
@echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644
index 000000000000..ee43fdb3b8f4
--- /dev/null
+++ b/arch/powerpc/configs/le.config
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index aad501ae3834..a97efc2146fd 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -155,6 +155,7 @@ CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
CONFIG_PCNET32=y
CONFIG_TIGON3=y
+CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
CONFIG_BE2NET=m
CONFIG_S2IO=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c2e39f66b182..0d9efcedaf34 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -154,6 +154,7 @@ CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
CONFIG_PCNET32=y
CONFIG_TIGON3=y
+CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
CONFIG_BE2NET=m
CONFIG_S2IO=m
@@ -297,7 +298,6 @@ CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
deleted file mode 100644
index 09bc96e792cd..000000000000
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ /dev/null
@@ -1,319 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_USER_NS=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
-CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
-CONFIG_SCHED_SMT=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_NET_IPIP=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_ADVANCED is not set
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_CXGB3_ISCSI=m
-CONFIG_SCSI_CXGB4_ISCSI=m
-CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
-CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_IBMVSCSI=y
-CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
-CONFIG_SCSI_IPR=y
-CONFIG_SCSI_QLA_FC=m
-CONFIG_SCSI_QLA_ISCSI=m
-CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-# CONFIG_ATA_SFF is not set
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_UEVENT=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_MACVLAN=m
-CONFIG_MACVTAP=m
-CONFIG_VXLAN=m
-CONFIG_NETCONSOLE=y
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
-CONFIG_ACENIC=m
-CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
-CONFIG_TIGON3=y
-CONFIG_CHELSIO_T1=m
-CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_IBMVETH=y
-CONFIG_EHEA=y
-CONFIG_E100=y
-CONFIG_E1000=y
-CONFIG_E1000E=y
-CONFIG_IXGB=m
-CONFIG_IXGBE=m
-CONFIG_MLX4_EN=m
-CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
-CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_PCSPKR=m
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_JSM=m
-CONFIG_HVC_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVCS=m
-CONFIG_VIRTIO_CONSOLE=m
-CONFIG_IBM_BSR=m
-CONFIG_GEN_RTC=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-CONFIG_FB=y
-CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-CONFIG_FB_RADEON=y
-CONFIG_FB_IBM_GXT4500=y
-CONFIG_LCD_PLATFORM=m
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=m
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=m
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_EHCA=m
-CONFIG_INFINIBAND_CXGB3=m
-CONFIG_INFINIBAND_CXGB4=m
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_CM=y
-CONFIG_INFINIBAND_SRP=m
-CONFIG_INFINIBAND_ISER=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_XFS_FS=m
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_BTRFS_FS=m
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_SQUASHFS_XATTR=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_CODE_PATCHING_SELFTEST=y
-CONFIG_FTR_FIXUP_SELFTEST=y
-CONFIG_MSI_BITMAP_SELFTEST=y
-CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM_BOOK3S_64=m
-CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index ae1fa65bb26d..b118072670fb 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -242,11 +242,13 @@ enum {
/* We only set the TM feature if the kernel was compiled with TM supprt */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-#define CPU_FTR_TM_COMP CPU_FTR_TM
-#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM
+#define CPU_FTR_TM_COMP CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM
+#define PPC_FEATURE2_HTM_NOSC_COMP PPC_FEATURE2_HTM_NOSC
#else
-#define CPU_FTR_TM_COMP 0
-#define PPC_FEATURE2_HTM_COMP 0
+#define CPU_FTR_TM_COMP 0
+#define PPC_FEATURE2_HTM_COMP 0
+#define PPC_FEATURE2_HTM_NOSC_COMP 0
#endif
/* We need to mark all pages as being coherent if we're SMP or we have a
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 9f1371bab5fc..e9bdda88f1fb 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -46,6 +46,9 @@ struct dev_archdata {
#ifdef CONFIG_FAIL_IOMMU
int fail_iommu;
#endif
+#ifdef CONFIG_CXL_BASE
+ struct cxl_context *cxl_ctx;
+#endif
};
struct pdev_archdata {
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 1e27d6338565..ca18cff90900 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -44,6 +44,39 @@
extern int iommu_is_off;
extern int iommu_force_on;
+struct iommu_table_ops {
+ /*
+ * When called with direction==DMA_NONE, it is equal to clear().
+ * uaddr is a linear map address.
+ */
+ int (*set)(struct iommu_table *tbl,
+ long index, long npages,
+ unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs);
+#ifdef CONFIG_IOMMU_API
+ /*
+ * Exchanges existing TCE with new TCE plus direction bits;
+ * returns old TCE and DMA direction mask.
+ * @tce is a physical address.
+ */
+ int (*exchange)(struct iommu_table *tbl,
+ long index,
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
+#endif
+ void (*clear)(struct iommu_table *tbl,
+ long index, long npages);
+ /* get() returns a physical address */
+ unsigned long (*get)(struct iommu_table *tbl, long index);
+ void (*flush)(struct iommu_table *tbl);
+ void (*free)(struct iommu_table *tbl);
+};
+
+/* These are used by VIO */
+extern struct iommu_table_ops iommu_table_lpar_multi_ops;
+extern struct iommu_table_ops iommu_table_pseries_ops;
+
/*
* IOMAP_MAX_ORDER defines the largest contiguous block
* of dma space we can get. IOMAP_MAX_ORDER = 13
@@ -64,6 +97,9 @@ struct iommu_pool {
struct iommu_table {
unsigned long it_busno; /* Bus number this table belongs to */
unsigned long it_size; /* Size of iommu table in entries */
+ unsigned long it_indirect_levels;
+ unsigned long it_level_size;
+ unsigned long it_allocated_size;
unsigned long it_offset; /* Offset into global table */
unsigned long it_base; /* mapped address of tce table */
unsigned long it_index; /* which iommu table this is */
@@ -75,15 +111,16 @@ struct iommu_table {
struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map; /* A simple allocation bitmap for now */
unsigned long it_page_shift;/* table iommu page size */
-#ifdef CONFIG_IOMMU_API
- struct iommu_group *it_group;
-#endif
- void (*set_bypass)(struct iommu_table *tbl, bool enable);
-#ifdef CONFIG_PPC_POWERNV
- void *data;
-#endif
+ struct list_head it_group_list;/* List of iommu_table_group_link */
+ unsigned long *it_userspace; /* userspace view of the table */
+ struct iommu_table_ops *it_ops;
};
+#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+ ((tbl)->it_userspace ? \
+ &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
+ NULL)
+
/* Pure 2^n version of get_order */
static inline __attribute_const__
int get_iommu_order(unsigned long size, struct iommu_table *tbl)
@@ -112,14 +149,62 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
*/
extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
int nid);
+#define IOMMU_TABLE_GROUP_MAX_TABLES 2
+
+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+ unsigned long (*get_table_size)(
+ __u32 page_shift,
+ __u64 window_size,
+ __u32 levels);
+ long (*create_table)(struct iommu_table_group *table_group,
+ int num,
+ __u32 page_shift,
+ __u64 window_size,
+ __u32 levels,
+ struct iommu_table **ptbl);
+ long (*set_window)(struct iommu_table_group *table_group,
+ int num,
+ struct iommu_table *tblnew);
+ long (*unset_window)(struct iommu_table_group *table_group,
+ int num);
+ /* Switch ownership from platform code to external user (e.g. VFIO) */
+ void (*take_ownership)(struct iommu_table_group *table_group);
+ /* Switch ownership from external user (e.g. VFIO) back to core */
+ void (*release_ownership)(struct iommu_table_group *table_group);
+};
+
+struct iommu_table_group_link {
+ struct list_head next;
+ struct rcu_head rcu;
+ struct iommu_table_group *table_group;
+};
+
+struct iommu_table_group {
+ /* IOMMU properties */
+ __u32 tce32_start;
+ __u32 tce32_size;
+ __u64 pgsizes; /* Bitmap of supported page sizes */
+ __u32 max_dynamic_windows_supported;
+ __u32 max_levels;
+
+ struct iommu_group *group;
+ struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+ struct iommu_table_group_ops *ops;
+};
+
#ifdef CONFIG_IOMMU_API
-extern void iommu_register_group(struct iommu_table *tbl,
+
+extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
extern int iommu_add_device(struct device *dev);
extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction);
#else
-static inline void iommu_register_group(struct iommu_table *tbl,
+static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
unsigned long pe_num)
{
@@ -140,13 +225,6 @@ static inline int __init tce_iommu_bus_notifier_init(void)
}
#endif /* !CONFIG_IOMMU_API */
-static inline void set_iommu_table_base_and_group(struct device *dev,
- void *base)
-{
- set_iommu_table_base(dev, base);
- iommu_add_device(dev);
-}
-
extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct scatterlist *sglist, int nelems,
unsigned long mask,
@@ -197,20 +275,13 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
unsigned long npages);
extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
- unsigned long entry);
-extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
- unsigned long entry, unsigned long pages);
-extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
- unsigned long entry, unsigned long tce);
extern void iommu_flush_tce(struct iommu_table *tbl);
extern int iommu_take_ownership(struct iommu_table *tbl);
extern void iommu_release_ownership(struct iommu_table *tbl);
extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
#endif /* __KERNEL__ */
#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 0d387d0570cd..952579f5e79a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -65,31 +65,6 @@ struct machdep_calls {
* destroyed as well */
void (*hpte_clear_all)(void);
- int (*tce_build)(struct iommu_table *tbl,
- long index,
- long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs);
- void (*tce_free)(struct iommu_table *tbl,
- long index,
- long npages);
- unsigned long (*tce_get)(struct iommu_table *tbl,
- long index);
- void (*tce_flush)(struct iommu_table *tbl);
-
- /* _rm versions are for real mode use only */
- int (*tce_build_rm)(struct iommu_table *tbl,
- long index,
- long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs);
- void (*tce_free_rm)(struct iommu_table *tbl,
- long index,
- long npages);
- void (*tce_flush_rm)(struct iommu_table *tbl);
-
void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
unsigned long flags, void *caller);
void (*iounmap)(volatile void __iomem *token);
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1da6a81ce541..a82f5347540a 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -536,6 +536,9 @@ typedef struct {
/* for 4K PTE fragment support */
void *pte_frag;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ struct list_head iommu_group_mem_list;
+#endif
} mm_context_t;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 73382eba02dc..3e5184210d9b 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -16,6 +16,24 @@
*/
extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
extern void destroy_context(struct mm_struct *mm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct mm_iommu_table_group_mem_t;
+
+extern bool mm_iommu_preregistered(void);
+extern long mm_iommu_get(unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_init(mm_context_t *ctx);
+extern void mm_iommu_cleanup(mm_context_t *ctx);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+ unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
+ unsigned long entries);
+extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#endif
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index a49e5fa6f939..e9e4c52f3685 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -153,7 +153,8 @@
#define OPAL_FLASH_READ 110
#define OPAL_FLASH_WRITE 111
#define OPAL_FLASH_ERASE 112
-#define OPAL_LAST 112
+#define OPAL_PRD_MSG 113
+#define OPAL_LAST 113
/* Device tree flags */
@@ -359,6 +360,7 @@ enum opal_msg_type {
OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */
OPAL_MSG_HMI_EVT,
OPAL_MSG_DPO,
+ OPAL_MSG_PRD,
OPAL_MSG_TYPE_MAX,
};
@@ -681,6 +683,23 @@ typedef struct oppanel_line {
__be64 line_len;
} oppanel_line_t;
+enum opal_prd_msg_type {
+ OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT --> OPAL */
+ OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel --> OPAL */
+ OPAL_PRD_MSG_TYPE_ATTN, /* HBRT <-- OPAL */
+ OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT --> OPAL */
+ OPAL_PRD_MSG_TYPE_OCC_ERROR, /* HBRT <-- OPAL */
+ OPAL_PRD_MSG_TYPE_OCC_RESET, /* HBRT <-- OPAL */
+};
+
+struct opal_prd_msg_header {
+ uint8_t type;
+ uint8_t pad[1];
+ __be16 size;
+};
+
+struct opal_prd_msg;
+
/*
* SG entries
*
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1412814347ba..958e941c0cda 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -194,6 +194,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t *msg_len);
int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
struct opal_i2c_request *oreq);
+int64_t opal_prd_msg(struct opal_prd_msg *msg);
int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
uint64_t size, uint64_t token);
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 6d17bb8498bf..712add590445 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -27,6 +27,10 @@ struct pci_controller_ops {
* allow assignment/enabling of the device. */
bool (*enable_device_hook)(struct pci_dev *);
+ void (*disable_device)(struct pci_dev *);
+
+ void (*release_device)(struct pci_dev *);
+
/* Called during PCI resource reassignment */
resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type);
void (*reset_secondary_bus)(struct pci_dev *dev);
@@ -38,6 +42,8 @@ struct pci_controller_ops {
#endif
int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask);
+
+ void (*shutdown)(struct pci_controller *);
};
/*
@@ -193,7 +199,7 @@ struct pci_dn {
struct pci_dn *parent;
struct pci_controller *phb; /* for pci devices */
- struct iommu_table *iommu_table; /* for phb's or bridges */
+ struct iommu_table_group *table_group; /* for phb's or bridges */
struct device_node *node; /* back-pointer to the device_node */
int pci_ext_config_space; /* for pci devices */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index f951d9cf358a..f890f7ce1593 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -347,11 +347,27 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
/* Encode and de-code a swap entry */
-#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
-#define __swp_offset(entry) ((entry).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
-#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
+#define MAX_SWAPFILES_CHECK() do { \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+ /* \
+ * Don't have overlapping bits with _PAGE_HPTEFLAGS \
+ * We filter HPTEFLAGS on set_pte. \
+ */ \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+ } while (0)
+/*
+ * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
+ & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ ((type) << _PAGE_BIT_SWAP_TYPE) \
+ | ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x) __pte((x).val)
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index f9b498292a5c..6f77f71ee964 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -11,7 +11,7 @@
#define _ASM_PNV_PCI_H
#include <linux/pci.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bf117d8fb45f..28ded5d9b579 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -295,6 +295,15 @@ struct thread_struct {
#endif
#ifdef CONFIG_PPC64
unsigned long dscr;
+ /*
+ * This member element dscr_inherit indicates that the process
+ * has explicitly attempted and changed the DSCR register value
+ * for itself. Hence kernel wont use the default CPU DSCR value
+ * contained in the PACA structure anymore during process context
+ * switch. Once this variable is set, this behaviour will also be
+ * inherited to all the children of this process from that point
+ * onwards.
+ */
int dscr_inherit;
unsigned long ppr; /* used to save/restore SMT priority */
#endif
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 91a704952ca1..8d8473278d91 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -11,6 +11,7 @@
/* Architected bits */
#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
#define _PAGE_SW1 0x000002
+#define _PAGE_BIT_SWAP_TYPE 2
#define _PAGE_BAP_SR 0x000004
#define _PAGE_BAP_UR 0x000008
#define _PAGE_BAP_SW 0x000010
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index c5a755ef7011..b7c8d079c121 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -85,10 +85,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
* 64-bit PTEs
*/
#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MAX (1ULL << (64 - PTE_RPN_SHIFT))
#define PTE_RPN_MASK (~((1ULL<<PTE_RPN_SHIFT)-1))
#else
-#define PTE_RPN_MAX (1UL << (32 - PTE_RPN_SHIFT))
#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
#endif
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index fc852f7e7b3a..ef612c160da7 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -16,6 +16,7 @@
*/
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
+#define _PAGE_BIT_SWAP_TYPE 2
#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
/* We can derive Memory coherence from _PAGE_NO_CACHE */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index c15da6073cb8..8e86b48d0369 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -144,6 +144,26 @@ TRACE_EVENT_FN(opal_exit,
);
#endif
+TRACE_EVENT(hash_fault,
+
+ TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+ TP_ARGS(addr, access, trap),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, access)
+ __field(unsigned long, trap)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->access = access;
+ __entry->trap = trap;
+ ),
+
+ TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+ __entry->addr, __entry->access, __entry->trap)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a0c071d24e0e..2a8ebae0936b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -265,7 +265,7 @@ do { \
({ \
long __gu_err; \
unsigned long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
@@ -279,7 +279,7 @@ do { \
({ \
long __gu_err; \
long long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
@@ -293,7 +293,7 @@ do { \
({ \
long __gu_err = -EFAULT; \
unsigned long __gu_val = 0; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
might_fault(); \
if (access_ok(VERIFY_READ, __gu_addr, (size))) \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
@@ -305,7 +305,7 @@ do { \
({ \
long __gu_err; \
unsigned long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 79c4068be278..f44a027818af 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -18,6 +18,7 @@ header-y += kvm_para.h
header-y += mman.h
header-y += msgbuf.h
header-y += nvram.h
+header-y += opal-prd.h
header-y += param.h
header-y += perf_event.h
header-y += poll.h
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index de2c0e4ee1aa..43686043e297 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -42,5 +42,6 @@
#define PPC_FEATURE2_ISEL 0x08000000
#define PPC_FEATURE2_TAR 0x04000000
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#define PPC_FEATURE2_HTM_NOSC 0x01000000
#endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h
new file mode 100644
index 000000000000..319ff4a26158
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/opal-prd.h
@@ -0,0 +1,58 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * (C) Copyright IBM 2015
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_
+#define _UAPI_ASM_POWERPC_OPAL_PRD_H_
+
+#include <linux/types.h>
+
+/**
+ * The version of the kernel interface of the PRD system. This describes the
+ * interface available for the /dev/opal-prd device. The actual PRD message
+ * layout and content is private to the firmware <--> userspace interface, so
+ * is not covered by this versioning.
+ *
+ * Future interface versions are backwards-compatible; if a later kernel
+ * version is encountered, functionality provided in earlier versions
+ * will work.
+ */
+#define OPAL_PRD_KERNEL_VERSION 1
+
+#define OPAL_PRD_GET_INFO _IOR('o', 0x01, struct opal_prd_info)
+#define OPAL_PRD_SCOM_READ _IOR('o', 0x02, struct opal_prd_scom)
+#define OPAL_PRD_SCOM_WRITE _IOW('o', 0x03, struct opal_prd_scom)
+
+#ifndef __ASSEMBLY__
+
+struct opal_prd_info {
+ __u64 version;
+ __u64 reserved[3];
+};
+
+struct opal_prd_scom {
+ __u64 chip;
+ __u64 addr;
+ __u64 data;
+ __s64 rc;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H */
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7c1176..5047659815a5 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
#define TM_CAUSE_RESCHED 0xde
#define TM_CAUSE_TLBI 0xdc
#define TM_CAUSE_FAC_UNAV 0xda
-#define TM_CAUSE_SYSCALL 0xd8 /* future use */
+#define TM_CAUSE_SYSCALL 0xd8
#define TM_CAUSE_MISC 0xd6 /* future use */
#define TM_CAUSE_SIGNAL 0xd4
#define TM_CAUSE_ALIGNMENT 0xd2
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0034b6b3556a..98230579d99c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -247,7 +247,7 @@ int main(void)
#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
- DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
+ DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 60262fdf35ba..7d80bfdfb15e 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -108,7 +108,9 @@ extern void __restore_cpu_e6500(void);
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
- PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_HTM_COMP | \
+ PPC_FEATURE2_HTM_NOSC_COMP | \
+ PPC_FEATURE2_DSCR | \
PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
PPC_FEATURE2_VEC_CRYPTO)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 23e0aa773643..af9b597b10af 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -717,7 +717,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
- pci_save_state(pdev);
+ pci_restore_state(pdev);
return NULL;
}
@@ -1410,13 +1410,11 @@ static int dev_has_iommu_table(struct device *dev, void *data)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_dev **ppdev = data;
- struct iommu_table *tbl;
if (!dev)
return 0;
- tbl = get_iommu_table_base(dev);
- if (tbl && tbl->it_group) {
+ if (dev->iommu_group) {
*ppdev = pdev;
return 1;
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index afbc20019c2e..579e0f9a2d57 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
#include <asm/ftrace.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
+#include <asm/tm.h>
/*
* System calls.
@@ -51,6 +52,12 @@ exception_marker:
.globl system_call_common
system_call_common:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+ bne tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
andi. r10,r12,MSR_PR
mr r10,r1
addi r1,r1,-INT_FRAME_SIZE
@@ -311,6 +318,34 @@ syscall_exit_work:
bl do_syscall_trace_leave
b ret_from_except
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tabort_syscall:
+ /* Firstly we need to enable TM in the kernel */
+ mfmsr r10
+ li r13, 1
+ rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r10, 0
+
+ /* tabort, this dooms the transaction, nothing else */
+ li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+ TABORT(R13)
+
+ /*
+ * Return directly to userspace. We have corrupted user register state,
+ * but userspace will never see that register state. Execution will
+ * resume after the tbegin of the aborted transaction with the
+ * checkpointed register state.
+ */
+ li r13, MSR_RI
+ andc r10, r10, r13
+ mtmsrd r10, 1
+ mtspr SPRN_SRR0, r11
+ mtspr SPRN_SRR1, r12
+
+ rfid
+ b . /* prevent speculative execution */
+#endif
+
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
ld r11,_TRAP(r1)
@@ -556,7 +591,7 @@ BEGIN_FTR_SECTION
ld r0,THREAD_DSCR(r4)
cmpwi r6,0
bne 1f
- ld r0,PACA_DSCR(r13)
+ ld r0,PACA_DSCR_DEFAULT(r13)
1:
BEGIN_FTR_SECTION_NESTED(70)
mfspr r8, SPRN_FSCR
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9519e6bdc6d7..0a0399c2af11 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -59,14 +59,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#if defined(CONFIG_RELOCATABLE)
/*
- * We can't branch directly; in the direct case we use LR
- * and system_call_entry restores LR. (We thus need to move
- * LR to r10 in the RFID case too.)
+ * We can't branch directly so we do it via the CTR which
+ * is volatile across system calls.
*/
#define SYSCALL_PSERIES_2_DIRECT \
mflr r10 ; \
ld r12,PACAKBASE(r13) ; \
- LOAD_HANDLER(r12, system_call_entry_direct) ; \
+ LOAD_HANDLER(r12, system_call_entry) ; \
mtctr r12 ; \
mfspr r12,SPRN_SRR1 ; \
/* Re-use of r13... No spare regs to do this */ \
@@ -80,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_entry_direct ;
+ b system_call_common ;
#endif
/*
@@ -969,13 +968,6 @@ hv_facility_unavailable_relon_trampoline:
__end_interrupts:
.align 7
-system_call_entry_direct:
-#if defined(CONFIG_RELOCATABLE)
- /* The first level prologue may have used LR to get here, saving
- * orig in r10. To save hacking/ifdeffing common code, restore here.
- */
- mtlr r10
-#endif
system_call_entry:
b system_call_common
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index b054f33ab1fb..a8e3490b54e3 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
(unsigned long)page &
IOMMU_PAGE_MASK(tbl), direction, attrs);
- /* ppc_md.tce_build() only returns non-zero for transient errors.
+ /* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
* DMA_ERROR_CODE. For all other errors the functionality is
* not altered.
@@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
@@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
if (!iommu_free_check(tbl, dma_addr, npages))
return;
- ppc_md.tce_free(tbl, entry, npages);
+ tbl->it_ops->clear(tbl, entry, npages);
spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
@@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
* not do an mb() here on purpose, it is not needed on any of
* the current platforms.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages, entry, dma_addr);
/* Insert into HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK(tbl),
direction, attrs);
if(unlikely(build_fail))
@@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
DBG("mapped %d elements:\n", outcount);
@@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
* do not do an mb() here, the affected platforms do not need it
* when freeing.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
static void iommu_table_clear(struct iommu_table *tbl)
@@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl)
*/
if (!is_kdump_kernel() || is_fadump_active()) {
/* Clear the table in case firmware left allocations in it */
- ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
return;
}
#ifdef CONFIG_CRASH_DUMP
- if (ppc_md.tce_get) {
+ if (tbl->it_ops->get) {
unsigned long index, tceval, tcecount = 0;
/* Reserve the existing mappings left by the first kernel. */
for (index = 0; index < tbl->it_size; index++) {
- tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+ tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
/*
* Freed TCE entry contains 0x7fffffffffffffff on JS20
*/
@@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned int i;
struct iommu_pool *p;
+ BUG_ON(!tbl->it_ops);
+
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -713,9 +715,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
unsigned long bitmap_sz;
unsigned int order;
- if (!tbl || !tbl->it_map) {
- printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
- node_name);
+ if (!tbl)
+ return;
+
+ if (!tbl->it_map) {
+ kfree(tbl);
return;
}
@@ -726,13 +730,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
-#ifdef CONFIG_IOMMU_API
- if (tbl->it_group) {
- iommu_group_put(tbl->it_group);
- BUG_ON(tbl->it_group);
- }
-#endif
-
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -871,17 +868,33 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
}
}
+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return TCE_PCI_READ | TCE_PCI_WRITE;
+ case DMA_FROM_DEVICE:
+ return TCE_PCI_WRITE;
+ case DMA_TO_DEVICE:
+ return TCE_PCI_READ;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
#ifdef CONFIG_IOMMU_API
/*
* SPAPR TCE API
*/
static void group_release(void *iommu_data)
{
- struct iommu_table *tbl = iommu_data;
- tbl->it_group = NULL;
+ struct iommu_table_group *table_group = iommu_data;
+
+ table_group->group = NULL;
}
-void iommu_register_group(struct iommu_table *tbl,
+void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num)
{
struct iommu_group *grp;
@@ -893,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl,
PTR_ERR(grp));
return;
}
- tbl->it_group = grp;
- iommu_group_set_iommudata(grp, tbl, group_release);
+ table_group->group = grp;
+ iommu_group_set_iommudata(grp, table_group, group_release);
name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
pci_domain_number, pe_num);
if (!name)
@@ -919,8 +932,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction);
void iommu_flush_tce(struct iommu_table *tbl)
{
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
@@ -931,7 +944,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce_value,
unsigned long npages)
{
- /* ppc_md.tce_free() does not support any value but 0 */
+ /* tbl->it_ops->clear() does not support any value but 0 */
if (tce_value)
return -EINVAL;
@@ -952,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce)
{
- if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- return -EINVAL;
-
- if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+ if (tce & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
if (ioba & ~IOMMU_PAGE_MASK(tbl))
@@ -972,68 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
}
EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
-{
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
-
- oldtce = ppc_md.tce_get(tbl, entry);
- if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
- ppc_md.tce_free(tbl, entry, 1);
- else
- oldtce = 0;
-
- spin_unlock(&(pool->lock));
-
- return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
- unsigned long entry, unsigned long pages)
-{
- unsigned long oldtce;
- struct page *page;
-
- for ( ; pages; --pages, ++entry) {
- oldtce = iommu_clear_tce(tbl, entry);
- if (!oldtce)
- continue;
-
- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
{
- int ret = -EBUSY;
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
+ long ret;
- oldtce = ppc_md.tce_get(tbl, entry);
- /* Add new entry if it is not busy */
- if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+ ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
- spin_unlock(&(pool->lock));
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL)))
+ SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
@@ -1042,84 +1000,72 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_build);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg);
-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
- unsigned long tce)
+int iommu_take_ownership(struct iommu_table *tbl)
{
- int ret;
- struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
- enum dma_data_direction direction = iommu_tce_direction(tce);
-
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << tbl->it_page_shift, ret); */
- return -EFAULT;
- }
- hwaddr = (unsigned long) page_address(page) + offset;
-
- ret = iommu_tce_build(tbl, entry, hwaddr, direction);
- if (ret)
- put_page(page);
-
- if (ret < 0)
- pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << tbl->it_page_shift, tce, ret);
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+ int ret = 0;
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+ /*
+ * VFIO does not control TCE entries allocation and the guest
+ * can write new TCEs on top of existing ones so iommu_tce_build()
+ * must be able to release old pages. This functionality
+ * requires exchange() callback defined so if it is not
+ * implemented, we disallow taking ownership over the table.
+ */
+ if (!tbl->it_ops->exchange)
+ return -EINVAL;
-int iommu_take_ownership(struct iommu_table *tbl)
-{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
- return -EBUSY;
+ ret = -EBUSY;
+ /* Restore bit#0 set by iommu_init_table() */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+ } else {
+ memset(tbl->it_map, 0xff, sz);
}
- memset(tbl->it_map, 0xff, sz);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
- /*
- * Disable iommu bypass, otherwise the user can DMA to all of
- * our physical memory via the bypass window instead of just
- * the pages that has been explicitly mapped into the iommu
- */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, false);
-
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);
void iommu_release_ownership(struct iommu_table *tbl)
{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
memset(tbl->it_map, 0, sz);
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- /* The kernel owns the device now, we can restore the iommu bypass */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, true);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
+ struct iommu_table_group_link *tgl;
/*
* The sysfs entries should be populated before
@@ -1137,15 +1083,22 @@ int iommu_add_device(struct device *dev)
}
tbl = get_iommu_table_base(dev);
- if (!tbl || !tbl->it_group) {
+ if (!tbl) {
pr_debug("%s: Skipping device %s with no tbl\n",
__func__, dev_name(dev));
return 0;
}
+ tgl = list_first_entry_or_null(&tbl->it_group_list,
+ struct iommu_table_group_link, next);
+ if (!tgl) {
+ pr_debug("%s: Skipping device %s with no group\n",
+ __func__, dev_name(dev));
+ return 0;
+ }
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev),
- iommu_group_id(tbl->it_group));
+ iommu_group_id(tgl->table_group->group));
if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
@@ -1154,7 +1107,7 @@ int iommu_add_device(struct device *dev)
return -EINVAL;
}
- return iommu_group_add_device(tbl->it_group, dev);
+ return iommu_group_add_device(tgl->table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0d054068a21d..b9de34d44fcb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -89,6 +89,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
#endif
return phb;
}
+EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
void pcibios_free_controller(struct pci_controller *phb)
{
@@ -1447,6 +1448,7 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
list_for_each_entry(child_bus, &bus->children, node)
pcibios_claim_one_bus(child_bus);
}
+EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
/* pcibios_finish_adding_to_bus
@@ -1488,6 +1490,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}
+void pcibios_disable_device(struct pci_dev *dev)
+{
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (phb->controller_ops.disable_device)
+ phb->controller_ops.disable_device(dev);
+}
+
resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
{
return (unsigned long) hose->io_base_virt - _IO_BASE;
@@ -1680,6 +1690,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
pcie_bus_configure_settings(child);
}
}
+EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 7ed85a69a9c2..7f9ed0c1f6b9 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -29,7 +29,12 @@
*/
void pcibios_release_device(struct pci_dev *dev)
{
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
eeh_remove_device(dev);
+
+ if (phb->controller_ops.release_device)
+ phb->controller_ops.release_device(dev);
}
/**
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index febb50dd5328..8005e18d1b40 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy a thread..
*/
-extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
/*
* Copy architecture-specific thread state
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9fe3dcdbfca7..bdcbb716f4d6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -687,6 +687,9 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_64K_PAGES
init_mm.context.pte_frag = NULL;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(&init_mm.context);
+#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index fa1fd8a0c867..692873bff334 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
static unsigned long dscr_default;
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
static void read_dscr(void *val)
{
*(unsigned long *)val = get_paca()->dscr_default;
}
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
static void write_dscr(void *val)
{
get_paca()->dscr_default = *(unsigned long *)val;
@@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct device_attribute *attr)
attr->attr.mode |= 0200;
}
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
static ssize_t show_dscr_default(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%lx\n", dscr_default);
}
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
static ssize_t __used store_dscr_default(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 5754b226da7e..bf8f34a58670 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -293,7 +293,7 @@ dont_backup_fp:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
@@ -473,7 +473,7 @@ restore_gprs:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 19e4744b6eba..6530f1b8874d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
};
char *facility = "unknown";
u64 value;
+ u32 instword, rd;
u8 status;
bool hv;
@@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs *regs)
status = value >> 56;
if (status == FSCR_DSCR_LG) {
- /* User is acessing the DSCR. Set the inherit bit and allow
- * the user to set it directly in future by setting via the
- * FSCR DSCR bit. We always leave HFSCR DSCR set.
+ /*
+ * User is accessing the DSCR register using the problem
+ * state only SPR number (0x03) either through a mfspr or
+ * a mtspr instruction. If it is a write attempt through
+ * a mtspr, then we set the inherit bit. This also allows
+ * the user to write or read the register directly in the
+ * future by setting via the FSCR DSCR bit. But in case it
+ * is a read DSCR attempt through a mfspr instruction, we
+ * just emulate the instruction instead. This code path will
+ * always emulate all the mfspr instructions till the user
+ * has attempted atleast one mtspr instruction. This way it
+ * preserves the same behaviour when the user is accessing
+ * the DSCR through privilege level only SPR number (0x11)
+ * which is emulated through illegal instruction exception.
+ * We always leave HFSCR DSCR set.
*/
- current->thread.dscr_inherit = 1;
- mtspr(SPRN_FSCR, value | FSCR_DSCR);
+ if (get_user(instword, (u32 __user *)(regs->nip))) {
+ pr_err("Failed to fetch the user instruction\n");
+ return;
+ }
+
+ /* Write into DSCR (mtspr 0x03, RS) */
+ if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
+ == PPC_INST_MTSPR_DSCR_USER) {
+ rd = (instword >> 21) & 0x1f;
+ current->thread.dscr = regs->gpr[rd];
+ current->thread.dscr_inherit = 1;
+ mtspr(SPRN_FSCR, value | FSCR_DSCR);
+ }
+
+ /* Read from DSCR (mfspr RT, 0x03) */
+ if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
+ == PPC_INST_MFSPR_DSCR_USER) {
+ if (emulate_instruction(regs)) {
+ pr_err("DSCR based mfspr emulation failed\n");
+ return;
+ }
+ regs->nip += 4;
+ emulate_single_step(regs);
+ }
return;
}
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 5bfdab9047be..b41426c60ef6 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1196,6 +1196,11 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
tbl->it_type = TCE_VB;
tbl->it_blocksize = 16;
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ else
+ tbl->it_ops = &iommu_table_pseries_ops;
+
return iommu_init_table(tbl, -1);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df26c402..faa86e9c0551 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -324,7 +324,7 @@ kvm_start_guest:
kvm_secondary_got_guest:
/* Set HSTATE_DSCR(r13) to something sensible */
- ld r6, PACA_DSCR(r13)
+ ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
/* Order load of vcore, ptid etc. after load of vcpu */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 7902802a19a5..a47e14277fd8 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -33,6 +33,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o
-CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
+CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 9c8770b5f96f..3eb73a38220d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index f031a47d7701..6527882ce05e 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -26,7 +26,7 @@
#include <asm/reg.h>
#include <asm/copro.h>
#include <asm/spu.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
/*
* This ought to be kept in sync with the powerpc specific do_page_fault
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
{
- u64 vsid;
+ u64 vsid, vsidkey;
int psize, ssize;
switch (REGION_ID(ea)) {
@@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
+ vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
@@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
psize = mmu_io_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
break;
case KERNEL_REGION_ID:
pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
psize = mmu_linear_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
break;
default:
pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
return 1;
}
- vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER;
+ vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
vsid |= mmu_psize_defs[psize].sllp |
((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 9c4880ddecd6..13befa35d8a8 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -29,7 +29,7 @@
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index fda236f908eb..5ec987f65b2c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -57,6 +57,7 @@
#include <asm/fadump.h>
#include <asm/firmware.h>
#include <asm/tm.h>
+#include <asm/trace.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
ea, access, trap);
+ trace_hash_fault(ea, access, trap);
/* Get region & vsid */
switch (REGION_ID(ea)) {
@@ -1475,7 +1477,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long hash;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
- unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
+ unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
long ret;
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 178876aef40f..4e4efbc2658e 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -89,6 +89,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
#ifdef CONFIG_PPC_64K_PAGES
mm->context.pte_frag = NULL;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(&mm->context);
+#endif
return 0;
}
@@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm)
void destroy_context(struct mm_struct *mm)
{
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_cleanup(&mm->context);
+#endif
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
new file mode 100644
index 000000000000..da6a2168ae9e
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -0,0 +1,316 @@
+/*
+ * IOMMU helpers in MMU context.
+ *
+ * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <asm/mmu_context.h>
+
+static DEFINE_MUTEX(mem_list_mutex);
+
+struct mm_iommu_table_group_mem_t {
+ struct list_head next;
+ struct rcu_head rcu;
+ unsigned long used;
+ atomic64_t mapped;
+ u64 ua; /* userspace address */
+ u64 entries; /* number of entries in hpas[] */
+ u64 *hpas; /* vmalloc'ed */
+};
+
+static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
+ unsigned long npages, bool incr)
+{
+ long ret = 0, locked, lock_limit;
+
+ if (!npages)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ if (incr) {
+ locked = mm->locked_vm + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ mm->locked_vm += npages;
+ } else {
+ if (WARN_ON_ONCE(npages > mm->locked_vm))
+ npages = mm->locked_vm;
+ mm->locked_vm -= npages;
+ }
+
+ pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
+ current->pid,
+ incr ? '+' : '-',
+ npages << PAGE_SHIFT,
+ mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK));
+ up_write(&mm->mmap_sem);
+
+ return ret;
+}
+
+bool mm_iommu_preregistered(void)
+{
+ if (!current || !current->mm)
+ return false;
+
+ return !list_empty(&current->mm->context.iommu_group_mem_list);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+long mm_iommu_get(unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ struct mm_iommu_table_group_mem_t *mem;
+ long i, j, ret = 0, locked_entries = 0;
+ struct page *page = NULL;
+
+ if (!current || !current->mm)
+ return -ESRCH; /* process exited */
+
+ mutex_lock(&mem_list_mutex);
+
+ list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list,
+ next) {
+ if ((mem->ua == ua) && (mem->entries == entries)) {
+ ++mem->used;
+ *pmem = mem;
+ goto unlock_exit;
+ }
+
+ /* Overlap? */
+ if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
+ (ua < (mem->ua +
+ (mem->entries << PAGE_SHIFT)))) {
+ ret = -EINVAL;
+ goto unlock_exit;
+ }
+
+ }
+
+ ret = mm_iommu_adjust_locked_vm(current->mm, entries, true);
+ if (ret)
+ goto unlock_exit;
+
+ locked_entries = entries;
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem) {
+ ret = -ENOMEM;
+ goto unlock_exit;
+ }
+
+ mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
+ if (!mem->hpas) {
+ kfree(mem);
+ ret = -ENOMEM;
+ goto unlock_exit;
+ }
+
+ for (i = 0; i < entries; ++i) {
+ if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+ 1/* pages */, 1/* iswrite */, &page)) {
+ for (j = 0; j < i; ++j)
+ put_page(pfn_to_page(
+ mem->hpas[j] >> PAGE_SHIFT));
+ vfree(mem->hpas);
+ kfree(mem);
+ ret = -EFAULT;
+ goto unlock_exit;
+ }
+
+ mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+ }
+
+ atomic64_set(&mem->mapped, 1);
+ mem->used = 1;
+ mem->ua = ua;
+ mem->entries = entries;
+ *pmem = mem;
+
+ list_add_rcu(&mem->next, &current->mm->context.iommu_group_mem_list);
+
+unlock_exit:
+ if (locked_entries && ret)
+ mm_iommu_adjust_locked_vm(current->mm, locked_entries, false);
+
+ mutex_unlock(&mem_list_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+{
+ long i;
+ struct page *page = NULL;
+
+ for (i = 0; i < mem->entries; ++i) {
+ if (!mem->hpas[i])
+ continue;
+
+ page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
+ if (!page)
+ continue;
+
+ put_page(page);
+ mem->hpas[i] = 0;
+ }
+}
+
+static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+{
+
+ mm_iommu_unpin(mem);
+ vfree(mem->hpas);
+ kfree(mem);
+}
+
+static void mm_iommu_free(struct rcu_head *head)
+{
+ struct mm_iommu_table_group_mem_t *mem = container_of(head,
+ struct mm_iommu_table_group_mem_t, rcu);
+
+ mm_iommu_do_free(mem);
+}
+
+static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+{
+ list_del_rcu(&mem->next);
+ mm_iommu_adjust_locked_vm(current->mm, mem->entries, false);
+ call_rcu(&mem->rcu, mm_iommu_free);
+}
+
+long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
+{
+ long ret = 0;
+
+ if (!current || !current->mm)
+ return -ESRCH; /* process exited */
+
+ mutex_lock(&mem_list_mutex);
+
+ if (mem->used == 0) {
+ ret = -ENOENT;
+ goto unlock_exit;
+ }
+
+ --mem->used;
+ /* There are still users, exit */
+ if (mem->used)
+ goto unlock_exit;
+
+ /* Are there still mappings? */
+ if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
+ ++mem->used;
+ ret = -EBUSY;
+ goto unlock_exit;
+ }
+
+ /* @mapped became 0 so now mappings are disabled, release the region */
+ mm_iommu_release(mem);
+
+unlock_exit:
+ mutex_unlock(&mem_list_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+ unsigned long size)
+{
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ list_for_each_entry_rcu(mem,
+ &current->mm->context.iommu_group_mem_list,
+ next) {
+ if ((mem->ua <= ua) &&
+ (ua + size <= mem->ua +
+ (mem->entries << PAGE_SHIFT))) {
+ ret = mem;
+ break;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
+ unsigned long entries)
+{
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ list_for_each_entry_rcu(mem,
+ &current->mm->context.iommu_group_mem_list,
+ next) {
+ if ((mem->ua == ua) && (mem->entries == entries)) {
+ ret = mem;
+ break;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_find);
+
+long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa)
+{
+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+ u64 *va = &mem->hpas[entry];
+
+ if (entry >= mem->entries)
+ return -EFAULT;
+
+ *hpa = *va | (ua & ~PAGE_MASK);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+
+long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+{
+ if (atomic64_inc_not_zero(&mem->mapped))
+ return 0;
+
+ /* Last mm_iommu_put() has been called, no more mappings allowed() */
+ return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+
+void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+{
+ atomic64_add_unless(&mem->mapped, -1, 1);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+
+void mm_iommu_init(mm_context_t *ctx)
+{
+ INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list);
+}
+
+void mm_iommu_cleanup(mm_context_t *ctx)
+{
+ struct mm_iommu_table_group_mem_t *mem, *tmp;
+
+ list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) {
+ list_del_rcu(&mem->next);
+ mm_iommu_do_free(mem);
+ }
+}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 12b638425bb9..d90893b76e7c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { }
static bool regs_use_siar(struct pt_regs *regs)
{
- return !!regs->result;
+ /*
+ * When we take a performance monitor exception the regs are setup
+ * using perf_read_regs() which overloads some fields, in particular
+ * regs->result to tell us whether to use SIAR.
+ *
+ * However if the regs are from another exception, eg. a syscall, then
+ * they have not been setup using perf_read_regs() and so regs->result
+ * is something random.
+ */
+ return ((TRAP(regs) == 0xf00) && regs->result);
}
/*
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index c949ca055712..63016621aff8 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -193,7 +193,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc)
{
- struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq);
+ struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
int sub_virq;
u32 status;
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index d9b8a443458f..fe51de4fcf13 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -96,7 +96,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct axon_msic *msic = irq_get_handler_data(irq);
+ struct axon_msic *msic = irq_desc_get_handler_data(desc);
u32 write_offset, msi;
int idx;
int retry = 0;
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 21b502398bf3..14a582b21274 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np)
return *ioid;
}
+static struct iommu_table_ops cell_iommu_ops = {
+ .set = tce_build_cell,
+ .clear = tce_free_cell
+};
+
static struct iommu_window * __init
cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
unsigned long offset, unsigned long size,
@@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
window->table.it_offset =
(offset >> window->table.it_page_shift) + pte_offset;
window->table.it_size = size >> window->table.it_page_shift;
+ window->table.it_ops = &cell_iommu_ops;
iommu_init_table(&window->table, iommu->nid);
@@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void)
/* Setup various callbacks */
cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
- ppc_md.tce_build = tce_build_cell;
- ppc_md.tce_free = tce_free_cell;
if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
goto bail;
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index c269caee58f9..9dd154d6f89a 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -124,7 +124,7 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq);
+ struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
unsigned int virq;
raw_spin_lock(&desc->lock);
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index b8f567b2ea19..c929644e74a6 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index,
}
}
+static struct iommu_table_ops iommu_table_iobmap_ops = {
+ .set = iobmap_build,
+ .clear = iobmap_free
+};
static void iommu_table_iobmap_setup(void)
{
@@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void)
* Should probably be 8 (64 bytes)
*/
iommu_table_iobmap.it_blocksize = 4;
+ iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
iommu_init_table(&iommu_table_iobmap, 0);
pr_debug(" <- %s\n", __func__);
}
@@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void)
pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
- ppc_md.tce_build = iobmap_build;
- ppc_md.tce_free = iobmap_free;
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 4b044d8cb49a..604190cab522 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -19,3 +19,10 @@ config PPC_POWERNV
select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
default y
+
+config OPAL_PRD
+ tristate 'OPAL PRD driver'
+ depends on PPC_POWERNV
+ help
+ This enables the opal-prd driver, a facility to run processor
+ recovery diagnostics on OpenPower machines
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 26bd7e417b7c..1c8cdb6250e7 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
+obj-$(CONFIG_OPAL_PRD) += opal-prd.o
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index bd39a120bd60..59d735d2e5c0 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <asm/firmware.h>
+#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
@@ -289,5 +290,4 @@ out_free:
out:
return 0;
}
-
-subsys_initcall(pnv_init_idle_states);
+machine_subsys_initcall(powernv, pnv_init_idle_states);
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 841135f48981..e2e7d75f52f3 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -231,6 +231,7 @@ out:
of_node_put(opal_node);
return rc;
}
+machine_arch_initcall(powernv, opal_event_init);
/**
* opal_event_request(unsigned int opal_event_nr) - Request an event
@@ -244,6 +245,9 @@ out:
*/
int opal_event_request(unsigned int opal_event_nr)
{
+ if (WARN_ON_ONCE(!opal_event_irqchip.domain))
+ return NO_IRQ;
+
return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr);
}
EXPORT_SYMBOL(opal_event_request);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
new file mode 100644
index 000000000000..46cb3feb0a13
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -0,0 +1,449 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * Copyright IBM Corporation 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "opal-prd: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/poll.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/opal-prd.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+/**
+ * The msg member must be at the end of the struct, as it's followed by the
+ * message data.
+ */
+struct opal_prd_msg_queue_item {
+ struct list_head list;
+ struct opal_prd_msg_header msg;
+};
+
+static struct device_node *prd_node;
+static LIST_HEAD(opal_prd_msg_queue);
+static DEFINE_SPINLOCK(opal_prd_msg_queue_lock);
+static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait);
+static atomic_t prd_usage;
+
+static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
+{
+ struct device_node *parent, *node;
+ bool found;
+
+ if (addr + size < addr)
+ return false;
+
+ parent = of_find_node_by_path("/reserved-memory");
+ if (!parent)
+ return false;
+
+ found = false;
+
+ for_each_child_of_node(parent, node) {
+ uint64_t range_addr, range_size, range_end;
+ const __be32 *addrp;
+ const char *label;
+
+ addrp = of_get_address(node, 0, &range_size, NULL);
+
+ range_addr = of_read_number(addrp, 2);
+ range_end = range_addr + range_size;
+
+ label = of_get_property(node, "ibm,prd-label", NULL);
+
+ /* PRD ranges need a label */
+ if (!label)
+ continue;
+
+ if (range_end <= range_addr)
+ continue;
+
+ if (addr >= range_addr && addr + size <= range_end) {
+ found = true;
+ of_node_put(node);
+ break;
+ }
+ }
+
+ of_node_put(parent);
+ return found;
+}
+
+static int opal_prd_open(struct inode *inode, struct file *file)
+{
+ /*
+ * Prevent multiple (separate) processes from concurrent interactions
+ * with the FW PRD channel
+ */
+ if (atomic_xchg(&prd_usage, 1) == 1)
+ return -EBUSY;
+
+ return 0;
+}
+
+/*
+ * opal_prd_mmap - maps firmware-provided ranges into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ */
+
+static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ size_t addr, size;
+ int rc;
+
+ pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
+ vma->vm_start, vma->vm_end, vma->vm_pgoff,
+ vma->vm_flags);
+
+ addr = vma->vm_pgoff << PAGE_SHIFT;
+ size = vma->vm_end - vma->vm_start;
+
+ /* ensure we're mapping within one of the allowable ranges */
+ if (!opal_prd_range_is_valid(addr, size))
+ return -EINVAL;
+
+ vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file,
+ vma->vm_pgoff,
+ size, vma->vm_page_prot))
+ | _PAGE_SPECIAL);
+
+ rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
+ vma->vm_page_prot);
+
+ return rc;
+}
+
+static bool opal_msg_queue_empty(void)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ ret = list_empty(&opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ return ret;
+}
+
+static unsigned int opal_prd_poll(struct file *file,
+ struct poll_table_struct *wait)
+{
+ poll_wait(file, &opal_prd_msg_wait, wait);
+
+ if (!opal_msg_queue_empty())
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static ssize_t opal_prd_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct opal_prd_msg_queue_item *item;
+ unsigned long flags;
+ ssize_t size, err;
+ int rc;
+
+ /* we need at least a header's worth of data */
+ if (count < sizeof(item->msg))
+ return -EINVAL;
+
+ if (*ppos)
+ return -ESPIPE;
+
+ item = NULL;
+
+ for (;;) {
+
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ if (!list_empty(&opal_prd_msg_queue)) {
+ item = list_first_entry(&opal_prd_msg_queue,
+ struct opal_prd_msg_queue_item, list);
+ list_del(&item->list);
+ }
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ if (item)
+ break;
+
+ if (file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ rc = wait_event_interruptible(opal_prd_msg_wait,
+ !opal_msg_queue_empty());
+ if (rc)
+ return -EINTR;
+ }
+
+ size = be16_to_cpu(item->msg.size);
+ if (size > count) {
+ err = -EINVAL;
+ goto err_requeue;
+ }
+
+ rc = copy_to_user(buf, &item->msg, size);
+ if (rc) {
+ err = -EFAULT;
+ goto err_requeue;
+ }
+
+ kfree(item);
+
+ return size;
+
+err_requeue:
+ /* eep! re-queue at the head of the list */
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ list_add(&item->list, &opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+ return err;
+}
+
+static ssize_t opal_prd_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct opal_prd_msg_header hdr;
+ ssize_t size;
+ void *msg;
+ int rc;
+
+ size = sizeof(hdr);
+
+ if (count < size)
+ return -EINVAL;
+
+ /* grab the header */
+ rc = copy_from_user(&hdr, buf, sizeof(hdr));
+ if (rc)
+ return -EFAULT;
+
+ size = be16_to_cpu(hdr.size);
+
+ msg = kmalloc(size, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rc = copy_from_user(msg, buf, size);
+ if (rc) {
+ size = -EFAULT;
+ goto out_free;
+ }
+
+ rc = opal_prd_msg(msg);
+ if (rc) {
+ pr_warn("write: opal_prd_msg returned %d\n", rc);
+ size = -EIO;
+ }
+
+out_free:
+ kfree(msg);
+
+ return size;
+}
+
+static int opal_prd_release(struct inode *inode, struct file *file)
+{
+ struct opal_prd_msg_header msg;
+
+ msg.size = cpu_to_be16(sizeof(msg));
+ msg.type = OPAL_PRD_MSG_TYPE_FINI;
+
+ opal_prd_msg((struct opal_prd_msg *)&msg);
+
+ atomic_xchg(&prd_usage, 0);
+
+ return 0;
+}
+
+static long opal_prd_ioctl(struct file *file, unsigned int cmd,
+ unsigned long param)
+{
+ struct opal_prd_info info;
+ struct opal_prd_scom scom;
+ int rc = 0;
+
+ switch (cmd) {
+ case OPAL_PRD_GET_INFO:
+ memset(&info, 0, sizeof(info));
+ info.version = OPAL_PRD_KERNEL_VERSION;
+ rc = copy_to_user((void __user *)param, &info, sizeof(info));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ case OPAL_PRD_SCOM_READ:
+ rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+
+ scom.rc = opal_xscom_read(scom.chip, scom.addr,
+ (__be64 *)&scom.data);
+ scom.data = be64_to_cpu(scom.data);
+ pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n",
+ scom.chip, scom.addr, scom.data, scom.rc);
+
+ rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ case OPAL_PRD_SCOM_WRITE:
+ rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+
+ scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
+ pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n",
+ scom.chip, scom.addr, scom.data, scom.rc);
+
+ rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ default:
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+static const struct file_operations opal_prd_fops = {
+ .open = opal_prd_open,
+ .mmap = opal_prd_mmap,
+ .poll = opal_prd_poll,
+ .read = opal_prd_read,
+ .write = opal_prd_write,
+ .unlocked_ioctl = opal_prd_ioctl,
+ .release = opal_prd_release,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice opal_prd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "opal-prd",
+ .fops = &opal_prd_fops,
+};
+
+/* opal interface */
+static int opal_prd_msg_notifier(struct notifier_block *nb,
+ unsigned long msg_type, void *_msg)
+{
+ struct opal_prd_msg_queue_item *item;
+ struct opal_prd_msg_header *hdr;
+ struct opal_msg *msg = _msg;
+ int msg_size, item_size;
+ unsigned long flags;
+
+ if (msg_type != OPAL_MSG_PRD)
+ return 0;
+
+ /* Calculate total size of the message and item we need to store. The
+ * 'size' field in the header includes the header itself. */
+ hdr = (void *)msg->params;
+ msg_size = be16_to_cpu(hdr->size);
+ item_size = msg_size + sizeof(*item) - sizeof(item->msg);
+
+ item = kzalloc(item_size, GFP_ATOMIC);
+ if (!item)
+ return -ENOMEM;
+
+ memcpy(&item->msg, msg->params, msg_size);
+
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ list_add_tail(&item->list, &opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ wake_up_interruptible(&opal_prd_msg_wait);
+
+ return 0;
+}
+
+static struct notifier_block opal_prd_event_nb = {
+ .notifier_call = opal_prd_msg_notifier,
+ .next = NULL,
+ .priority = 0,
+};
+
+static int opal_prd_probe(struct platform_device *pdev)
+{
+ int rc;
+
+ if (!pdev || !pdev->dev.of_node)
+ return -ENODEV;
+
+ /* We should only have one prd driver instance per machine; ensure
+ * that we only get a valid probe on a single OF node.
+ */
+ if (prd_node)
+ return -EBUSY;
+
+ prd_node = pdev->dev.of_node;
+
+ rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb);
+ if (rc) {
+ pr_err("Couldn't register event notifier\n");
+ return rc;
+ }
+
+ rc = misc_register(&opal_prd_dev);
+ if (rc) {
+ pr_err("failed to register miscdev\n");
+ opal_message_notifier_unregister(OPAL_MSG_PRD,
+ &opal_prd_event_nb);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int opal_prd_remove(struct platform_device *pdev)
+{
+ misc_deregister(&opal_prd_dev);
+ opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+ return 0;
+}
+
+static const struct of_device_id opal_prd_match[] = {
+ { .compatible = "ibm,opal-prd" },
+ { },
+};
+
+static struct platform_driver opal_prd_driver = {
+ .driver = {
+ .name = "opal-prd",
+ .owner = THIS_MODULE,
+ .of_match_table = opal_prd_match,
+ },
+ .probe = opal_prd_probe,
+ .remove = opal_prd_remove,
+};
+
+module_platform_driver(opal_prd_driver);
+
+MODULE_DEVICE_TABLE(of, opal_prd_match);
+MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 2e52b47393e7..afe66c576a38 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
}
ret = opal_get_param(token, param_id, (u64)buffer, length);
- if (ret != OPAL_ASYNC_COMPLETION)
+ if (ret != OPAL_ASYNC_COMPLETION) {
+ ret = opal_error_code(ret);
goto out_token;
+ }
ret = opal_async_wait_response(token, &msg);
if (ret) {
@@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
goto out_token;
}
- ret = be64_to_cpu(msg.params[1]);
+ ret = opal_error_code(be64_to_cpu(msg.params[1]));
out_token:
opal_async_release_token(token);
@@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
ret = opal_set_param(token, param_id, (u64)buffer, length);
- if (ret != OPAL_ASYNC_COMPLETION)
+ if (ret != OPAL_ASYNC_COMPLETION) {
+ ret = opal_error_code(ret);
goto out_token;
+ }
ret = opal_async_wait_response(token, &msg);
if (ret) {
@@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
goto out_token;
}
- ret = be64_to_cpu(msg.params[1]);
+ ret = opal_error_code(be64_to_cpu(msg.params[1]));
out_token:
opal_async_release_token(token);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index bf15ead00e12..d6a7b8252e4d 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -296,3 +296,4 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 8403307c5362..f084afa0e3ba 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -235,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
return atomic_notifier_chain_register(
&opal_msg_notifier_head[msg_type], nb);
}
+EXPORT_SYMBOL_GPL(opal_message_notifier_register);
int opal_message_notifier_unregister(enum opal_msg_type msg_type,
struct notifier_block *nb)
@@ -242,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type,
return atomic_notifier_chain_unregister(
&opal_msg_notifier_head[msg_type], nb);
}
+EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
@@ -600,21 +602,13 @@ static void __init opal_dump_region_init(void)
"rc = %d\n", rc);
}
-static void opal_flash_init(struct device_node *opal_node)
+static void opal_pdev_init(struct device_node *opal_node,
+ const char *compatible)
{
struct device_node *np;
for_each_child_of_node(opal_node, np)
- if (of_device_is_compatible(np, "ibm,opal-flash"))
- of_platform_device_create(np, NULL, NULL);
-}
-
-static void opal_ipmi_init(struct device_node *opal_node)
-{
- struct device_node *np;
-
- for_each_child_of_node(opal_node, np)
- if (of_device_is_compatible(np, "ibm,opal-ipmi"))
+ if (of_device_is_compatible(np, compatible))
of_platform_device_create(np, NULL, NULL);
}
@@ -663,9 +657,6 @@ static int __init opal_init(void)
return -ENODEV;
}
- /* Initialise OPAL events */
- opal_event_init();
-
/* Register OPAL consoles if any ports */
if (firmware_has_feature(FW_FEATURE_OPALv2))
consoles = of_find_node_by_path("/ibm,opal/consoles");
@@ -717,10 +708,10 @@ static int __init opal_init(void)
opal_msglog_init();
}
- /* Initialize OPAL IPMI backend */
- opal_ipmi_init(opal_node);
-
- opal_flash_init(opal_node);
+ /* Initialize platform devices: IPMI backend, PRD & flash interface */
+ opal_pdev_init(opal_node, "ibm,opal-ipmi");
+ opal_pdev_init(opal_node, "ibm,opal-flash");
+ opal_pdev_init(opal_node, "ibm,opal-prd");
return 0;
}
@@ -752,11 +743,14 @@ void opal_shutdown(void)
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_xscom_read);
+EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
+EXPORT_SYMBOL_GPL(opal_prd_msg);
/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -830,6 +824,7 @@ int opal_error_code(int rc)
case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
case OPAL_BUSY_EVENT: return -EBUSY;
case OPAL_NO_MEM: return -ENOMEM;
+ case OPAL_PERMISSION: return -EPERM;
case OPAL_UNSUPPORTED: return -EIO;
case OPAL_HARDWARE: return -EIO;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 508f530e367b..5738d315248b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -23,6 +23,9 @@
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -38,8 +41,9 @@
#include <asm/debug.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
+#include <asm/mmzone.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
#include "powernv.h"
#include "pci.h"
@@ -47,6 +51,11 @@
/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+#define POWERNV_IOMMU_MAX_LEVELS 5
+
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
@@ -1086,10 +1095,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
return;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Associate it with all child devices */
pnv_ioda_setup_same_PE(bus, pe);
@@ -1283,36 +1288,27 @@ m64_failed:
return -EBUSY;
}
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+ int num);
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+
static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
struct iommu_table *tbl;
- unsigned long addr;
int64_t rc;
- bus = dev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- tbl = pe->tce32_table;
- addr = tbl->it_base;
-
- opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
- pe->pe_number << 1, 1, __pa(addr),
- 0, 0x1000);
-
- rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
- pe->pe_number,
- (pe->pe_number << 1) + 1,
- pe->tce_bypass_base,
- 0);
+ tbl = pe->table_group.tables[0];
+ rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pnv_pci_ioda2_set_bypass(pe, false);
+ if (pe->table_group.group) {
+ iommu_group_put(pe->table_group.group);
+ BUG_ON(pe->table_group.group);
+ }
+ pnv_pci_ioda2_table_free_pages(tbl);
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
- free_pages(addr, get_order(TCE32_TABLE_SIZE));
- pe->tce32_table = NULL;
}
static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -1460,10 +1456,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
continue;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Put PE to the list */
mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
@@ -1598,7 +1590,13 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+ /*
+ * Note: iommu_add_device() will fail here as
+ * for physical PE: the device is already added by now;
+ * for virtual PE: sysfs entries are not ready yet and
+ * tce_iommu_bus_notifier will add the device to a group later.
+ */
}
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
@@ -1626,7 +1624,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
}
*pdev->dev.dma_mask = dma_mask;
return 0;
@@ -1655,36 +1653,36 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus,
- bool add_to_iommu_group)
+ struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- if (add_to_iommu_group)
- set_iommu_table_base_and_group(&dev->dev,
- pe->tce32_table);
- else
- set_iommu_table_base(&dev->dev, pe->tce32_table);
+ set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+ iommu_add_device(&dev->dev);
- if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
- add_to_iommu_group);
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
}
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
+ struct iommu_table_group_link *tgl = list_first_entry_or_null(
+ &tbl->it_group_list, struct iommu_table_group_link,
+ next);
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
unsigned long start, end, inc;
const unsigned shift = tbl->it_page_shift;
- start = __pa(startp);
- end = __pa(endp);
+ start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+ end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+ npages - 1);
/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
if (tbl->it_busno) {
@@ -1720,26 +1718,79 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
*/
}
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
+
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+ return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret && (tbl->it_type &
+ (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+ pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);
+
+ return ret;
+}
+#endif
+
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+ long npages)
+{
+ pnv_tce_free(tbl, index, npages);
+
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+ .set = pnv_ioda1_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda1_tce_xchg,
+#endif
+ .clear = pnv_ioda1_tce_free,
+ .get = pnv_tce_get,
+};
+
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+ /* 01xb - invalidate TCEs that match the specified PE# */
+ unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+ struct pnv_phb *phb = pe->phb;
+
+ if (!phb->ioda.tce_inval_reg)
+ return;
+
+ mb(); /* Ensure above stores are visible */
+ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
+ __be64 __iomem *invalidate, unsigned shift,
+ unsigned long index, unsigned long npages)
{
unsigned long start, end, inc;
- __be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
- const unsigned shift = tbl->it_page_shift;
/* We'll invalidate DMA address in PE scope */
start = 0x2ull << 60;
- start |= (pe->pe_number & 0xFF);
+ start |= (pe_number & 0xFF);
end = start;
/* Figure out the start, end and step */
- inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
- start |= (inc << shift);
- inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
- end |= (inc << shift);
+ start |= (index << shift);
+ end |= ((index + npages - 1) << shift);
inc = (0x1ull << shift);
mb();
@@ -1752,25 +1803,83 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
}
}
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
- struct pnv_ioda_pe *pe = tbl->data;
- struct pnv_phb *phb = pe->phb;
+ struct iommu_table_group_link *tgl;
- if (phb->type == PNV_PHB_IODA1)
- pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
- else
- pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
+ __be64 __iomem *invalidate = rm ?
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
+
+ pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
+ invalidate, tbl->it_page_shift,
+ index, npages);
+ }
+}
+
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
+
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+
+ return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret && (tbl->it_type &
+ (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+ pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+
+ return ret;
+}
+#endif
+
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+ long npages)
+{
+ pnv_tce_free(tbl, index, npages);
+
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+static void pnv_ioda2_table_free(struct iommu_table *tbl)
+{
+ pnv_pci_ioda2_table_free_pages(tbl);
+ iommu_free_table(tbl, "pnv");
}
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda2_tce_xchg,
+#endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+ .free = pnv_ioda2_table_free,
+};
+
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, unsigned int base,
unsigned int segs)
{
struct page *tce_mem = NULL;
- const __be64 *swinvp;
struct iommu_table *tbl;
unsigned int i;
int64_t rc;
@@ -1784,6 +1893,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
if (WARN_ON(pe->tce32_seg >= 0))
return;
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
+
/* Grab a 32-bit TCE table */
pe->tce32_seg = base;
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
@@ -1818,39 +1932,30 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
base << 28, IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
+ if (phb->ioda.tce_inval_reg)
tbl->it_type |= (TCE_PCI_SWINV_CREATE |
TCE_PCI_SWINV_FREE |
TCE_PCI_SWINV_PAIR);
- }
+
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
+ pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
+ pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
iommu_init_table(tbl, phb->hose->node);
if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
+ /*
+ * Setting table base here only for carrying iommu_group
+ * further down to let iommu_add_device() do the job.
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+ */
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+ iommu_add_device(&pe->pdev->dev);
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
return;
fail:
@@ -1859,11 +1964,53 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
}
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ struct pnv_phb *phb = pe->phb;
+ int64_t rc;
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+ pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num,
+ start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
+
+ /*
+ * Map TCE table through TVT. The TVE index is the PE number
+ * shifted by 1 bit for 32-bits DMA space.
+ */
+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
+ pe->pe_number,
+ (pe->pe_number << 1) + num,
+ tbl->it_indirect_levels + 1,
+ __pa(tbl->it_base),
+ size << 3,
+ IOMMU_PAGE_SIZE(tbl));
+ if (rc) {
+ pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
+ return rc;
+ }
+
+ pnv_pci_link_table_and_group(phb->hose->node, num,
+ tbl, &pe->table_group);
+ pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+ return 0;
+}
+
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
- struct pnv_ioda_pe *pe = tbl->data;
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
@@ -1883,17 +2030,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
window_id,
pe->tce_bypass_base,
0);
-
- /*
- * EEH needs the mapping between IOMMU table and group
- * of those VFIO/KVM pass-through devices. We can postpone
- * resetting DMA ops until the DMA mask is configured in
- * host side.
- */
- if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
- else
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
if (rc)
pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
@@ -1901,106 +2037,363 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
pe->tce_bypass_enabled = enable;
}
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table *tbl);
+
+static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
{
- /* TVE #1 is selected by PCI address bit 59 */
- pe->tce_bypass_base = 1ull << 59;
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ int nid = pe->phb->hose->node;
+ __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start;
+ long ret;
+ struct iommu_table *tbl;
+
+ tbl = pnv_pci_table_alloc(nid);
+ if (!tbl)
+ return -ENOMEM;
- /* Install set_bypass callback for VFIO */
- pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+ ret = pnv_pci_ioda2_table_alloc_pages(nid,
+ bus_offset, page_shift, window_size,
+ levels, tbl);
+ if (ret) {
+ iommu_free_table(tbl, "pnv");
+ return ret;
+ }
- /* Enable bypass by default */
- pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
+ if (pe->phb->ioda.tce_inval_reg)
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+
+ *ptbl = tbl;
+
+ return 0;
}
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+{
+ struct iommu_table *tbl = NULL;
+ long rc;
+
+ rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
+ IOMMU_PAGE_SHIFT_4K,
+ pe->table_group.tce32_size,
+ POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+ if (rc) {
+ pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
+ rc);
+ return rc;
+ }
+
+ iommu_init_table(tbl, pe->phb->hose->node);
+
+ rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+ if (rc) {
+ pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
+ rc);
+ pnv_ioda2_table_free(tbl);
+ return rc;
+ }
+
+ if (!pnv_iommu_bypass_disabled)
+ pnv_pci_ioda2_set_bypass(pe, true);
+
+ /* OPAL variant of PHB3 invalidated TCEs */
+ if (pe->phb->ioda.tce_inval_reg)
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+
+ /*
+ * Setting table base here only for carrying iommu_group
+ * further down to let iommu_add_device() do the job.
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+ */
+ if (pe->flags & PNV_IODA_PE_DEV)
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+
+ return 0;
+}
+
+#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+ int num)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ struct pnv_phb *phb = pe->phb;
+ long ret;
+
+ pe_info(pe, "Removing DMA window #%d\n", num);
+
+ ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+ (pe->pe_number << 1) + num,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (ret)
+ pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
+ else
+ pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+ pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_IOMMU_API
+static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels)
+{
+ unsigned long bytes = 0;
+ const unsigned window_shift = ilog2(window_size);
+ unsigned entries_shift = window_shift - page_shift;
+ unsigned table_shift = entries_shift + 3;
+ unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift);
+ unsigned long direct_table_size;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+ (window_size > memory_hotplug_max()) ||
+ !is_power_of_2(window_size))
+ return 0;
+
+ /* Calculate a direct table size from window_size and levels */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ table_shift = entries_shift + 3;
+ table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
+ direct_table_size = 1UL << table_shift;
+
+ for ( ; levels; --levels) {
+ bytes += _ALIGN_UP(tce_table_size, direct_table_size);
+
+ tce_table_size /= direct_table_size;
+ tce_table_size <<= 3;
+ tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size);
+ }
+
+ return bytes;
+}
+
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
+ struct iommu_table *tbl = pe->table_group.tables[0];
+
+ pnv_pci_ioda2_set_bypass(pe, false);
+ pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+ pnv_ioda2_table_free(tbl);
+}
+
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+
+ pnv_pci_ioda2_setup_default_config(pe);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table,
+ .set_window = pnv_pci_ioda2_set_window,
+ .unset_window = pnv_pci_ioda2_unset_window,
+ .take_ownership = pnv_ioda2_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
{
- struct page *tce_mem = NULL;
- void *addr;
const __be64 *swinvp;
- struct iommu_table *tbl;
- unsigned int tce_table_size, end;
- int64_t rc;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
+ /* OPAL variant of PHB3 invalidated TCEs */
+ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+ if (!swinvp)
return;
- /* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
- end = (1 << ilog2(phb->ioda.m32_pci_base));
- tce_table_size = (end / 0x1000) * 8;
- pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
- end);
+ phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp);
+ phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
+}
- /* Allocate TCE table */
- tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(tce_table_size));
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
+ unsigned levels, unsigned long limit,
+ unsigned long *current_offset)
+{
+ struct page *tce_mem = NULL;
+ __be64 *addr, *tmp;
+ unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+ unsigned long allocated = 1UL << (order + PAGE_SHIFT);
+ unsigned entries = 1UL << (shift - 3);
+ long i;
+
+ tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
if (!tce_mem) {
- pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
- goto fail;
+ pr_err("Failed to allocate a TCE memory, order=%d\n", order);
+ return NULL;
}
addr = page_address(tce_mem);
- memset(addr, 0, tce_table_size);
+ memset(addr, 0, allocated);
+
+ --levels;
+ if (!levels) {
+ *current_offset += allocated;
+ return addr;
+ }
+
+ for (i = 0; i < entries; ++i) {
+ tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+ levels, limit, current_offset);
+ if (!tmp)
+ break;
+
+ addr[i] = cpu_to_be64(__pa(tmp) |
+ TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (*current_offset >= limit)
+ break;
+ }
+
+ return addr;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned level);
+
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table *tbl)
+{
+ void *addr;
+ unsigned long offset = 0, level_shift;
+ const unsigned window_shift = ilog2(window_size);
+ unsigned entries_shift = window_shift - page_shift;
+ unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
+ const unsigned long tce_table_size = 1UL << table_shift;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+
+ if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+ return -EINVAL;
+
+ /* Adjust direct table size from window_size and levels */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ level_shift = entries_shift + 3;
+ level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+
+ /* Allocate TCE table */
+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ levels, tce_table_size, &offset);
+
+ /* addr==NULL means that the first level allocation failed */
+ if (!addr)
+ return -ENOMEM;
/*
- * Map TCE table through TVT. The TVE index is the PE number
- * shifted by 1 bit for 32-bits DMA space.
+ * First level was allocated but some lower level failed as
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
*/
- rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
- pe->pe_number << 1, 1, __pa(addr),
- tce_table_size, 0x1000);
- if (rc) {
- pe_err(pe, "Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
- goto fail;
+ if (offset < tce_table_size) {
+ pnv_pci_ioda2_table_do_free_pages(addr,
+ 1ULL << (level_shift - 3), levels - 1);
+ return -ENOMEM;
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
- IOMMU_PAGE_SHIFT_4K);
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+ page_shift);
+ tbl->it_level_size = 1ULL << (level_shift - 3);
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_allocated_size = offset;
- /* OPAL variant of PHB3 invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
- tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+ pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
+ window_size, tce_table_size, bus_offset);
+
+ return 0;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned level)
+{
+ const unsigned long addr_ul = (unsigned long) addr &
+ ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (level) {
+ long i;
+ u64 *tmp = (u64 *) addr_ul;
+
+ for (i = 0; i < size; ++i) {
+ unsigned long hpa = be64_to_cpu(tmp[i]);
+
+ if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ continue;
+
+ pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+ level - 1);
+ }
}
- iommu_init_table(tbl, phb->hose->node);
- if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
-
- /* Also create a bypass window */
- if (!pnv_iommu_bypass_disabled)
- pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+ free_pages(addr_ul, get_order(size << 3));
+}
- return;
-fail:
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
- if (tce_mem)
- __free_pages(tce_mem, get_order(tce_table_size));
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+
+ if (!tbl->it_size)
+ return;
+
+ pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+ tbl->it_indirect_levels);
+}
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
+{
+ int64_t rc;
+
+ /* We shouldn't already have a 32-bit DMA associated */
+ if (WARN_ON(pe->tce32_seg >= 0))
+ return;
+
+ /* TVE #1 is selected by PCI address bit 59 */
+ pe->tce_bypass_base = 1ull << 59;
+
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+
+ /* The PE will reserve all possible 32-bits space */
+ pe->tce32_seg = 0;
+ pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+ phb->ioda.m32_pci_base);
+
+ /* Setup linux iommu table */
+ pe->table_group.tce32_start = 0;
+ pe->table_group.tce32_size = phb->ioda.m32_pci_base;
+ pe->table_group.max_dynamic_windows_supported =
+ IOMMU_TABLE_GROUP_MAX_TABLES;
+ pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
+ pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M;
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+#endif
+
+ rc = pnv_pci_ioda2_setup_default_config(pe);
+ if (rc) {
+ if (pe->tce32_seg >= 0)
+ pe->tce32_seg = -1;
+ return;
+ }
+
+ if (pe->flags & PNV_IODA_PE_DEV)
+ iommu_add_device(&pe->pdev->dev);
+ else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
@@ -2025,6 +2418,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
pr_info("PCI: %d PE# for a total weight of %d\n",
phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pnv_pci_ioda_setup_opal_tce_kill(phb);
+
/* Walk our PE list and configure their DMA segments, hand them
* out one base segment plus any residual segments based on
* weight
@@ -2643,8 +3038,10 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
-static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
+ struct pnv_phb *phb = hose->private_data;
+
opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
OPAL_ASSERT_RESET);
}
@@ -2659,6 +3056,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.dma_set_mask = pnv_pci_ioda_dma_set_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
@@ -2806,9 +3204,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
- /* Setup shutdown function for kexec */
- phb->shutdown = pnv_pci_ioda_shutdown;
-
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 7f826e8cc1d4..f2bdfea3b68d 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -83,16 +83,32 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
+static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
+ .set = pnv_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_tce_xchg,
+#endif
+ .clear = pnv_tce_free,
+ .get = pnv_tce_get,
+};
+
static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
struct pci_dev *pdev)
{
- if (phb->p5ioc2.iommu_table.it_map == NULL) {
- iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
- iommu_register_group(&phb->p5ioc2.iommu_table,
+ struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
+
+ if (!tbl->it_map) {
+ tbl->it_ops = &pnv_p5ioc2_iommu_ops;
+ iommu_init_table(tbl, phb->hose->node);
+ iommu_register_group(&phb->p5ioc2.table_group,
pci_domain_nr(phb->hose->bus), phb->opal_id);
+ INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+ pnv_pci_link_table_and_group(phb->hose->node, 0,
+ tbl, &phb->p5ioc2.table_group);
}
- set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
+ set_iommu_table_base(&pdev->dev, tbl);
+ iommu_add_device(&pdev->dev);
}
static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
@@ -111,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
u64 phb_id;
int64_t rc;
static int primary = 1;
+ struct iommu_table_group *table_group;
+ struct iommu_table *tbl;
pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
@@ -180,6 +198,15 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
tce_mem, tce_size, 0,
IOMMU_PAGE_SHIFT_4K);
+ /*
+ * We do not allocate iommu_table as we do not support
+ * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
+ * should not be called for phb->p5ioc2.table_group.tables[0] ever.
+ */
+ tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
+ table_group = &phb->p5ioc2.table_group;
+ table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
+ table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
}
void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 8a557b5aabf7..765d8ed558d0 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -572,80 +572,152 @@ struct pci_ops pnv_pci_ops = {
.write = pnv_pci_write_config,
};
-static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
- struct dma_attrs *attrs, bool rm)
+static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
{
- u64 proto_tce;
- __be64 *tcep, *tces;
- u64 rpn;
-
- proto_tce = TCE_PCI_READ; // Read allowed
+ __be64 *tmp = ((__be64 *)tbl->it_base);
+ int level = tbl->it_indirect_levels;
+ const long shift = ilog2(tbl->it_level_size);
+ unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+ while (level) {
+ int n = (idx & mask) >> (level * shift);
+ unsigned long tce = be64_to_cpu(tmp[n]);
+
+ tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+ idx &= ~mask;
+ mask >>= shift;
+ --level;
+ }
- if (direction != DMA_TO_DEVICE)
- proto_tce |= TCE_PCI_WRITE;
+ return tmp + idx;
+}
- tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
- rpn = __pa(uaddr) >> tbl->it_page_shift;
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ u64 proto_tce = iommu_direction_to_tce_perm(direction);
+ u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+ long i;
- while (npages--)
- *(tcep++) = cpu_to_be64(proto_tce |
- (rpn++ << tbl->it_page_shift));
+ for (i = 0; i < npages; i++) {
+ unsigned long newtce = proto_tce |
+ ((rpn + i) << tbl->it_page_shift);
+ unsigned long idx = index - tbl->it_offset + i;
- /* Some implementations won't cache invalid TCEs and thus may not
- * need that flush. We'll probably turn it_type into a bit mask
- * of flags if that becomes the case
- */
- if (tbl->it_type & TCE_PCI_SWINV_CREATE)
- pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+ *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
+ }
return 0;
}
-static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs)
+#ifdef CONFIG_IOMMU_API
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
{
- return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
- false);
+ u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+ unsigned long newtce = *hpa | proto_tce, oldtce;
+ unsigned long idx = index - tbl->it_offset;
+
+ BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+ oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
+ *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ *direction = iommu_tce_direction(oldtce);
+
+ return 0;
}
+#endif
-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
- bool rm)
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ long i;
- tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
+ for (i = 0; i < npages; i++) {
+ unsigned long idx = index - tbl->it_offset + i;
- while (npages--)
- *(tcep++) = cpu_to_be64(0);
+ *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
+ }
+}
- if (tbl->it_type & TCE_PCI_SWINV_FREE)
- pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+ return *(pnv_tce(tbl, index - tbl->it_offset));
}
-static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+struct iommu_table *pnv_pci_table_alloc(int nid)
{
- pnv_tce_free(tbl, index, npages, false);
+ struct iommu_table *tbl;
+
+ tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
+ INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+
+ return tbl;
}
-static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group)
{
- return ((u64 *)tbl->it_base)[index - tbl->it_offset];
+ struct iommu_table_group_link *tgl = NULL;
+
+ if (WARN_ON(!tbl || !table_group))
+ return -EINVAL;
+
+ tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+ node);
+ if (!tgl)
+ return -ENOMEM;
+
+ tgl->table_group = table_group;
+ list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+ table_group->tables[num] = tbl;
+
+ return 0;
}
-static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs)
+static void pnv_iommu_table_group_link_free(struct rcu_head *head)
{
- return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
+ struct iommu_table_group_link *tgl = container_of(head,
+ struct iommu_table_group_link, rcu);
+
+ kfree(tgl);
}
-static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group)
{
- pnv_tce_free(tbl, index, npages, true);
+ long i;
+ bool found;
+ struct iommu_table_group_link *tgl;
+
+ if (!tbl || !table_group)
+ return;
+
+ /* Remove link to a group from table's list of attached groups */
+ found = false;
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ if (tgl->table_group == table_group) {
+ list_del_rcu(&tgl->next);
+ call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+ found = true;
+ break;
+ }
+ }
+ if (WARN_ON(!found))
+ return;
+
+ /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+ found = false;
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] == tbl) {
+ table_group->tables[i] = NULL;
+ found = true;
+ break;
+ }
+ }
+ WARN_ON(!found);
}
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
@@ -704,12 +776,9 @@ void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
- list_for_each_entry(hose, &hose_list, list_node) {
- struct pnv_phb *phb = hose->private_data;
-
- if (phb && phb->shutdown)
- phb->shutdown(phb);
- }
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->controller_ops.shutdown)
+ hose->controller_ops.shutdown(hose);
}
/* Fixup wrong class code in p7ioc and p8 root complex */
@@ -752,11 +821,6 @@ void __init pnv_pci_init(void)
pci_devs_phb_init();
/* Configure IOMMU DMA hooks */
- ppc_md.tce_build = pnv_tce_build_vm;
- ppc_md.tce_free = pnv_tce_free_vm;
- ppc_md.tce_build_rm = pnv_tce_build_rm;
- ppc_md.tce_free_rm = pnv_tce_free_rm;
- ppc_md.tce_get = pnv_tce_get;
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index ac8686c853e6..8ef2d28aded0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -57,8 +57,7 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
int tce32_seg;
int tce32_segcount;
- struct iommu_table *tce32_table;
- phys_addr_t tce_inval_reg_phys;
+ struct iommu_table_group table_group;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -110,7 +109,6 @@ struct pnv_phb {
struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
- void (*shutdown)(struct pnv_phb *phb);
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pnv_phb *phb);
int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
@@ -121,6 +119,7 @@ struct pnv_phb {
union {
struct {
struct iommu_table iommu_table;
+ struct iommu_table_group table_group;
} p5ioc2;
struct {
@@ -184,6 +183,12 @@ struct pnv_phb {
* boot for resource allocation purposes
*/
struct list_head pe_dma_list;
+
+ /* TCE cache invalidate registers (physical and
+ * remapped)
+ */
+ phys_addr_t tce_inval_reg_phys;
+ __be64 __iomem *tce_inval_reg;
} ioda;
};
@@ -198,6 +203,13 @@ struct pnv_phb {
};
extern struct pci_ops pnv_pci_ops;
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ struct dma_attrs *attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
@@ -205,6 +217,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
int where, int size, u32 *val);
int pnv_pci_cfg_write(struct pci_dn *pdn,
int where, int size, u32 val);
+extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 61d5a17f45c0..10510dea16b3 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,6 +36,8 @@
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -51,6 +53,73 @@
#include "pseries.h"
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+ struct iommu_table_group *table_group = NULL;
+ struct iommu_table *tbl = NULL;
+ struct iommu_table_group_link *tgl = NULL;
+
+ table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
+ node);
+ if (!table_group)
+ goto fail_exit;
+
+ tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
+ if (!tbl)
+ goto fail_exit;
+
+ tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+ node);
+ if (!tgl)
+ goto fail_exit;
+
+ INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+ tgl->table_group = table_group;
+ list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+ table_group->tables[0] = tbl;
+
+ return table_group;
+
+fail_exit:
+ kfree(tgl);
+ kfree(table_group);
+ kfree(tbl);
+
+ return NULL;
+}
+
+static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+ const char *node_name)
+{
+ struct iommu_table *tbl;
+#ifdef CONFIG_IOMMU_API
+ struct iommu_table_group_link *tgl;
+#endif
+
+ if (!table_group)
+ return;
+
+ tbl = table_group->tables[0];
+#ifdef CONFIG_IOMMU_API
+ tgl = list_first_entry_or_null(&tbl->it_group_list,
+ struct iommu_table_group_link, next);
+
+ WARN_ON_ONCE(!tgl);
+ if (tgl) {
+ list_del_rcu(&tgl->next);
+ kfree(tgl);
+ }
+ if (table_group->group) {
+ iommu_group_put(table_group->group);
+ BUG_ON(table_group->group);
+ }
+#endif
+ iommu_free_table(tbl, node_name);
+
+ kfree(table_group);
+}
+
static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
__be64 *startp, __be64 *endp)
{
@@ -193,7 +262,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
unsigned long flags;
- if (npages == 1) {
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
@@ -285,6 +354,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
{
u64 rc;
+ if (!firmware_has_feature(FW_FEATURE_MULTITCE))
+ return tce_free_pSeriesLP(tbl, tcenum, npages);
+
rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
if (rc && printk_ratelimit()) {
@@ -460,7 +532,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}
-
#ifdef CONFIG_PCI
static void iommu_table_setparms(struct pci_controller *phb,
struct device_node *dn,
@@ -546,6 +617,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
tbl->it_size = size >> tbl->it_page_shift;
}
+struct iommu_table_ops iommu_table_pseries_ops = {
+ .set = tce_build_pSeries,
+ .clear = tce_free_pSeries,
+ .get = tce_get_pseries
+};
+
static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
struct device_node *dn;
@@ -610,12 +687,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
pci->phb->dma_window_size = 0x8000000ul;
pci->phb->dma_window_base_cur = 0x8000000ul;
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- pci->phb->node);
+ pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+ tbl = pci->table_group->tables[0];
iommu_table_setparms(pci->phb, dn, tbl);
- pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(bus), 0);
+ tbl->it_ops = &iommu_table_pseries_ops;
+ iommu_init_table(tbl, pci->phb->node);
+ iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -625,6 +703,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}
+struct iommu_table_ops iommu_table_lpar_multi_ops = {
+ .set = tce_buildmulti_pSeriesLP,
+ .clear = tce_freemulti_pSeriesLP,
+ .get = tce_get_pSeriesLP
+};
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
@@ -653,15 +736,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
ppci = PCI_DN(pdn);
pr_debug(" parent is %s, iommu_table: 0x%p\n",
- pdn->full_name, ppci->iommu_table);
+ pdn->full_name, ppci->table_group);
- if (!ppci->iommu_table) {
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- ppci->phb->node);
+ if (!ppci->table_group) {
+ ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+ tbl = ppci->table_group->tables[0];
iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
- ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(bus), 0);
- pr_debug(" created table: %p\n", ppci->iommu_table);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ iommu_init_table(tbl, ppci->phb->node);
+ iommu_register_group(ppci->table_group,
+ pci_domain_nr(bus), 0);
+ pr_debug(" created table: %p\n", ppci->table_group);
}
}
@@ -683,13 +768,15 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
struct pci_controller *phb = PCI_DN(dn)->phb;
pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- phb->node);
+ PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
+ tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
- PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
- iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
- set_iommu_table_base_and_group(&dev->dev,
- PCI_DN(dn)->iommu_table);
+ tbl->it_ops = &iommu_table_pseries_ops;
+ iommu_init_table(tbl, phb->node);
+ iommu_register_group(PCI_DN(dn)->table_group,
+ pci_domain_nr(phb->bus), 0);
+ set_iommu_table_base(&dev->dev, tbl);
+ iommu_add_device(&dev->dev);
return;
}
@@ -697,13 +784,14 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
* an already allocated iommu table is found and use that.
*/
- while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL)
+ while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
dn = dn->parent;
- if (dn && PCI_DN(dn))
- set_iommu_table_base_and_group(&dev->dev,
- PCI_DN(dn)->iommu_table);
- else
+ if (dn && PCI_DN(dn)) {
+ set_iommu_table_base(&dev->dev,
+ PCI_DN(dn)->table_group->tables[0]);
+ iommu_add_device(&dev->dev);
+ } else
printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
pci_name(dev));
}
@@ -1088,7 +1176,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %s\n", dn->full_name);
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
@@ -1104,18 +1192,21 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
pr_debug(" parent is %s\n", pdn->full_name);
pci = PCI_DN(pdn);
- if (!pci->iommu_table) {
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- pci->phb->node);
+ if (!pci->table_group) {
+ pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+ tbl = pci->table_group->tables[0];
iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
- pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
- pr_debug(" created table: %p\n", pci->iommu_table);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ iommu_init_table(tbl, pci->phb->node);
+ iommu_register_group(pci->table_group,
+ pci_domain_nr(pci->phb->bus), 0);
+ pr_debug(" created table: %p\n", pci->table_group);
} else {
- pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
+ pr_debug(" found DMA window, table: %p\n", pci->table_group);
}
- set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
+ set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
+ iommu_add_device(&dev->dev);
}
static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
@@ -1145,7 +1236,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
@@ -1189,7 +1280,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
dn = pci_device_to_OF_node(pdev);
/* search upwards for ibm,dma-window */
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table;
+ for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
dn = dn->parent)
if (of_get_property(dn, "ibm,dma-window", NULL))
break;
@@ -1269,8 +1360,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
* the device node.
*/
remove_ddw(np, false);
- if (pci && pci->iommu_table)
- iommu_free_table(pci->iommu_table, np->full_name);
+ if (pci && pci->table_group)
+ iommu_pseries_free_group(pci->table_group,
+ np->full_name);
spin_lock(&direct_window_list_lock);
list_for_each_entry(window, &direct_window_list, list) {
@@ -1300,22 +1392,11 @@ void iommu_init_early_pSeries(void)
return;
if (firmware_has_feature(FW_FEATURE_LPAR)) {
- if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
- ppc_md.tce_build = tce_buildmulti_pSeriesLP;
- ppc_md.tce_free = tce_freemulti_pSeriesLP;
- } else {
- ppc_md.tce_build = tce_build_pSeriesLP;
- ppc_md.tce_free = tce_free_pSeriesLP;
- }
- ppc_md.tce_get = tce_get_pSeriesLP;
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
} else {
- ppc_md.tce_build = tce_build_pSeries;
- ppc_md.tce_free = tce_free_pSeries;
- ppc_md.tce_get = tce_get_pseries;
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
}
@@ -1333,8 +1414,6 @@ static int __init disable_multitce(char *str)
firmware_has_feature(FW_FEATURE_LPAR) &&
firmware_has_feature(FW_FEATURE_MULTITCE)) {
printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
- ppc_md.tce_build = tce_build_pSeriesLP;
- ppc_md.tce_free = tce_free_pSeriesLP;
powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
}
return 1;
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index d00a5663e312..90bcdfeedf48 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -286,6 +286,12 @@ static int __init dart_init(struct device_node *dart_node)
return 0;
}
+static struct iommu_table_ops iommu_dart_ops = {
+ .set = dart_build,
+ .clear = dart_free,
+ .flush = dart_flush,
+};
+
static void iommu_table_dart_setup(void)
{
iommu_table_dart.it_busno = 0;
@@ -298,6 +304,7 @@ static void iommu_table_dart_setup(void)
iommu_table_dart.it_base = (unsigned long)dart_vbase;
iommu_table_dart.it_index = 0;
iommu_table_dart.it_blocksize = 1;
+ iommu_table_dart.it_ops = &iommu_dart_ops;
iommu_init_table(&iommu_table_dart, -1);
/* Reserve the last page of the DART to avoid possible prefetch
@@ -386,11 +393,6 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
if (dart_init(dn) != 0)
goto bail;
- /* Setup low level TCE operations for the core IOMMU code */
- ppc_md.tce_build = dart_build;
- ppc_md.tce_free = dart_free;
- ppc_md.tce_flush = dart_flush;
-
/* Setup bypass if supported */
if (dart_is_u4)
ppc_md.dma_set_mask = dart_dma_set_mask;
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 1cd057f11725..d77345338671 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -198,7 +198,7 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_data *idata = irq_desc_get_irq_data(desc);
- struct uic *uic = irq_get_handler_data(virq);
+ struct uic *uic = irq_desc_get_handler_data(desc);
u32 msr;
int src;
int subvirq;
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 2fc4cf1b7557..eae32654bdf2 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -147,12 +147,16 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
{
kvmppc_set_host_ipi(cpu, 1);
#ifdef CONFIG_PPC_DOORBELL
- if (cpu_has_feature(CPU_FTR_DBELL) &&
- (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id()))))
- doorbell_cause_ipi(cpu, data);
- else
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ if (cpumask_test_cpu(cpu, cpu_sibling_mask(get_cpu()))) {
+ doorbell_cause_ipi(cpu, data);
+ put_cpu();
+ return;
+ }
+ put_cpu();
+ }
#endif
- icp_native_set_qirr(cpu, IPI_PRIORITY);
+ icp_native_set_qirr(cpu, IPI_PRIORITY);
}
/*
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 5bc5889d4acc..08c248eb491b 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -227,7 +227,7 @@ void xics_migrate_irqs_away(void)
/* Locate interrupt server */
server = -1;
- ics = irq_get_chip_data(virq);
+ ics = irq_desc_get_chip_data(desc);
if (ics)
server = ics->get_server(ics, irq);
if (server < 0) {