summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig32
-rw-r--r--arch/s390/appldata/appldata_mem.c18
-rw-r--r--arch/s390/appldata/appldata_net_sum.c18
-rw-r--r--arch/s390/boot/compressed/Makefile9
-rw-r--r--arch/s390/boot/compressed/misc.c4
-rw-r--r--arch/s390/defconfig39
-rw-r--r--arch/s390/hypfs/hypfs.h13
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c2
-rw-r--r--arch/s390/hypfs/hypfs_diag.c58
-rw-r--r--arch/s390/hypfs/hypfs_vm.c65
-rw-r--r--arch/s390/hypfs/inode.c36
-rw-r--r--arch/s390/include/asm/airq.h82
-rw-r--r--arch/s390/include/asm/bitops.h14
-rw-r--r--arch/s390/include/asm/cio.h1
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/dma-mapping.h3
-rw-r--r--arch/s390/include/asm/facility.h17
-rw-r--r--arch/s390/include/asm/hardirq.h5
-rw-r--r--arch/s390/include/asm/hugetlb.h135
-rw-r--r--arch/s390/include/asm/hw_irq.h17
-rw-r--r--arch/s390/include/asm/io.h22
-rw-r--r--arch/s390/include/asm/irq.h49
-rw-r--r--arch/s390/include/asm/kprobes.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h26
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h22
-rw-r--r--arch/s390/include/asm/mutex.h2
-rw-r--r--arch/s390/include/asm/page.h19
-rw-r--r--arch/s390/include/asm/pci.h60
-rw-r--r--arch/s390/include/asm/pci_insn.h12
-rw-r--r--arch/s390/include/asm/pci_io.h10
-rw-r--r--arch/s390/include/asm/perf_event.h10
-rw-r--r--arch/s390/include/asm/pgalloc.h3
-rw-r--r--arch/s390/include/asm/pgtable.h733
-rw-r--r--arch/s390/include/asm/processor.h14
-rw-r--r--arch/s390/include/asm/ptrace.h1
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/serial.h6
-rw-r--r--arch/s390/include/asm/spinlock.h5
-rw-r--r--arch/s390/include/asm/switch_to.h13
-rw-r--r--arch/s390/include/asm/tlb.h11
-rw-r--r--arch/s390/include/asm/tlbflush.h6
-rw-r--r--arch/s390/include/asm/vtime.h7
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/chsc.h13
-rw-r--r--arch/s390/include/uapi/asm/dasd.h4
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h1
-rw-r--r--arch/s390/include/uapi/asm/sclp_ctl.h24
-rw-r--r--arch/s390/include/uapi/asm/socket.h2
-rw-r--r--arch/s390/kernel/asm-offsets.c4
-rw-r--r--arch/s390/kernel/cache.c15
-rw-r--r--arch/s390/kernel/compat_linux.c9
-rw-r--r--arch/s390/kernel/compat_signal.c10
-rw-r--r--arch/s390/kernel/crash_dump.c268
-rw-r--r--arch/s390/kernel/dumpstack.c32
-rw-r--r--arch/s390/kernel/entry.S28
-rw-r--r--arch/s390/kernel/entry.h18
-rw-r--r--arch/s390/kernel/entry64.S108
-rw-r--r--arch/s390/kernel/ftrace.c5
-rw-r--r--arch/s390/kernel/ipl.c8
-rw-r--r--arch/s390/kernel/irq.c173
-rw-r--r--arch/s390/kernel/kprobes.c165
-rw-r--r--arch/s390/kernel/machine_kexec.c2
-rw-r--r--arch/s390/kernel/nmi.c5
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c8
-rw-r--r--arch/s390/kernel/perf_event.c58
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c52
-rw-r--r--arch/s390/kernel/runtime_instr.c4
-rw-r--r--arch/s390/kernel/s390_ksyms.c1
-rw-r--r--arch/s390/kernel/sclp.S2
-rw-r--r--arch/s390/kernel/setup.c5
-rw-r--r--arch/s390/kernel/smp.c24
-rw-r--r--arch/s390/kernel/suspend.c12
-rw-r--r--arch/s390/kernel/swsusp_asm64.S7
-rw-r--r--arch/s390/kernel/sysinfo.c2
-rw-r--r--arch/s390/kernel/time.c1
-rw-r--r--arch/s390/kernel/vdso.c6
-rw-r--r--arch/s390/kernel/vtime.c7
-rw-r--r--arch/s390/kvm/Makefile3
-rw-r--r--arch/s390/kvm/diag.c20
-rw-r--r--arch/s390/kvm/gaccess.h12
-rw-r--r--arch/s390/kvm/intercept.c124
-rw-r--r--arch/s390/kvm/interrupt.c18
-rw-r--r--arch/s390/kvm/kvm-s390.c151
-rw-r--r--arch/s390/kvm/kvm-s390.h24
-rw-r--r--arch/s390/kvm/priv.c298
-rw-r--r--arch/s390/kvm/sigp.c19
-rw-r--r--arch/s390/lib/delay.c2
-rw-r--r--arch/s390/lib/uaccess_pt.c16
-rw-r--r--arch/s390/mm/dump_pagetables.c18
-rw-r--r--arch/s390/mm/fault.c8
-rw-r--r--arch/s390/mm/gup.c6
-rw-r--r--arch/s390/mm/hugetlbpage.c129
-rw-r--r--arch/s390/mm/init.c25
-rw-r--r--arch/s390/mm/maccess.c1
-rw-r--r--arch/s390/mm/mem_detect.c3
-rw-r--r--arch/s390/mm/mmap.c4
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgtable.c307
-rw-r--r--arch/s390/mm/vmem.c15
-rw-r--r--arch/s390/net/bpf_jit_comp.c113
-rw-r--r--arch/s390/oprofile/hwsampler.c6
-rw-r--r--arch/s390/oprofile/hwsampler.h4
-rw-r--r--arch/s390/oprofile/init.c37
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c679
-rw-r--r--arch/s390/pci/pci_clp.c147
-rw-r--r--arch/s390/pci/pci_debug.c29
-rw-r--r--arch/s390/pci/pci_dma.c22
-rw-r--r--arch/s390/pci/pci_event.c2
-rw-r--r--arch/s390/pci/pci_insn.c18
-rw-r--r--arch/s390/pci/pci_msi.c142
-rw-r--r--arch/s390/pci/pci_sysfs.c47
115 files changed, 2967 insertions, 2182 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index da183c5a103c..7143793859fa 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,7 @@ config S390
def_bool y
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
@@ -91,8 +92,8 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
- select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
@@ -102,7 +103,6 @@ config S390
select GENERIC_TIME_VSYSCALL_OLD
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
- select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
@@ -118,6 +118,7 @@ config S390
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
@@ -133,15 +134,15 @@ config S390
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16 if 32BIT
select HAVE_VIRT_CPU_ACCOUNTING
- select VIRT_TO_BUS
select INIT_ALL_POSSIBLE
select KTIME_SCALAR if 32BIT
select MODULES_USE_ELF_RELA
- select OLD_SIGSUSPEND3
select OLD_SIGACTION
+ select OLD_SIGSUSPEND3
select SYSCTL_EXCEPTION_TRACE
select USE_GENERIC_SMP_HELPERS if SMP
select VIRT_CPU_ACCOUNTING
+ select VIRT_TO_BUS
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -227,11 +228,12 @@ config MARCH_Z196
not work on older machines.
config MARCH_ZEC12
- bool "IBM zEC12"
+ bool "IBM zBC12 and zEC12"
select HAVE_MARCH_ZEC12_FEATURES if 64BIT
help
- Select this to enable optimizations for IBM zEC12 (2827 series). The
- kernel will be slightly faster but will not work on older machines.
+ Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+ 2827 series). The kernel will be slightly faster but will not work on
+ older machines.
endchoice
@@ -301,7 +303,6 @@ config HOTPLUG_CPU
def_bool y
prompt "Support for hot-pluggable CPUs"
depends on SMP
- select HOTPLUG
help
Say Y here to be able to turn CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
@@ -429,7 +430,6 @@ menuconfig PCI
bool "PCI support"
default n
depends on 64BIT
- select ARCH_SUPPORTS_MSI
select PCI_MSI
help
Enable PCI support.
@@ -444,6 +444,16 @@ config PCI_NR_FUNCTIONS
This allows you to specify the maximum number of PCI functions which
this kernel will support.
+config PCI_NR_MSI
+ int "Maximum number of MSI interrupts (64-32768)"
+ range 64 32768
+ default "256"
+ help
+ This defines the number of virtual interrupts the kernel will
+ provide for MSI interrupts. If you configure your system to have
+ too few drivers will fail to allocate MSI interrupts for all
+ PCI devices.
+
source "drivers/pci/Kconfig"
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/hotplug/Kconfig"
@@ -515,6 +525,7 @@ config CRASH_DUMP
bool "kernel crash dumps"
depends on 64BIT && SMP
select KEXEC
+ select ZFCPDUMP
help
Generate crash dump after being started by kexec.
Crash dump kernels are loaded in the main kernel with kexec-tools
@@ -525,7 +536,7 @@ config CRASH_DUMP
config ZFCPDUMP
def_bool n
prompt "zfcpdump support"
- select SMP
+ depends on SMP
help
Select this option if you want to build an zfcpdump enabled kernel.
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
@@ -710,6 +721,7 @@ config S390_GUEST
def_bool y
prompt "s390 support for virtio devices"
depends on 64BIT
+ select TTY
select VIRTUALIZATION
select VIRTIO
select VIRTIO_CONSOLE
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 7ef60b52d6e0..42be53743133 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -32,7 +32,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_mem_data {
+struct appldata_mem_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -63,7 +63,7 @@ static struct appldata_mem_data {
u64 pgmajfault; /* page faults (major only) */
// <-- New in 2.6
-} __attribute__((packed)) appldata_mem_data;
+} __packed;
/*
@@ -118,7 +118,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_MEM_ID,
.size = sizeof(struct appldata_mem_data),
.callback = &appldata_get_mem_data,
- .data = &appldata_mem_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -131,7 +130,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_mem_init(void)
{
- return appldata_register_ops(&ops);
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
}
/*
@@ -142,6 +151,7 @@ static int __init appldata_mem_init(void)
static void __exit appldata_mem_exit(void)
{
appldata_unregister_ops(&ops);
+ kfree(ops.data);
}
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 2d224b945355..66037d2622b4 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -29,7 +29,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_net_sum_data {
+struct appldata_net_sum_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -51,7 +51,7 @@ static struct appldata_net_sum_data {
u64 rx_dropped; /* no space in linux buffers */
u64 tx_dropped; /* no space available in linux */
u64 collisions; /* collisions while transmitting */
-} __attribute__((packed)) appldata_net_sum_data;
+} __packed;
/*
@@ -121,7 +121,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_NET_SUM_ID,
.size = sizeof(struct appldata_net_sum_data),
.callback = &appldata_get_net_sum_data,
- .data = &appldata_net_sum_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -134,7 +133,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_net_init(void)
{
- return appldata_register_ops(&ops);
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
}
/*
@@ -145,6 +154,7 @@ static int __init appldata_net_init(void)
static void __exit appldata_net_exit(void)
{
appldata_unregister_ops(&ops);
+ kfree(ops.data);
}
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 3ad8f61c9985..866ecbe670e4 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -6,9 +6,9 @@
BITS := $(if $(CONFIG_64BIT),64,31)
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
- vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \
- sizes.h head$(BITS).o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head$(BITS).o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -48,6 +48,7 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_XZ) := xz
@@ -56,6 +57,8 @@ $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+ $(call if_changed,lz4)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
$(call if_changed,lzma)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index c4c6a1cf221b..57cbaff1f397 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -47,6 +47,10 @@ static unsigned long free_mem_end_ptr;
#include "../../../../lib/decompress_bunzip2.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
#ifdef CONFIG_KERNEL_LZMA
#include "../../../../lib/decompress_unlzma.c"
#endif
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index b74400e3e035..d204c65bf722 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,14 +1,13 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -27,6 +26,7 @@ CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
+CONFIG_RD_LZ4=y
CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
@@ -38,11 +38,13 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
+# CONFIG_EFI_PARTITION is not set
CONFIG_DEFAULT_DEADLINE=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -92,40 +94,49 @@ CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=y
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
CONFIG_TUN=m
CONFIG_VIRTIO_NET=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
CONFIG_RAW_DRIVER=m
CONFIG_VIRTIO_BALLOON=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
CONFIG_TIMER_STATS=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_TRACE=y
-CONFIG_KPROBES_SANITY_TEST=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_PAGEALLOC=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KPROBES_SANITY_TEST=y
# CONFIG_STRICT_DEVMEM is not set
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
@@ -137,8 +148,10 @@ CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
@@ -165,6 +178,8 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
CONFIG_ZCRYPT=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index f41e0ef7fdf9..79f2ac55253f 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -18,26 +18,23 @@
#define UPDATE_FILE_MODE 0220
#define DIR_MODE 0550
-extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name);
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
-extern struct dentry *hypfs_create_u64(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
__u64 value);
-extern struct dentry *hypfs_create_str(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
char *string);
/* LPAR Hypervisor */
extern int hypfs_diag_init(void);
extern void hypfs_diag_exit(void);
-extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_diag_create_files(struct dentry *root);
/* VM Hypervisor */
extern int hypfs_vm_init(void);
extern void hypfs_vm_exit(void);
-extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_vm_create_files(struct dentry *root);
/* debugfs interface */
struct hypfs_dbfs_file;
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index bb5dd496614f..17ab8b7b53cc 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
int hypfs_dbfs_init(void)
{
dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
- return PTR_RET(dbfs_dir);
+ return PTR_ERR_OR_ZERO(dbfs_dir);
}
void hypfs_dbfs_exit(void)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 7fd3690b6760..5eeffeefae06 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -623,8 +623,7 @@ void hypfs_diag_exit(void)
* *******************************************
*/
-static int hypfs_create_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -632,32 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
cpu_info__acc_time(diag204_info_type, cpu_info) -
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
- rc = hypfs_create_u64(sb, cpu_dir, "cputime",
+ rc = hypfs_create_u64(cpu_dir, "cputime",
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
if (diag204_info_type == INFO_EXT) {
- rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
+ rc = hypfs_create_u64(cpu_dir, "onlinetime",
cpu_info__online_time(diag204_info_type,
cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
}
diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_lpar_files(struct super_block *sb,
- struct dentry *systems_dir, void *part_hdr)
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
{
struct dentry *cpus_dir;
struct dentry *lpar_dir;
@@ -667,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
lpar_name[LPAR_NAME_LEN] = 0;
- lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
+ lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
if (IS_ERR(lpar_dir))
return lpar_dir;
- cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
+ cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = part_hdr + part_hdr__size(diag204_info_type);
for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
int rc;
- rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += cpu_info__size(diag204_info_type);
@@ -684,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
return cpu_info;
}
-static int hypfs_create_phys_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -693,34 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
if (IS_ERR(cpu_dir))
return PTR_ERR(cpu_dir);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
phys_cpu__mgm_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_phys_files(struct super_block *sb,
- struct dentry *parent_dir, void *phys_hdr)
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
{
int i;
void *cpu_info;
struct dentry *cpus_dir;
- cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
+ cpus_dir = hypfs_mkdir(parent_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
int rc;
- rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += phys_cpu__size(diag204_info_type);
@@ -728,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb,
return cpu_info;
}
-int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_diag_create_files(struct dentry *root)
{
struct dentry *systems_dir, *hyp_dir;
void *time_hdr, *part_hdr;
@@ -739,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
if (IS_ERR(buffer))
return PTR_ERR(buffer);
- systems_dir = hypfs_mkdir(sb, root, "systems");
+ systems_dir = hypfs_mkdir(root, "systems");
if (IS_ERR(systems_dir)) {
rc = PTR_ERR(systems_dir);
goto err_out;
@@ -747,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
time_hdr = (struct x_info_blk_hdr *)buffer;
part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
- part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
+ part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
if (IS_ERR(part_hdr)) {
rc = PTR_ERR(part_hdr);
goto err_out;
}
}
if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
- ptr = hypfs_create_phys_files(sb, root, part_hdr);
+ ptr = hypfs_create_phys_files(root, part_hdr);
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
}
}
- hyp_dir = hypfs_mkdir(sb, root, "hyp");
+ hyp_dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(hyp_dir)) {
rc = PTR_ERR(hyp_dir);
goto err_out;
}
- ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
+ ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index f364dcf77e8e..24908ce149f1 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -107,16 +107,15 @@ static void diag2fc_free(const void *data)
vfree(data);
}
-#define ATTRIBUTE(sb, dir, name, member) \
+#define ATTRIBUTE(dir, name, member) \
do { \
void *rc; \
- rc = hypfs_create_u64(sb, dir, name, member); \
+ rc = hypfs_create_u64(dir, name, member); \
if (IS_ERR(rc)) \
return PTR_ERR(rc); \
} while(0)
-static int hpyfs_vm_create_guest(struct super_block *sb,
- struct dentry *systems_dir,
+static int hpyfs_vm_create_guest(struct dentry *systems_dir,
struct diag2fc_data *data)
{
char guest_name[NAME_LEN + 1] = {};
@@ -130,46 +129,46 @@ static int hpyfs_vm_create_guest(struct super_block *sb,
memcpy(guest_name, data->guest_name, NAME_LEN);
EBCASC(guest_name, NAME_LEN);
strim(guest_name);
- guest_dir = hypfs_mkdir(sb, systems_dir, guest_name);
+ guest_dir = hypfs_mkdir(systems_dir, guest_name);
if (IS_ERR(guest_dir))
return PTR_ERR(guest_dir);
- ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time);
+ ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
/* logical cpu information */
- cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus");
+ cpus_dir = hypfs_mkdir(guest_dir, "cpus");
if (IS_ERR(cpus_dir))
return PTR_ERR(cpus_dir);
- ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu);
- ATTRIBUTE(sb, cpus_dir, "capped", capped_value);
- ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag);
- ATTRIBUTE(sb, cpus_dir, "count", data->vcpus);
- ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min);
- ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max);
- ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares);
+ ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+ ATTRIBUTE(cpus_dir, "capped", capped_value);
+ ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+ ATTRIBUTE(cpus_dir, "count", data->vcpus);
+ ATTRIBUTE(cpus_dir, "weight_min", data->cpu_min);
+ ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+ ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
/* memory information */
- mem_dir = hypfs_mkdir(sb, guest_dir, "mem");
+ mem_dir = hypfs_mkdir(guest_dir, "mem");
if (IS_ERR(mem_dir))
return PTR_ERR(mem_dir);
- ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb);
- ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb);
- ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb);
- ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb);
+ ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+ ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+ ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+ ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
/* samples */
- samples_dir = hypfs_mkdir(sb, guest_dir, "samples");
+ samples_dir = hypfs_mkdir(guest_dir, "samples");
if (IS_ERR(samples_dir))
return PTR_ERR(samples_dir);
- ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp);
- ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp);
- ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp);
- ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp);
- ATTRIBUTE(sb, samples_dir, "other", data->other_samp);
- ATTRIBUTE(sb, samples_dir, "total", data->total_samp);
+ ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+ ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+ ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+ ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+ ATTRIBUTE(samples_dir, "other", data->other_samp);
+ ATTRIBUTE(samples_dir, "total", data->total_samp);
return 0;
}
-int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_vm_create_files(struct dentry *root)
{
struct dentry *dir, *file;
struct diag2fc_data *data;
@@ -181,38 +180,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
return PTR_ERR(data);
/* Hpervisor Info */
- dir = hypfs_mkdir(sb, root, "hyp");
+ dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor");
+ file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* physical cpus */
- dir = hypfs_mkdir(sb, root, "cpus");
+ dir = hypfs_mkdir(root, "cpus");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_u64(sb, dir, "count", data->lcpus);
+ file = hypfs_create_u64(dir, "count", data->lcpus);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* guests */
- dir = hypfs_mkdir(sb, root, "systems");
+ dir = hypfs_mkdir(root, "systems");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
for (i = 0; i < count; i++) {
- rc = hpyfs_vm_create_guest(sb, dir, &(data[i]));
+ rc = hpyfs_vm_create_guest(dir, &(data[i]));
if (rc)
goto failed;
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 7a539f4f5e30..ddfe09b45134 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -28,8 +28,7 @@
#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */
#define TMP_SIZE 64 /* size of temporary buffers */
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir);
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
struct hypfs_sb_info {
kuid_t uid; /* uid used for files and dirs */
@@ -193,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
}
hypfs_delete_tree(sb->s_root);
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, sb->s_root);
+ rc = hypfs_vm_create_files(sb->s_root);
else
- rc = hypfs_diag_create_files(sb, sb->s_root);
+ rc = hypfs_diag_create_files(sb->s_root);
if (rc) {
pr_err("Updating the hypfs tree failed\n");
hypfs_delete_tree(sb->s_root);
@@ -302,12 +301,12 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
if (!root_dentry)
return -ENOMEM;
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, root_dentry);
+ rc = hypfs_vm_create_files(root_dentry);
else
- rc = hypfs_diag_create_files(sb, root_dentry);
+ rc = hypfs_diag_create_files(root_dentry);
if (rc)
return rc;
- sbi->update_file = hypfs_create_update_file(sb, root_dentry);
+ sbi->update_file = hypfs_create_update_file(root_dentry);
if (IS_ERR(sbi->update_file))
return PTR_ERR(sbi->update_file);
hypfs_update_update(sb);
@@ -334,8 +333,7 @@ static void hypfs_kill_super(struct super_block *sb)
kill_litter_super(sb);
}
-static struct dentry *hypfs_create_file(struct super_block *sb,
- struct dentry *parent, const char *name,
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
char *data, umode_t mode)
{
struct dentry *dentry;
@@ -347,7 +345,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
dentry = ERR_PTR(-ENOMEM);
goto fail;
}
- inode = hypfs_make_inode(sb, mode);
+ inode = hypfs_make_inode(parent->d_sb, mode);
if (!inode) {
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
@@ -373,24 +371,22 @@ fail:
return dentry;
}
-struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name)
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE);
+ dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
if (IS_ERR(dentry))
return dentry;
hypfs_add_dentry(dentry);
return dentry;
}
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir)
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, dir, "update", NULL,
+ dentry = hypfs_create_file(dir, "update", NULL,
S_IFREG | UPDATE_FILE_MODE);
/*
* We do not put the update file on the 'delete' list with
@@ -400,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb,
return dentry;
}
-struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_u64(struct dentry *dir,
const char *name, __u64 value)
{
char *buffer;
@@ -412,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
if (!buffer)
return ERR_PTR(-ENOMEM);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
@@ -421,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
return dentry;
}
-struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_str(struct dentry *dir,
const char *name, char *string)
{
char *buffer;
@@ -432,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
return ERR_PTR(-ENOMEM);
sprintf(buffer, "%s\n", string);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 9819891ed7a2..4bbb5957ed1b 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -9,9 +9,85 @@
#ifndef _ASM_S390_AIRQ_H
#define _ASM_S390_AIRQ_H
-typedef void (*adapter_int_handler_t)(void *, void *);
+#include <linux/bit_spinlock.h>
-void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8);
-void s390_unregister_adapter_interrupt(void *, u8);
+struct airq_struct {
+ struct hlist_node list; /* Handler queueing. */
+ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
+ u8 lsi_mask; /* Local-Summary-Indicator mask */
+ u8 isc; /* Interrupt-subclass */
+ u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED 0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/* Adapter interrupt bit vector */
+struct airq_iv {
+ unsigned long *vector; /* Adapter interrupt bit vector */
+ unsigned long *avail; /* Allocation bit mask for the bit vector */
+ unsigned long *bitlock; /* Lock bit mask for the bit vector */
+ unsigned long *ptr; /* Pointer associated with each bit */
+ unsigned int *data; /* 32 bit value associated with each bit */
+ unsigned long bits; /* Number of bits in the vector */
+ unsigned long end; /* Number of highest allocated bit + 1 */
+ spinlock_t lock; /* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc_bit(struct airq_iv *iv);
+void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+ unsigned long end);
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+ return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+ unsigned int data)
+{
+ iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+ unsigned long ptr)
+{
+ iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->ptr[bit];
+}
#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 4d8604e311f3..10135a38673c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
asm volatile(
" xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
}
static inline void
@@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr)
ch = *(unsigned char *) addr;
asm volatile(
" xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+ : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
: "cc", "memory");
return (ch >> (nr & 7)) & 1;
}
@@ -693,7 +693,7 @@ static inline int find_next_bit_left(const unsigned long *addr,
size -= offset;
p = addr + offset / BITS_PER_LONG;
if (bit) {
- set = __flo_word(0, *p & (~0UL << bit));
+ set = __flo_word(0, *p & (~0UL >> bit));
if (set >= size)
return size + offset;
if (set < BITS_PER_LONG)
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index ffb898961c8d..d42625053c37 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
return 0;
}
+void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 886ac7d4937a..3fbc67d9e197 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -50,9 +50,10 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ debug_dma_mapping_error(dev, dma_addr);
if (dma_ops->mapping_error)
return dma_ops->mapping_error(dev, dma_addr);
- return (dma_addr == 0UL);
+ return dma_addr == DMA_ERROR_CODE;
}
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 2ee66a65f2d4..0aa6a7ed95a3 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -13,6 +13,16 @@
#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return 0;
+ ptr = (unsigned char *) facilities + (nr >> 3);
+ return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
/*
* The test_facility function uses the bit odering where the MSB is bit 0.
* That makes it easier to query facility bits with the bit number as
@@ -20,12 +30,7 @@
*/
static inline int test_facility(unsigned long nr)
{
- unsigned char *ptr;
-
- if (nr >= MAX_FACILITY_BIT)
- return 0;
- ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3);
- return (*ptr & (0x80 >> (nr & 7))) != 0;
+ return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
/**
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 0c82ba86e997..a908d2941c5d 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -20,4 +20,9 @@
#define HARDIRQ_BITS 8
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index bd90359d6d22..11eae5f55b70 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -17,6 +17,9 @@
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
/*
* If the arch doesn't supply something else, assume that hugepage
@@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file,
int arch_prepare_hugepage(struct page *page);
void arch_release_hugepage(struct page *page);
-static inline pte_t huge_pte_wrprotect(pte_t pte)
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- pte_val(pte) |= _PAGE_RO;
- return pte;
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
}
-static inline int huge_pte_none(pte_t pte)
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
- return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
- !(pte_val(pte) & _SEGMENT_ENTRY_RO);
+ huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
{
- pte_t pte = *ptep;
- unsigned long mask;
-
- if (!MACHINE_HAS_HPAGE) {
- ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
- if (ptep) {
- mask = pte_val(pte) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte = pte_mkhuge(*ptep);
- pte_val(pte) |= mask;
- }
+ int changed = !pte_same(huge_ptep_get(ptep), pte);
+ if (changed) {
+ huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
- return pte;
+ return changed;
}
-static inline void __pmd_csp(pmd_t *pmdp)
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- register unsigned long reg2 asm("2") = pmd_val(*pmdp);
- register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
- _SEGMENT_ENTRY_INV;
- register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
- asm volatile(
- " csp %1,%3"
- : "=m" (*pmdp)
- : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+ pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
-static inline void huge_ptep_invalidate(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- pmd_t *pmdp = (pmd_t *) ptep;
-
- if (MACHINE_HAS_IDTE)
- __pmd_idte(address, pmdp);
- else
- __pmd_csp(pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = huge_ptep_get(ptep);
-
- huge_ptep_invalidate(mm, addr, ptep);
- return pte;
-}
-
-#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({ \
- int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \
- if (__changed) { \
- huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \
- set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \
- } \
- __changed; \
-})
-
-#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \
-({ \
- pte_t __pte = huge_ptep_get(__ptep); \
- if (huge_pte_write(__pte)) { \
- huge_ptep_invalidate(__mm, __addr, __ptep); \
- set_huge_pte_at(__mm, __addr, __ptep, \
- huge_pte_wrprotect(__pte)); \
- } \
-})
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
{
- huge_ptep_invalidate(vma->vm_mm, address, ptep);
+ return mk_pte(page, pgprot);
}
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+static inline int huge_pte_none(pte_t pte)
{
- pte_t pte;
- pmd_t pmd;
-
- pmd = mk_pmd_phys(page_to_phys(page), pgprot);
- pte_val(pte) = pmd_val(pmd);
- return pte;
+ return pte_none(pte);
}
static inline int huge_pte_write(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- return pmd_write(pmd);
+ return pte_write(pte);
}
static inline int huge_pte_dirty(pte_t pte)
{
- /* No dirty bit in the segment table entry. */
- return 0;
+ return pte_dirty(pte);
}
static inline pte_t huge_pte_mkwrite(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- pte_val(pte) = pmd_val(pmd_mkwrite(pmd));
- return pte;
+ return pte_mkwrite(pte);
}
static inline pte_t huge_pte_mkdirty(pte_t pte)
{
- /* No dirty bit in the segment table entry. */
- return pte;
+ return pte_mkdirty(pte);
}
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t huge_pte_wrprotect(pte_t pte)
{
- pmd_t pmd;
-
- pmd_val(pmd) = pte_val(pte);
- pte_val(pte) = pmd_val(pmd_modify(pmd, newprot));
- return pte;
+ return pte_wrprotect(pte);
}
-static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
{
- pmd_clear((pmd_t *) ptep);
+ return pte_modify(pte, newprot);
}
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index 7e3d2586c1ff..ee96a8b697f9 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -4,19 +4,8 @@
#include <linux/msi.h>
#include <linux/pci.h>
-static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
-{
- return __irq_get_msi_desc(irq);
-}
-
-/* Must be called with msi map lock held */
-static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
-{
- if (!msi)
- return -EINVAL;
-
- msi->irq = irq;
- return 0;
-}
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
#endif
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index fd9be010f9b2..cd6b9ee7b69c 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -13,28 +13,6 @@
#include <asm/page.h>
#include <asm/pci_io.h>
-/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(volatile void * address)
-{
- unsigned long real_address;
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_address) : "a" (address) : "cc");
- return real_address;
-}
-#define virt_to_phys virt_to_phys
-
-static inline void * phys_to_virt(unsigned long address)
-{
- return (void *) address;
-}
-
void *xlate_dev_mem_ptr(unsigned long phys);
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 87c17bfb2968..5f8bcc5fe423 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,17 +1,28 @@
#ifndef _ASM_IRQ_H
#define _ASM_IRQ_H
+#define EXT_INTERRUPT 1
+#define IO_INTERRUPT 2
+#define THIN_INTERRUPT 3
+
+#define NR_IRQS_BASE 4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ 0
+
+#ifndef __ASSEMBLY__
+
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/cache.h>
#include <linux/types.h>
-enum interruption_main_class {
- EXTERNAL_INTERRUPT,
- IO_INTERRUPT,
- NR_IRQS
-};
-
enum interruption_class {
IRQEXT_CLK,
IRQEXT_EXC,
@@ -67,19 +78,17 @@ typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
int register_external_interrupt(u16 code, ext_int_handler_t handler);
int unregister_external_interrupt(u16 code, ext_int_handler_t handler);
-void service_subclass_irq_register(void);
-void service_subclass_irq_unregister(void);
-void measurement_alert_subclass_register(void);
-void measurement_alert_subclass_unregister(void);
-
-#ifdef CONFIG_LOCKDEP
-# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq)
-# define disable_irq_nosync_lockdep_irqsave(irq, flags) \
- disable_irq_nosync(irq)
-# define disable_irq_lockdep(irq) disable_irq(irq)
-# define enable_irq_lockdep(irq) enable_irq(irq)
-# define enable_irq_lockdep_irqrestore(irq, flags) \
- enable_irq(irq)
-#endif
+
+enum irq_subclass {
+ IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+ IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
+#define irq_canonicalize(irq) (irq)
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index dcf6948a875c..4176dfe0fba1 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -31,6 +31,8 @@
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
struct pt_regs;
struct kprobe;
@@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t;
/* Architecture specific copy of original instruction */
struct arch_specific_insn {
/* copy of original instruction */
- kprobe_opcode_t insn[MAX_INSN_SIZE];
+ kprobe_opcode_t *insn;
};
struct prev_kprobe {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..e87ecaa2c569 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -62,13 +62,20 @@ struct sca_block {
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
#define CPUSTAT_P 0x00000001
struct kvm_s390_sie_block {
atomic_t cpuflags; /* 0x0000 */
__u32 prefix; /* 0x0004 */
- __u8 reserved8[32]; /* 0x0008 */
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ __u8 reserved10[16]; /* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
@@ -90,7 +97,8 @@ struct kvm_s390_sie_block {
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
__u8 ecb; /* 0x0061 */
- __u8 reserved62[2]; /* 0x0062 */
+ __u8 ecb2; /* 0x0062 */
+ __u8 reserved63[1]; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
__u32 todpr; /* 0x006c */
@@ -130,6 +138,7 @@ struct kvm_vcpu_stat {
u32 deliver_program_int;
u32 deliver_io_int;
u32 exit_wait_state;
+ u32 instruction_pfmf;
u32 instruction_stidp;
u32 instruction_spx;
u32 instruction_stpx;
@@ -166,7 +175,7 @@ struct kvm_s390_ext_info {
};
#define PGM_OPERATION 0x01
-#define PGM_PRIVILEGED_OPERATION 0x02
+#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_PROTECTION 0x04
#define PGM_ADDRESSING 0x05
@@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt {
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
int timer_due; /* event indicator for waitqueue below */
- wait_queue_head_t wq;
+ wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
};
@@ -265,5 +274,14 @@ struct kvm_arch{
int css_support;
};
+#define KVM_HVA_ERR_BAD (-1UL)
+#define KVM_HVA_ERR_RO_BAD (-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+ return IS_ERR_VALUE(addr);
+}
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6340178748bf..ff132ac64ddd 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,8 +12,6 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
- /* Cloned contexts will be created with extended page tables. */
- unsigned int alloc_pgste:1;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
} mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 084e7755ed9b..9f973d8de90e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
- if (current->mm && current->mm->context.alloc_pgste) {
- /*
- * alloc_pgste indicates, that any NEW context will be created
- * with extended page tables. The old context is unchanged. The
- * page table allocation and the page table operations will
- * look at has_pgste to distinguish normal and extended page
- * tables. The only way to create extended page tables is to
- * set alloc_pgste and then create a new context (e.g. dup_mm).
- * The page table allocation is called after init_new_context
- * and if has_pgste is set, it will create extended page
- * tables.
- */
- mm->context.has_pgste = 1;
- mm->context.alloc_pgste = 1;
- } else {
- mm->context.has_pgste = 0;
- mm->context.alloc_pgste = 0;
- }
+ mm->context.has_pgste = 0;
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
@@ -77,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
WARN_ON(atomic_read(&prev->context.attach_count) < 0);
atomic_inc(&next->context.attach_count);
/* Check for TLBs not flushed yet */
- if (next->context.flush_mm)
- __tlb_flush_mm(next);
+ __tlb_flush_mm_lazy(next);
}
#define enter_lazy_tlb(mm,tsk) do { } while (0)
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f5f2e4..458c1f7fbc18 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
*/
#include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax() barrier()
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 5d64fb7619cc..1e51f2915b2e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -32,16 +32,6 @@
void storage_key_init_range(unsigned long start, unsigned long end);
-static inline unsigned long pfmf(unsigned long function, unsigned long address)
-{
- asm volatile(
- " .insn rre,0xb9af0000,%[function],%[address]"
- : [address] "+a" (address)
- : [function] "d" (function)
- : "memory");
- return address;
-}
-
static inline void clear_page(void *page)
{
register unsigned long reg1 asm ("1") = 0;
@@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(unsigned long pfn)
-{
- return page_reset_referenced(pfn << PAGE_SHIFT);
-}
-
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6c1801235db9..1cc185da9d38 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,6 +6,7 @@
/* must be set before including pci_clp.h */
#define PCI_BAR_COUNT 6
+#include <linux/pci.h>
#include <asm-generic/pci.h>
#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
@@ -21,10 +22,6 @@ void pci_iounmap(struct pci_dev *, void __iomem *);
int pci_domain_nr(struct pci_bus *);
int pci_proc_domain(struct pci_bus *);
-/* MSI arch hooks */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
-#define arch_teardown_msi_irqs arch_teardown_msi_irqs
-
#define ZPCI_BUS_NR 0 /* default bus number */
#define ZPCI_DEVFN 0 /* default device number */
@@ -53,14 +50,9 @@ struct zpci_fmb {
atomic64_t unmapped_pages;
} __packed __aligned(16);
-struct msi_map {
- unsigned long irq;
- struct msi_desc *msi;
- struct hlist_node msi_chain;
-};
-
-#define ZPCI_NR_MSI_VECS 64
-#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1)
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -91,8 +83,7 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
- struct zdev_irq_map *irq_map;
- struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+ struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
/* DMA stuff */
@@ -120,12 +111,6 @@ struct zpci_dev {
struct dentry *debugfs_dev;
struct dentry *debugfs_perf;
- struct dentry *debugfs_debug;
-};
-
-struct pci_hp_callback_ops {
- int (*create_slot) (struct zpci_dev *zdev);
- void (*remove_slot) (struct zpci_dev *zdev);
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -143,37 +128,42 @@ int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
void zpci_stop_device(struct zpci_dev *);
void zpci_free_device(struct zpci_dev *);
-int zpci_scan_device(struct zpci_dev *);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
/* CLP */
-int clp_find_pci_devices(void);
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
-/* MSI */
-struct msi_desc *__irq_get_msi_desc(unsigned int);
-int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
-int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
-void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
-int zpci_msihash_init(void);
-void zpci_msihash_exit(void);
-
#ifdef CONFIG_PCI
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
+void zpci_rescan(void);
#else /* CONFIG_PCI */
static inline void zpci_event_error(void *e) {}
static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
#endif /* CONFIG_PCI */
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+ return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
/* Helpers */
struct zpci_dev *get_zdev(struct pci_dev *);
struct zpci_dev *get_zdev_by_fid(u32);
-bool zpci_fid_present(u32);
/* sysfs */
int zpci_sysfs_add_device(struct device *);
@@ -183,14 +173,6 @@ void zpci_sysfs_remove_device(struct device *);
int zpci_dma_init(void);
void zpci_dma_exit(void);
-/* Hotplug */
-extern struct mutex zpci_list_lock;
-extern struct list_head zpci_list;
-extern unsigned int s390_pci_probe;
-
-void zpci_register_hp_ops(struct pci_hp_callback_ops *);
-void zpci_deregister_hp_ops(void);
-
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index e6a2bdd4d705..df6eac9f0cb4 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -79,11 +79,11 @@ struct zpci_fib {
} __packed;
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib);
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range);
-int s390pci_load(u64 *data, u64 req, u64 offset);
-int s390pci_store(u64 data, u64 req, u64 offset);
-int s390pci_store_block(const u64 *data, u64 req, u64 offset);
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 83a9caa6ae53..d194d544d694 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
u64 data; \
int rc; \
\
- rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \
+ rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
if (rc) \
data = -1ULL; \
return (RETTYPE) data; \
@@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \
u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data = (VALTYPE) val; \
\
- s390pci_store(data, req, ZPCI_OFFSET(addr)); \
+ zpci_store(data, req, ZPCI_OFFSET(addr)); \
}
zpci_read(8, u64)
@@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len
val = 0; /* let FW report error */
break;
}
- return s390pci_store(val, req, offset);
+ return zpci_store(val, req, offset);
}
static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
@@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
u64 data;
int cc;
- cc = s390pci_load(&data, req, offset);
+ cc = zpci_load(&data, req, offset);
if (cc)
goto out;
@@ -115,7 +115,7 @@ out:
static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
{
- return s390pci_store_block(data, req, offset);
+ return zpci_store_block(data, req, offset);
}
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 5f0173a31693..1141fb3e7b21 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -14,3 +14,13 @@
/* Per-CPU flags for PMU states */
#define PMU_F_RESERVED 0x1000
#define PMU_F_ENABLED 0x2000
+
+#ifdef CONFIG_64BIT
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+#endif /* CONFIG_64BIT */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 590c3219c634..e1408ddb94f8 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned long key, bool nq);
+
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ac01463038f1..9b60a36c348d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -58,9 +58,6 @@ extern unsigned long zero_page_mask;
#define __HAVE_COLOR_ZERO_PAGE
/* TODO: s390 cannot support io_remap_pfn_range... */
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
#endif /* !__ASSEMBLY__ */
/*
@@ -220,63 +217,57 @@ extern unsigned long MODULES_END;
/* Hardware bits in the page table entry */
#define _PAGE_CO 0x100 /* HW Change-bit override */
-#define _PAGE_RO 0x200 /* HW read-only bit */
+#define _PAGE_PROTECT 0x200 /* HW read-only bit */
#define _PAGE_INVALID 0x400 /* HW invalid bit */
+#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
/* Software bits in the page table entry */
-#define _PAGE_SWT 0x001 /* SW pte type bit t */
-#define _PAGE_SWX 0x002 /* SW pte type bit x */
-#define _PAGE_SWC 0x004 /* SW pte changed bit */
-#define _PAGE_SWR 0x008 /* SW pte referenced bit */
-#define _PAGE_SWW 0x010 /* SW pte write bit */
-#define _PAGE_SPECIAL 0x020 /* SW associated with special page */
+#define _PAGE_PRESENT 0x001 /* SW pte present bit */
+#define _PAGE_TYPE 0x002 /* SW pte type bit */
+#define _PAGE_YOUNG 0x004 /* SW pte young bit */
+#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
+#define _PAGE_READ 0x010 /* SW pte read bit */
+#define _PAGE_WRITE 0x020 /* SW pte write bit */
+#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
#define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
- _PAGE_SWC | _PAGE_SWR)
-
-/* Six different types of pages. */
-#define _PAGE_TYPE_EMPTY 0x400
-#define _PAGE_TYPE_NONE 0x401
-#define _PAGE_TYPE_SWAP 0x403
-#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
-#define _PAGE_TYPE_RO 0x200
-#define _PAGE_TYPE_RW 0x000
-
-/*
- * Only four types for huge pages, using the invalid bit and protection bit
- * of a segment table entry.
- */
-#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */
-#define _HPAGE_TYPE_NONE 0x220
-#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */
-#define _HPAGE_TYPE_RW 0x000
+ _PAGE_DIRTY | _PAGE_YOUNG)
/*
- * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
- * pte_none and pte_file to find out the pte type WITHOUT holding the page
- * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
- * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs
- * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards.
- * This change is done while holding the lock, but the intermediate step
- * of a previously valid pte with the hw invalid bit set can be observed by
- * handle_pte_fault. That makes it necessary that all valid pte types with
- * the hw invalid bit set must be distinguishable from the four pte types
- * empty, none, swap and file.
+ * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
+ * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
+ * is used to distinguish present from not-present ptes. It is changed only
+ * with the page table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte:
*
- * irxt ipte irxt
- * _PAGE_TYPE_EMPTY 1000 -> 1000
- * _PAGE_TYPE_NONE 1001 -> 1001
- * _PAGE_TYPE_SWAP 1011 -> 1011
- * _PAGE_TYPE_FILE 11?1 -> 11?1
- * _PAGE_TYPE_RO 0100 -> 1100
- * _PAGE_TYPE_RW 0000 -> 1000
+ * 842100000000
+ * 000084210000
+ * 000000008421
+ * .IR...wrdytp
+ * empty .10...000000
+ * swap .10...xxxx10
+ * file .11...xxxxx0
+ * prot-none, clean, old .11...000001
+ * prot-none, clean, young .11...000101
+ * prot-none, dirty, old .10...001001
+ * prot-none, dirty, young .10...001101
+ * read-only, clean, old .11...010001
+ * read-only, clean, young .01...010101
+ * read-only, dirty, old .11...011001
+ * read-only, dirty, young .01...011101
+ * read-write, clean, old .11...110001
+ * read-write, clean, young .01...110101
+ * read-write, dirty, old .10...111001
+ * read-write, dirty, young .00...111101
*
- * pte_none is true for bits combinations 1000, 1010, 1100, 1110
- * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
- * pte_file is true for bits combinations 1101, 1111
- * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
+ * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
+ * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
+ * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
*/
#ifndef CONFIG_64BIT
@@ -289,28 +280,35 @@ extern unsigned long MODULES_END;
#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+/*
+ * Segment table entry encoding (I = invalid, R = read-only bit):
+ * ..R...I.....
+ * prot-none ..1...1.....
+ * read-only ..1...0.....
+ * read-write ..0...0.....
+ * empty ..0...1.....
+ */
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf0000000UL
-#define RCP_FP_BIT 0x08000000UL
-#define RCP_PCL_BIT 0x00800000UL
-#define RCP_HR_BIT 0x00400000UL
-#define RCP_HC_BIT 0x00200000UL
-#define RCP_GR_BIT 0x00040000UL
-#define RCP_GC_BIT 0x00020000UL
-#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x00008000UL
-#define KVM_UC_BIT 0x00004000UL
+#define PGSTE_ACC_BITS 0xf0000000UL
+#define PGSTE_FP_BIT 0x08000000UL
+#define PGSTE_PCL_BIT 0x00800000UL
+#define PGSTE_HR_BIT 0x00400000UL
+#define PGSTE_HC_BIT 0x00200000UL
+#define PGSTE_GR_BIT 0x00040000UL
+#define PGSTE_GC_BIT 0x00020000UL
+#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
@@ -329,8 +327,8 @@ extern unsigned long MODULES_END;
/* Bits in the region table entry */
#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
-#define _REGION_ENTRY_RO 0x200 /* region protection bit */
-#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */
+#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
@@ -338,47 +336,61 @@ extern unsigned long MODULES_END;
#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
-#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
+#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
-#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
+#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
-#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
+#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY (0)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
+#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ * ..R...I...y.
+ * prot-none, old ..0...1...1.
+ * prot-none, young ..1...1...1.
+ * read-only, old ..1...1...0.
+ * read-only, young ..1...0...1.
+ * read-write, old ..0...1...0.
+ * read-write, young ..0...0...1.
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ */
+
#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
/* Set of bits not changed in pmd_modify */
#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
| _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf000000000000000UL
-#define RCP_FP_BIT 0x0800000000000000UL
-#define RCP_PCL_BIT 0x0080000000000000UL
-#define RCP_HR_BIT 0x0040000000000000UL
-#define RCP_HC_BIT 0x0020000000000000UL
-#define RCP_GR_BIT 0x0004000000000000UL
-#define RCP_GC_BIT 0x0002000000000000UL
-#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x0000800000000000UL
-#define KVM_UC_BIT 0x0000400000000000UL
+#define PGSTE_ACC_BITS 0xf000000000000000UL
+#define PGSTE_FP_BIT 0x0800000000000000UL
+#define PGSTE_PCL_BIT 0x0080000000000000UL
+#define PGSTE_HR_BIT 0x0040000000000000UL
+#define PGSTE_HC_BIT 0x0020000000000000UL
+#define PGSTE_GR_BIT 0x0004000000000000UL
+#define PGSTE_GC_BIT 0x0002000000000000UL
+#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
@@ -393,14 +405,18 @@ extern unsigned long MODULES_END;
/*
* Page protection definitions.
*/
-#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE)
-#define PAGE_RO __pgprot(_PAGE_TYPE_RO)
-#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW)
-#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC)
-
-#define PAGE_KERNEL PAGE_RWC
-#define PAGE_SHARED PAGE_KERNEL
-#define PAGE_COPY PAGE_RO
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+ _PAGE_PROTECT)
/*
* On s390 the page table entry has an invalid bit and a read-only bit.
@@ -409,35 +425,31 @@ extern unsigned long MODULES_END;
*/
/*xwr*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_RO
-#define __P010 PAGE_RO
-#define __P011 PAGE_RO
-#define __P100 PAGE_RO
-#define __P101 PAGE_RO
-#define __P110 PAGE_RO
-#define __P111 PAGE_RO
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
#define __S000 PAGE_NONE
-#define __S001 PAGE_RO
-#define __S010 PAGE_RW
-#define __S011 PAGE_RW
-#define __S100 PAGE_RO
-#define __S101 PAGE_RO
-#define __S110 PAGE_RW
-#define __S111 PAGE_RW
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
/*
* Segment entry (large page) protection definitions.
*/
-#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE)
-#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO)
-#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW)
-
-static inline int mm_exclusive(struct mm_struct *mm)
-{
- return likely(mm == current->active_mm &&
- atomic_read(&mm->context.attach_count) <= 1);
-}
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_NONE)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -474,7 +486,7 @@ static inline int pgd_none(pgd_t pgd)
{
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
return 0;
- return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
+ return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pgd_bad(pgd_t pgd)
@@ -485,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pgd_val(pgd) & mask) != 0;
}
@@ -501,7 +513,7 @@ static inline int pud_none(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
return 0;
- return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
+ return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pud_large(pud_t pud)
@@ -519,7 +531,7 @@ static inline int pud_bad(pud_t pud)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pud_val(pud) & mask) != 0;
}
@@ -528,30 +540,36 @@ static inline int pud_bad(pud_t pud)
static inline int pmd_present(pmd_t pmd)
{
- unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO;
- return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
- !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
+ return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_none(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) &&
- !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
+ return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_large(pmd_t pmd)
{
#ifdef CONFIG_64BIT
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
#else
return 0;
#endif
}
+static inline int pmd_prot_none(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+}
+
static inline int pmd_bad(pmd_t pmd)
{
- unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
- return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
+#ifdef CONFIG_64BIT
+ if (pmd_large(pmd))
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+#endif
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -570,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+ if (pmd_prot_none(pmd))
+ return 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
}
static inline int pmd_young(pmd_t pmd)
{
- return 0;
+ int young = 0;
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd))
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
+ else
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+#endif
+ return young;
}
-static inline int pte_none(pte_t pte)
+static inline int pte_present(pte_t pte)
{
- return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
+ /* Bit pattern: (pte & 0x001) == 0x001 */
+ return (pte_val(pte) & _PAGE_PRESENT) != 0;
}
-static inline int pte_present(pte_t pte)
+static inline int pte_none(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
- return (pte_val(pte) & mask) == _PAGE_TYPE_NONE ||
- (!(pte_val(pte) & _PAGE_INVALID) &&
- !(pte_val(pte) & _PAGE_SWT));
+ /* Bit pattern: pte == 0x400 */
+ return pte_val(pte) == _PAGE_INVALID;
}
static inline int pte_file(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
- return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
+ /* Bit pattern: (pte & 0x601) == 0x600 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_PROTECT);
}
static inline int pte_special(pte_t pte)
@@ -618,12 +645,12 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
asm(
" lg %0,%2\n"
"0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */
- " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
" csg %0,%1,%2\n"
" jl 0b\n"
: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc");
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
return __pgste(new);
}
@@ -632,44 +659,56 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
asm(
- " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */
+ " nihh %1,0xff7f\n" /* clear PCL bit */
" stg %1,%0\n"
: "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
preempt_enable();
#endif
}
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
+}
+
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
+
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- unsigned long address, bits;
- unsigned char skey;
+ unsigned long address, bits, skey;
if (pte_val(*ptep) & _PAGE_INVALID)
return pgste;
address = pte_val(*ptep) & PAGE_MASK;
- skey = page_get_storage_key(address);
+ skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Clear page changed & referenced bit in the storage key */
- if (bits & _PAGE_CHANGED)
+ if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
+ /* Transfer dirty + referenced bit to host bits in pgste */
+ pgste_val(pgste) |= bits << 52;
page_set_storage_key(address, skey ^ bits, 0);
- else if (bits)
+ } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
+ (bits & _PAGE_REFERENCED)) {
+ /* Transfer referenced bit to host bit in pgste */
+ pgste_val(pgste) |= PGSTE_HR_BIT;
page_reset_referenced(address);
+ }
/* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
- /* Get host changed & referenced bits from pgste */
- bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
- /* Transfer page changed & referenced bit to kvm user bits */
- pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */
- /* Clear relevant host bits in pgste. */
- pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
- pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) |=
- (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
- /* Transfer referenced bit to pte */
- pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
return pgste;
@@ -678,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- int young;
-
if (pte_val(*ptep) & _PAGE_INVALID)
return pgste;
/* Get referenced bit from storage key */
- young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
- if (young)
- pgste_val(pgste) |= RCP_GR_BIT;
- /* Get host referenced bit from pgste */
- if (pgste_val(pgste) & RCP_HR_BIT) {
- pgste_val(pgste) &= ~RCP_HR_BIT;
- young = 1;
- }
- /* Transfer referenced bit to kvm user bits and pte */
- if (young) {
- pgste_val(pgste) |= KVM_UR_BIT;
- pte_val(*ptep) |= _PAGE_SWR;
- }
+ if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
+ pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
#endif
return pgste;
}
@@ -704,29 +730,31 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
unsigned long address;
- unsigned long okey, nkey;
+ unsigned long nkey;
if (pte_val(entry) & _PAGE_INVALID)
return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
address = pte_val(entry) & PAGE_MASK;
- okey = nkey = page_get_storage_key(address);
- nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
- /* Set page access key and fetch protection bit from pgste */
- nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
- if (okey != nkey)
- page_set_storage_key(address, nkey, 0);
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ page_set_storage_key(address, nkey, 0);
#endif
}
static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
{
- if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) {
+ if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
*/
- pte_val(entry) |= _PAGE_SWC;
- pte_val(entry) &= ~_PAGE_RO;
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
}
*ptep = entry;
}
@@ -743,6 +771,7 @@ struct gmap {
struct mm_struct *mm;
unsigned long *table;
unsigned long asce;
+ void *private;
struct list_head crst_list;
};
@@ -801,8 +830,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- if (pgste_val(pgste) & RCP_IN_BIT) {
- pgste_val(pgste) &= ~RCP_IN_BIT;
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
gmap_do_ipte_notify(mm, addr, ptep);
}
#endif
@@ -837,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
static inline int pte_write(pte_t pte)
{
- return (pte_val(pte) & _PAGE_SWW) != 0;
+ return (pte_val(pte) & _PAGE_WRITE) != 0;
}
static inline int pte_dirty(pte_t pte)
{
- return (pte_val(pte) & _PAGE_SWC) != 0;
+ return (pte_val(pte) & _PAGE_DIRTY) != 0;
}
static inline int pte_young(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- if (pte_val(pte) & _PAGE_SWR)
- return 1;
-#endif
- return 0;
+ return (pte_val(pte) & _PAGE_YOUNG) != 0;
}
/*
@@ -876,12 +901,12 @@ static inline void pud_clear(pud_t *pud)
static inline void pmd_clear(pmd_t *pmdp)
{
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
}
/*
@@ -892,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot);
- if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW))
- pte_val(pte) &= ~_PAGE_RO;
+ /*
+ * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
+ * invalid bit set, clear it again for readable, young pages
+ */
+ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+ pte_val(pte) &= ~_PAGE_INVALID;
+ /*
+ * newprot for PAGE_READ and PAGE_WRITE has the page protection
+ * bit set, clear it again for writable, dirty pages
+ */
+ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_SWW;
- /* Do not clobber _PAGE_TYPE_NONE pages! */
- if (!(pte_val(pte) & _PAGE_INVALID))
- pte_val(pte) |= _PAGE_RO;
+ pte_val(pte) &= ~_PAGE_WRITE;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- pte_val(pte) |= _PAGE_SWW;
- if (pte_val(pte) & _PAGE_SWC)
- pte_val(pte) &= ~_PAGE_RO;
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pte_val(pte) & _PAGE_DIRTY)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_SWC;
- /* Do not clobber _PAGE_TYPE_NONE pages! */
- if (!(pte_val(pte) & _PAGE_INVALID))
- pte_val(pte) |= _PAGE_RO;
+ pte_val(pte) &= ~_PAGE_DIRTY;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_SWC;
- if (pte_val(pte) & _PAGE_SWW)
- pte_val(pte) &= ~_PAGE_RO;
+ pte_val(pte) |= _PAGE_DIRTY;
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkold(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- pte_val(pte) &= ~_PAGE_SWR;
-#endif
+ pte_val(pte) &= ~_PAGE_YOUNG;
+ pte_val(pte) |= _PAGE_INVALID;
return pte;
}
static inline pte_t pte_mkyoung(pte_t pte)
{
+ pte_val(pte) |= _PAGE_YOUNG;
+ if (pte_val(pte) & _PAGE_READ)
+ pte_val(pte) &= ~_PAGE_INVALID;
return pte;
}
@@ -953,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
- pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
+ pte_val(pte) |= _PAGE_LARGE;
return pte;
}
#endif
@@ -970,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_all(ptep, pgste);
- dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
- pgste_val(pgste) &= ~KVM_UC_BIT;
+ dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
+ pgste_val(pgste) &= ~PGSTE_HC_BIT;
pgste_set_unlock(ptep, pgste);
return dirty;
}
@@ -990,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste);
- young = !!(pgste_val(pgste) & KVM_UR_BIT);
- pgste_val(pgste) &= ~KVM_UR_BIT;
+ young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
+ pgste_val(pgste) &= ~PGSTE_HR_BIT;
pgste_set_unlock(ptep, pgste);
}
return young;
}
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+#ifndef CONFIG_64BIT
+ /* pto must point to the start of the segment table */
+ pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
+#else
+ /* ipte in zarch mode can do the math */
+ pte_t *pto = ptep;
+#endif
+ asm volatile(
+ " ipte %2,%3"
+ : "=m" (*ptep) : "m" (*ptep),
+ "a" (pto), "a" (address));
+ }
+}
+
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active = (mm == current->active_mm) ? 1 : 0;
+
+ if (atomic_read(&mm->context.attach_count) > active)
+ __ptep_ipte(address, ptep);
+ else
+ mm->context.flush_mm = 1;
+}
+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
+ int young;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_update_young(ptep, pgste);
- pte = *ptep;
- *ptep = pte_mkold(pte);
- pgste_set_unlock(ptep, pgste);
- return pte_young(pte);
+ pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
}
- return 0;
+
+ pte = *ptep;
+ __ptep_ipte(addr, ptep);
+ young = pte_young(pte);
+ pte = pte_mkold(pte);
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste_set_pte(ptep, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+
+ return young;
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB
- * With virtualization we handle the reference bit, without we
- * we can simply return */
return ptep_test_and_clear_young(vma, address, ptep);
}
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
-{
- if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-#ifndef CONFIG_64BIT
- /* pto must point to the start of the segment table */
- pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-#else
- /* ipte in zarch mode can do the math */
- pte_t *pto = ptep;
-#endif
- asm volatile(
- " ipte %2,%3"
- : "=m" (*ptep) : "m" (*ptep),
- "a" (pto), "a" (address));
- }
-}
-
/*
* This is hard to understand. ptep_get_and_clear and ptep_clear_flush
* both clear the TLB for the unmapped pte. The reason is that
@@ -1063,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
@@ -1089,18 +1136,19 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) |= _PAGE_INVALID;
- if (mm_has_pgste(mm))
+ if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
+ pgste_set(ptep, pgste);
+ }
return pte;
}
@@ -1111,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
pgste_t pgste;
if (mm_has_pgste(mm)) {
- pgste = *(pgste_t *)(ptep + PTRS_PER_PTE);
+ pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte);
pgste_set_pte(ptep, pte);
pgste_set_unlock(ptep, pgste);
@@ -1133,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
pte = *ptep;
__ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_update_all(&pte, pgste);
@@ -1157,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- if (mm_has_pgste(mm)) {
+ if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- if (!full)
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
if (!full)
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
- if (mm_has_pgste(mm)) {
+ if (!full && mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste_set_unlock(ptep, pgste);
}
@@ -1183,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
pte_t pte = *ptep;
if (pte_write(pte)) {
- mm->context.flush_mm = 1;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
+ ptep_flush_lazy(mm, address, ptep);
pte = pte_wrprotect(pte);
if (mm_has_pgste(mm)) {
@@ -1234,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot);
- return __pte;
+ return pte_mkyoung(__pte);
}
static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
@@ -1242,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
unsigned long physpage = page_to_phys(page);
pte_t __pte = mk_pte_phys(physpage, pgprot);
- if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) {
- pte_val(__pte) |= _PAGE_SWC;
- pte_val(__pte) &= ~_PAGE_RO;
- }
+ if (pte_write(__pte) && PageDirty(page))
+ __pte = pte_mkdirty(__pte);
return __pte;
}
@@ -1307,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
unsigned long sto = (unsigned long) pmdp -
pmd_index(address) * sizeof(pmd_t);
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+ if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
asm volatile(
" .insn rrf,0xb98e0000,%2,%3,0,0"
: "=m" (*pmdp)
@@ -1318,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
}
}
+static inline void __pmd_csp(pmd_t *pmdp)
+{
+ register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+ register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+ _SEGMENT_ENTRY_INVALID;
+ register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+ asm volatile(
+ " csp %1,%3"
+ : "=m" (*pmdp)
+ : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
/*
- * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx)
+ * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
* Convert to segment table entry format.
*/
if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
return pgprot_val(SEGMENT_NONE);
- if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
- return pgprot_val(SEGMENT_RO);
- return pgprot_val(SEGMENT_RW);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+ return pgprot_val(SEGMENT_READ);
+ return pgprot_val(SEGMENT_WRITE);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
+ int young;
+
+ young = pmd_young(pmd);
pmd_val(pmd) &= _SEGMENT_CHG_MASK;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (young)
+ pmd = pmd_mkyoung(pmd);
return pmd;
}
@@ -1343,25 +1430,37 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return __pmd;
+ return pmd_mkyoung(__pmd);
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
- /* Do not clobber _HPAGE_TYPE_NONE pages! */
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+static inline void pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ int active = (mm == current->active_mm) ? 1 : 0;
+
+ if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
+ __pmd_idte(address, pmdp);
+ else
+ mm->context.flush_mm = 1;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
static inline int pmd_trans_splitting(pmd_t pmd)
{
@@ -1371,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
+ if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
pmd_val(entry) |= _SEGMENT_ENTRY_CO;
*pmdp = entry;
}
@@ -1384,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1394,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
-static inline pmd_t pmd_mkold(pmd_t pmd)
-{
- /* No referenced bit in the segment table entry. */
- return pmd;
-}
-
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
-{
- /* No referenced bit in the segment table entry. */
- return pmd;
-}
-
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
- long tmp, rc;
- int counter;
+ pmd_t pmd;
- rc = 0;
- if (MACHINE_HAS_RRBM) {
- counter = PTRS_PER_PTE >> 6;
- asm volatile(
- "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
- " ogr %1,%0\n"
- " la %3,0(%4,%3)\n"
- " brct %2,0b\n"
- : "=&d" (tmp), "+&d" (rc), "+d" (counter),
- "+a" (pmd_addr)
- : "a" (64 * 4096UL) : "cc");
- rc = !!rc;
- } else {
- counter = PTRS_PER_PTE;
- asm volatile(
- "0: rrbe 0,%2\n"
- " la %2,0(%3,%2)\n"
- " brc 12,1f\n"
- " lhi %0,1\n"
- "1: brct %1,0b\n"
- : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
- : "a" (4096UL) : "cc");
- }
- return rc;
+ pmd = *pmdp;
+ __pmd_idte(address, pmdp);
+ *pmdp = pmd_mkold(pmd);
+ return pmd_young(pmd);
}
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
@@ -1503,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specifiation exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 21 and bit 22 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 30 and 31 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 21, 22, 30 and 31 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 1-19 and bits 24-29 to store type and offset.
* We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
* plus 24 for the offset.
@@ -1520,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specifiation exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 53 and bit 54 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 62 and 63 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 53, 54, 62 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 0-51 and bits 56-61 to store type and offset.
* We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
* plus 56 for the offset.
@@ -1540,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
offset &= __SWP_OFFSET_MASK;
- pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
+ pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
return pte;
}
@@ -1563,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define pgoff_to_pte(__off) \
((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
- | _PAGE_TYPE_FILE })
+ | _PAGE_INVALID | _PAGE_PROTECT })
#endif /* !__ASSEMBLY__ */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6b499870662f..ca7821f07260 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -43,6 +43,7 @@ extern void execve_tail(void);
#ifndef CONFIG_64BIT
#define TASK_SIZE (1UL << 31)
+#define TASK_MAX_SIZE (1UL << 31)
#define TASK_UNMAPPED_BASE (1UL << 30)
#else /* CONFIG_64BIT */
@@ -51,6 +52,7 @@ extern void execve_tail(void);
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
+#define TASK_MAX_SIZE (1UL << 53)
#endif /* CONFIG_64BIT */
@@ -91,7 +93,15 @@ struct thread_struct {
#endif
};
-#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE 1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND 2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
typedef struct thread_struct thread_struct;
@@ -188,6 +198,8 @@ static inline void cpu_relax(void)
barrier();
}
+#define arch_mutex_cpu_relax() barrier()
+
static inline void psw_set_key(unsigned int key)
{
asm volatile("spka 0(%0)" : : "d" (key));
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 559512a455da..52b56533c57c 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -24,6 +24,7 @@ struct pt_regs
unsigned long gprs[NUM_GPRS];
unsigned long orig_gpr2;
unsigned int int_code;
+ unsigned int int_parm;
unsigned long int_parm_long;
};
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 06a136136047..7dc7f9c63b65 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -56,5 +56,6 @@ bool sclp_has_linemode(void);
bool sclp_has_vt220(void);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 000000000000..5b3e48ef534b
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 701fe8c59e1f..83e5d216105e 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
extern int arch_spin_trylock_retry(arch_spinlock_t *);
extern void arch_spin_relax(arch_spinlock_t *lock);
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.owner_cpu == 0;
+}
+
static inline void arch_spin_lock(arch_spinlock_t *lp)
{
int old;
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index f3a9e0f92704..6dbd559763c9 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,9 +8,10 @@
#define __ASM_SWITCH_TO_H
#include <linux/thread_info.h>
+#include <asm/ptrace.h>
extern struct task_struct *__switch_to(void *, void *);
-extern void update_per_regs(struct task_struct *task);
+extern void update_cr_regs(struct task_struct *task);
static inline void save_fp_regs(s390_fp_regs *fpregs)
{
@@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs)
static inline void save_access_regs(unsigned int *acrs)
{
- asm volatile("stam 0,15,%0" : "=Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
}
static inline void restore_access_regs(unsigned int *acrs)
{
- asm volatile("lam 0,15,%0" : : "Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
}
#define switch_to(prev,next,last) do { \
@@ -86,7 +91,7 @@ static inline void restore_access_regs(unsigned int *acrs)
restore_fp_regs(&next->thread.fp_regs); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- update_per_regs(next); \
+ update_cr_regs(next); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d686684..2cb846c4b37f 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
struct mm_struct *mm;
struct mmu_table_batch *batch;
unsigned int fullmm;
+ unsigned long start, end;
};
struct mmu_table_batch {
@@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
static inline void tlb_gather_mmu(struct mmu_gather *tlb,
struct mm_struct *mm,
- unsigned int full_mm_flush)
+ unsigned long start,
+ unsigned long end)
{
tlb->mm = mm;
- tlb->fullmm = full_mm_flush;
+ tlb->start = start;
+ tlb->end = end;
+ tlb->fullmm = !(start | (end+1));
tlb->batch = NULL;
if (tlb->fullmm)
__tlb_flush_mm(mm);
@@ -59,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
static inline void tlb_flush_mmu(struct mmu_gather *tlb)
{
+ __tlb_flush_mm_lazy(tlb->mm);
tlb_table_flush(tlb);
}
static inline void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
/*
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 6b32af30878c..f9fef0425fee 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
__tlb_flush_full(mm);
}
-static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
if (mm->context.flush_mm) {
__tlb_flush_mm(mm);
@@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- __tlb_flush_mm_cond(mm);
+ __tlb_flush_mm_lazy(mm);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- __tlb_flush_mm_cond(vma->vm_mm);
+ __tlb_flush_mm_lazy(vma->vm_mm);
}
static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 9ccd1905bdad..6a9a9eb645f5 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -35,6 +35,7 @@ header-y += siginfo.h
header-y += signal.h
header-y += socket.h
header-y += sockios.h
+header-y += sclp_ctl.h
header-y += stat.h
header-y += statfs.h
header-y += swab.h
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index 1c6a7f85a581..65dc694725a8 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -29,6 +29,16 @@ struct chsc_async_area {
__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
} __attribute__ ((packed));
+struct chsc_header {
+ __u16 length;
+ __u16 code;
+} __attribute__ ((packed));
+
+struct chsc_sync_area {
+ struct chsc_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+} __attribute__ ((packed));
+
struct chsc_response_struct {
__u16 length;
__u16 code;
@@ -126,5 +136,8 @@ struct chsc_cpd_info {
#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
#endif
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 38eca3ba40e2..5812a3b2df9e 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data {
#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6)
/* Resume IO on device */
#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7)
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
/* retrieve API version number */
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 3aa9f1ec5b29..7a84619e315e 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -400,6 +400,7 @@ typedef struct
#define PTRACE_POKE_SYSTEM_CALL 0x5008
#define PTRACE_ENABLE_TE 0x5009
#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
/*
* PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 000000000000..f2818613ee41
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,24 @@
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+ __u32 cmdw;
+ __u64 sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+ _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 2dacb306835c..92494494692e 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
#define SO_SELECT_ERR_QUEUE 45
+#define SO_BUSY_POLL 46
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 7a82f9f70100..2416138ebd3e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C
#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <asm/cputime.h>
#include <asm/vdso.h>
@@ -47,6 +48,7 @@ int main(void)
DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+ DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
@@ -161,6 +163,8 @@ int main(void)
DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+ DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+ DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
#endif /* CONFIG_32BIT */
return 0;
}
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 64b24650e4f8..dd62071624be 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -173,7 +173,7 @@ error:
}
}
-static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu)
+static struct cache_dir *cache_create_cache_dir(int cpu)
{
struct cache_dir *cache_dir;
struct kobject *kobj = NULL;
@@ -289,9 +289,8 @@ static struct kobj_type cache_index_type = {
.default_attrs = cache_index_default_attrs,
};
-static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir,
- struct cache *cache, int index,
- int cpu)
+static int cache_create_index_dir(struct cache_dir *cache_dir,
+ struct cache *cache, int index, int cpu)
{
struct cache_index_dir *index_dir;
int rc;
@@ -313,7 +312,7 @@ out:
return rc;
}
-static int __cpuinit cache_add_cpu(int cpu)
+static int cache_add_cpu(int cpu)
{
struct cache_dir *cache_dir;
struct cache *cache;
@@ -335,7 +334,7 @@ static int __cpuinit cache_add_cpu(int cpu)
return 0;
}
-static void __cpuinit cache_remove_cpu(int cpu)
+static void cache_remove_cpu(int cpu)
{
struct cache_index_dir *index, *next;
struct cache_dir *cache_dir;
@@ -354,8 +353,8 @@ static void __cpuinit cache_remove_cpu(int cpu)
cache_dir_cpu[cpu] = NULL;
}
-static int __cpuinit cache_hotplug(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cache_hotplug(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
int cpu = (long)hcpu;
int rc = 0;
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 8b6e4f5288a2..1f1b8c70ab97 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -221,25 +221,26 @@ static int groups16_from_user(struct group_info *group_info, u16 __user *groupli
asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist)
{
+ const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
- get_group_info(current->cred->group_info);
- i = current->cred->group_info->ngroups;
+ get_group_info(cred->group_info);
+ i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
- if (groups16_to_user(grouplist, current->cred->group_info)) {
+ if (groups16_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
- put_group_info(current->cred->group_info);
+ put_group_info(cred->group_info);
return i;
}
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index c439ac9ced09..1389b637dae5 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -332,9 +332,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
(u16 __force __user *)(frame->retcode)))
goto give_sigsegv;
@@ -400,9 +400,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
(u16 __force __user *)(frame->retcode));
}
@@ -417,7 +417,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->psw.mask = PSW_MASK_BA |
(psw_user_bits & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64) ka->sa.sa_handler;
+ regs->psw.addr = (__u64 __force) ka->sa.sa_handler;
regs->gprs[2] = map_signal(sig);
regs->gprs[3] = (__force __u64) &frame->info;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f703d91bf720..c84f33d51f7b 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -16,38 +16,187 @@
#include <asm/os_info.h>
#include <asm/elf.h>
#include <asm/ipl.h>
+#include <asm/sclp.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
/*
- * Copy one page from "oldmem"
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy up to one page to vmalloc or real memory
+ */
+static ssize_t copy_page_real(void *buf, void *src, size_t csize)
+{
+ size_t size;
+
+ if (is_vmalloc_addr(buf)) {
+ BUG_ON(csize >= PAGE_SIZE);
+ /* If buf is not page aligned, copy first part */
+ size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize);
+ if (size) {
+ if (memcpy_real(load_real_addr(buf), src, size))
+ return -EFAULT;
+ buf += size;
+ src += size;
+ }
+ /* Copy second part */
+ size = csize - size;
+ return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0;
+ } else {
+ return memcpy_real(buf, src, csize);
+ }
+}
+
+/*
+ * Pointer to ELF header in new kernel
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * For pages below ZFCPDUMP_HSA_SIZE memory from the HSA is copied. Otherwise
+ * real memory copy is used.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
+{
+ int rc;
+
+ if (src < ZFCPDUMP_HSA_SIZE) {
+ rc = memcpy_hsa(buf, src, csize, userbuf);
+ } else {
+ if (userbuf)
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = memcpy_real(buf, (void *) src, csize);
+ }
+ return rc ? rc : csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
*
* For the kdump reserved memory this functions performs a swap operation:
* - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
* - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
*/
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
-{
- unsigned long src;
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
- if (!csize)
- return 0;
+{
+ int rc;
- src = (pfn << PAGE_SHIFT) + offset;
if (src < OLDMEM_SIZE)
src += OLDMEM_BASE;
else if (src > OLDMEM_BASE &&
src < OLDMEM_BASE + OLDMEM_SIZE)
src -= OLDMEM_BASE;
if (userbuf)
- copy_to_user_real((void __force __user *) buf, (void *) src,
- csize);
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = copy_page_real(buf, (void *) src, csize);
+ return (rc == 0) ? rc : csize;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ unsigned long src;
+
+ if (!csize)
+ return 0;
+ src = (pfn << PAGE_SHIFT) + offset;
+ if (OLDMEM_BASE)
+ return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+ else
+ return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long size_old;
+ int rc;
+
+ if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
+ size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+ rc = remap_pfn_range(vma, from,
+ pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+ size_old, prot);
+ if (rc || size == size_old)
+ return rc;
+ size -= size_old;
+ from += size_old;
+ pfn += size_old >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * We only map available memory above ZFCPDUMP_HSA_SIZE. Memory below
+ * ZFCPDUMP_HSA_SIZE is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+ unsigned long from,
+ unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long size_hsa;
+
+ if (pfn < ZFCPDUMP_HSA_SIZE >> PAGE_SHIFT) {
+ size_hsa = min(size, ZFCPDUMP_HSA_SIZE - (pfn << PAGE_SHIFT));
+ if (size == size_hsa)
+ return 0;
+ size -= size_hsa;
+ from += size_hsa;
+ pfn += size_hsa >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ if (OLDMEM_BASE)
+ return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
else
- memcpy_real(buf, (void *) src, csize);
- return csize;
+ return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+ prot);
}
/*
@@ -58,11 +207,21 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
unsigned long copied = 0;
int rc;
- if ((unsigned long) src < OLDMEM_SIZE) {
- copied = min(count, OLDMEM_SIZE - (unsigned long) src);
- rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
- if (rc)
- return rc;
+ if (OLDMEM_BASE) {
+ if ((unsigned long) src < OLDMEM_SIZE) {
+ copied = min(count, OLDMEM_SIZE - (unsigned long) src);
+ rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
+ if (rc)
+ return rc;
+ }
+ } else {
+ if ((unsigned long) src < ZFCPDUMP_HSA_SIZE) {
+ copied = min(count,
+ ZFCPDUMP_HSA_SIZE - (unsigned long) src);
+ rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
+ if (rc)
+ return rc;
+ }
}
return memcpy_real(dest + copied, src + copied, count - copied);
}
@@ -325,14 +484,6 @@ static int get_mem_chunk_cnt(void)
}
/*
- * Relocate pointer in order to allow vmcore code access the data
- */
-static inline unsigned long relocate(unsigned long addr)
-{
- return OLDMEM_BASE + addr;
-}
-
-/*
* Initialize ELF loads (new kernel)
*/
static int loads_init(Elf64_Phdr *phdr, u64 loads_offset)
@@ -383,7 +534,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
ptr = nt_vmcoreinfo(ptr);
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = PT_NOTE;
- phdr->p_offset = relocate(notes_offset);
+ phdr->p_offset = notes_offset;
phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
phdr->p_memsz = phdr->p_filesz;
return ptr;
@@ -392,7 +543,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
/*
* Create ELF core header (new kernel)
*/
-static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
int mem_chunk_cnt;
@@ -400,6 +551,12 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
u32 alloc_size;
u64 hdr_off;
+ /* If we are not in kdump or zfcpdump mode return */
+ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return 0;
+ /* If elfcorehdr= has been passed via cmdline, we use that one */
+ if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+ return 0;
mem_chunk_cnt = get_mem_chunk_cnt();
alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
@@ -417,27 +574,52 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
/* Init loads */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off);
- *elfcorebuf_sz = hdr_off;
- *elfcorebuf = (void *) relocate((unsigned long) hdr);
- BUG_ON(*elfcorebuf_sz > alloc_size);
+ loads_init(phdr_loads, hdr_off);
+ *addr = (unsigned long long) hdr;
+ elfcorehdr_newmem = hdr;
+ *size = (unsigned long long) hdr_off;
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
}
/*
- * Create kdump ELF core header in new kernel, if it has not been passed via
- * the "elfcorehdr" kernel parameter
+ * Free ELF core header (new kernel)
*/
-static int setup_kdump_elfcorehdr(void)
+void elfcorehdr_free(unsigned long long addr)
{
- size_t elfcorebuf_sz;
- char *elfcorebuf;
+ if (!elfcorehdr_newmem)
+ return;
+ kfree((void *)(unsigned long)addr);
+}
- if (!OLDMEM_BASE || is_kdump_kernel())
- return -EINVAL;
- s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz);
- elfcorehdr_addr = (unsigned long long) elfcorebuf;
- elfcorehdr_size = elfcorebuf_sz;
- return 0;
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ src = elfcorehdr_newmem ? src : src - OLDMEM_BASE;
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
}
-subsys_initcall(setup_kdump_elfcorehdr);
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+ int rc;
+
+ if (elfcorehdr_newmem) {
+ memcpy(buf, src, count);
+ } else {
+ rc = copy_from_oldmem(buf, src, count);
+ if (rc)
+ return rc;
+ }
+ *ppos += count;
+ return count;
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 298297477257..99e7f6035895 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -40,14 +40,15 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
{
struct stack_frame *sf;
struct pt_regs *regs;
+ unsigned long addr;
while (1) {
sp = sp & PSW_ADDR_INSN;
if (sp < low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN);
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
/* Follow the backchain. */
while (1) {
low = sp;
@@ -57,16 +58,16 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
if (sp <= low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN);
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
}
/* Zero backchain detected, check for interrupt frame. */
sp = (unsigned long) (sf + 1);
if (sp <= low || sp > high - sizeof(*regs))
return sp;
regs = (struct pt_regs *) sp;
- printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN);
+ addr = regs->psw.addr & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
low = sp;
sp = regs->gprs[15];
}
@@ -74,6 +75,8 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
static void show_trace(struct task_struct *task, unsigned long *stack)
{
+ const unsigned long frame_size =
+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
register unsigned long __r15 asm ("15");
unsigned long sp;
@@ -82,11 +85,13 @@ static void show_trace(struct task_struct *task, unsigned long *stack)
sp = task ? task->thread.ksp : __r15;
printk("Call Trace:\n");
#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp, S390_lowcore.panic_stack - 4096,
- S390_lowcore.panic_stack);
+ sp = __show_trace(sp,
+ S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size);
#endif
- sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
+ sp = __show_trace(sp,
+ S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size);
if (task)
__show_trace(sp, (unsigned long) task_stack_page(task),
(unsigned long) task_stack_page(task) + THREAD_SIZE);
@@ -124,8 +129,7 @@ static void show_last_breaking_event(struct pt_regs *regs)
{
#ifdef CONFIG_64BIT
printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
+ printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
#endif
}
@@ -139,10 +143,10 @@ void show_registers(struct pt_regs *regs)
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p",
+ printk("%s PSW : %p %p (%pSR)\n",
mode, (void *) regs->psw.mask,
+ (void *) regs->psw.addr,
(void *) regs->psw.addr);
- print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN);
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4d5e6f8a7978..cc30d1fb000c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -18,6 +18,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 4
@@ -429,11 +430,24 @@ io_skip:
stm %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
+ lhi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lhi %r3,THIN_INTERRUPT
+io_call:
basr %r14,%r1 # call do_IRQ
+ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON
@@ -573,12 +587,13 @@ ext_skip:
stm %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
TRACE_IRQS_OFF
+ l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
- l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
- l %r4,__LC_EXT_PARAMS # get external parameters
- l %r1,BASED(.Ldo_extint)
- basr %r14,%r1 # call do_extint
+ lhi %r3,EXT_INTERRUPT
+ basr %r14,%r1 # call do_IRQ
j io_return
/*
@@ -871,13 +886,13 @@ cleanup_idle:
stm %r9,%r10,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- n %r8,BASED(cleanup_idle_wait) # clear wait state bit
+ n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits
l %r9,24(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
.long psw_idle_lpsw + 0x80000000
cleanup_idle_wait:
- .long 0xfffdffff
+ .long 0xfcfdffff
/*
* Integer constants
@@ -894,7 +909,6 @@ cleanup_idle_wait:
.Ldo_machine_check: .long s390_do_machine_check
.Lhandle_mcck: .long s390_handle_mcck
.Ldo_IRQ: .long do_IRQ
-.Ldo_extint: .long do_extint
.Ldo_signal: .long do_signal
.Ldo_notify_resume: .long do_notify_resume
.Ldo_per_trap: .long do_per_trap
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index aa0ab02e9595..e9b04c33d383 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -53,27 +53,21 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
void do_notify_resume(struct pt_regs *regs);
-struct ext_code;
-void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long);
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq);
void do_restart(void);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
-
+int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
struct old_sigaction;
-long sys_mmap2(struct s390_mmap_arg_struct __user *arg);
-long sys_s390_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr);
-long sys_s390_personality(unsigned int personality);
-long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
- size_t len, int advice);
-long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
- u32 len_low);
long sys_sigreturn(void);
long sys_rt_sigreturn(void);
long sys32_sigreturn(void);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 4c17eece707e..2b2188b97c6a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -19,6 +19,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -47,7 +48,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
#define BASED(name) name-system_call(%r13)
@@ -81,23 +81,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
#endif
.endm
- .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck
+ .macro HANDLE_SIE_INTERCEPT scratch,reason
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
tmhh %r8,0x0001 # interrupting from user ?
- jnz .+42
+ jnz .+62
lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_loop)
- clg \scratch,BASED(.Lsie_length)
- .if \pgmcheck
+ slg \scratch,BASED(.Lsie_critical)
+ clg \scratch,BASED(.Lsie_critical_length)
+ .if \reason==1
# Some program interrupts are suppressing (e.g. protection).
# We must also check the instruction after SIE in that case.
# do_protection_exception will rewind to rewind_pad
- jh .+22
+ jh .+42
.else
- jhe .+22
+ jhe .+42
.endif
- lg %r9,BASED(.Lsie_loop)
- LPP BASED(.Lhost_id) # set host id
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ mvi __SF_EMPTY+31(%r15),\reason # set exit reason
#endif
.endm
@@ -450,7 +454,7 @@ ENTRY(io_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,2
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user?
jz io_skip
@@ -460,10 +464,23 @@ io_skip:
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lghi %r3,THIN_INTERRUPT
+io_call:
brasl %r14,do_IRQ
+ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON
@@ -595,7 +612,7 @@ ENTRY(ext_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,3
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user ?
jz ext_skip
@@ -605,14 +622,15 @@ ext_skip:
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
+ lghi %r1,__LC_EXT_PARAMS2
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r1,4096
lgr %r2,%r11 # pass pointer to pt_regs
- llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
- llgf %r4,__LC_EXT_PARAMS # get external parameter
- lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter
- brasl %r14,do_extint
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
j io_return
/*
@@ -643,7 +661,7 @@ ENTRY(mcck_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
jo mcck_panic # yes -> rest of mcck code invalid
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
@@ -911,7 +929,7 @@ cleanup_idle:
stg %r9,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- nihh %r8,0xfffd # clear wait state bit
+ nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
@@ -937,56 +955,50 @@ ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
stg %r2,__SF_EMPTY(%r15) # save control block pointer
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
lmg %r0,%r13,0(%r3) # load guest gprs 0-13
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions in the sie_loop should not cause program interrupts. So
-# lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-rewind_pad:
- nop 0
-sie_loop:
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- tm __TI_flags+7(%r14),_TIF_EXIT_SIE
- jnz sie_exit
lg %r14,__LC_GMAP # get gmap pointer
ltgr %r14,%r14
jz sie_gmap
lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
sie_gmap:
lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),1 # last exit...
+ jnz sie_done
LPP __SF_EMPTY(%r15) # set guest id
sie 0(%r14)
sie_done:
LPP __SF_EMPTY+16(%r15) # set host id
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
-sie_exit:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions beween sie64a and sie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+rewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,0
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
br %r14
sie_fault:
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,-EFAULT
- br %r14
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
.align 8
-.Lsie_loop:
- .quad sie_loop
-.Lsie_length:
- .quad sie_done - sie_loop
-.Lhost_id:
- .quad 0
+.Lsie_critical:
+ .quad sie_gmap
+.Lsie_critical_length:
+ .quad sie_done - sie_gmap
EX_TABLE(rewind_pad,sie_fault)
- EX_TABLE(sie_loop,sie_fault)
+ EX_TABLE(sie_exit,sie_fault)
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e3043aef87a9..1014ad5f7693 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -15,6 +15,7 @@
#include <linux/kprobes.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
+#include "entry.h"
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -177,7 +178,7 @@ int ftrace_enable_ftrace_graph_caller(void)
offset = ((void *) prepare_ftrace_return -
(void *) ftrace_graph_caller) / 2;
- return probe_kernel_write(ftrace_graph_caller + 2,
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
&offset, sizeof(offset));
}
@@ -185,7 +186,7 @@ int ftrace_disable_ftrace_graph_caller(void)
{
static unsigned short offset = 0x0002;
- return probe_kernel_write(ftrace_graph_caller + 2,
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
&offset, sizeof(offset));
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index d8a6a385d048..feb719d3c851 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -754,9 +754,9 @@ static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
.write = reipl_fcp_scpdata_write,
};
-DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
reipl_block_fcp->ipl_info.fcp.bootprog);
@@ -1323,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
-DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
dump_block_fcp->ipl_info.fcp.bootprog);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f7fb58903f6a..8ac2097f13d4 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -22,6 +22,7 @@
#include <asm/cputime.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -42,9 +43,10 @@ struct irq_class {
* Since the external and I/O interrupt fields are already sums we would end
* up with having a sum which accounts each interrupt twice.
*/
-static const struct irq_class irqclass_main_desc[NR_IRQS] = {
- [EXTERNAL_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"}
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ [EXT_INTERRUPT] = {.name = "EXT"},
+ [IO_INTERRUPT] = {.name = "I/O"},
+ [THIN_INTERRUPT] = {.name = "AIO"},
};
/*
@@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
[CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
+void __init init_IRQ(void)
+{
+ irq_reserve_irqs(0, THIN_INTERRUPT);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
@@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
+ goto out;
}
if (irq < NR_IRQS) {
+ if (irq >= NR_IRQS_BASE)
+ goto out;
seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
- goto skip_arch_irqs;
+ goto out;
}
for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]);
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
if (irqclass_sub_desc[irq].desc)
seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
seq_putc(p, '\n');
}
-skip_arch_irqs:
+out:
put_online_cpus();
return 0;
}
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ return 0;
+}
+
/*
* Switch to the asynchronous interrupt stack for softirq execution.
*/
@@ -159,41 +192,27 @@ asmlinkage void do_softirq(void)
local_irq_restore(flags);
}
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
- if (proc_mkdir("irq", NULL))
- create_prof_cpu_mask();
-}
-#endif
-
/*
* ext_int_hash[index] is the list head for all external interrupts that hash
* to this index.
*/
-static struct list_head ext_int_hash[256];
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
struct ext_int_info {
ext_int_handler_t handler;
- u16 code;
- struct list_head entry;
+ struct hlist_node entry;
struct rcu_head rcu;
+ u16 code;
};
/* ext_int_hash_lock protects the handler lists for external interrupts */
-DEFINE_SPINLOCK(ext_int_hash_lock);
-
-static void __init init_external_interrupts(void)
-{
- int idx;
-
- for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
- INIT_LIST_HEAD(&ext_int_hash[idx]);
-}
+static DEFINE_SPINLOCK(ext_int_hash_lock);
static inline int ext_hash(u16 code)
{
- return (code + (code >> 9)) & 0xff;
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
}
int register_external_interrupt(u16 code, ext_int_handler_t handler)
@@ -210,7 +229,7 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler)
index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_add_rcu(&p->entry, &ext_int_hash[index]);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
@@ -223,9 +242,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
int index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
if (p->code == code && p->handler == handler) {
- list_del_rcu(&p->entry);
+ hlist_del_rcu(&p->entry);
kfree_rcu(p, rcu);
}
}
@@ -234,80 +253,64 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
}
EXPORT_SYMBOL(unregister_external_interrupt);
-void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
- unsigned int param32, unsigned long param64)
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
{
- struct pt_regs *old_regs;
+ struct pt_regs *regs = get_irq_regs();
+ struct ext_code ext_code;
struct ext_int_info *p;
int index;
- old_regs = set_irq_regs(regs);
- irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
- /* Serve timer interrupts first. */
- clock_comparator_work();
- }
- kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
+ ext_code = *(struct ext_code *) &regs->int_code;
if (ext_code.code != 0x1004)
__get_cpu_var(s390_idle).nohz_delay = 1;
index = ext_hash(ext_code.code);
rcu_read_lock();
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
- if (likely(p->code == ext_code.code))
- p->handler(ext_code, param32, param64);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
rcu_read_unlock();
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-void __init init_IRQ(void)
-{
- init_external_interrupts();
+ return IRQ_HANDLED;
}
-static DEFINE_SPINLOCK(sc_irq_lock);
-static int sc_irq_refcount;
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
-void service_subclass_irq_register(void)
+void __init init_ext_interrupts(void)
{
- spin_lock(&sc_irq_lock);
- if (!sc_irq_refcount)
- ctl_set_bit(0, 9);
- sc_irq_refcount++;
- spin_unlock(&sc_irq_lock);
-}
-EXPORT_SYMBOL(service_subclass_irq_register);
+ int idx;
-void service_subclass_irq_unregister(void)
-{
- spin_lock(&sc_irq_lock);
- sc_irq_refcount--;
- if (!sc_irq_refcount)
- ctl_clear_bit(0, 9);
- spin_unlock(&sc_irq_lock);
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
+
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
}
-EXPORT_SYMBOL(service_subclass_irq_unregister);
-static DEFINE_SPINLOCK(ma_subclass_lock);
-static int ma_subclass_refcount;
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
-void measurement_alert_subclass_register(void)
+void irq_subclass_register(enum irq_subclass subclass)
{
- spin_lock(&ma_subclass_lock);
- if (!ma_subclass_refcount)
- ctl_set_bit(0, 5);
- ma_subclass_refcount++;
- spin_unlock(&ma_subclass_lock);
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL(measurement_alert_subclass_register);
+EXPORT_SYMBOL(irq_subclass_register);
-void measurement_alert_subclass_unregister(void)
+void irq_subclass_unregister(enum irq_subclass subclass)
{
- spin_lock(&ma_subclass_lock);
- ma_subclass_refcount--;
- if (!ma_subclass_refcount)
- ctl_clear_bit(0, 5);
- spin_unlock(&ma_subclass_lock);
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL(measurement_alert_subclass_unregister);
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3388b2b2a07d..0ce9fb245034 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
+static void free_dmainsn_page(void *page)
+{
+ free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
{
switch (insn[0] >> 8) {
@@ -100,35 +120,161 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xc0:
- if ((insn[0] & 0x0f) == 0x00 || /* larl */
- (insn[0] & 0x0f) == 0x05) /* brasl */
- fixup |= FIXUP_RETURN_REGISTER;
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xeb:
- if ((insn[2] & 0xff) == 0x44 || /* bxhg */
- (insn[2] & 0xff) == 0x45) /* bxleg */
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
break;
case 0xe3: /* bctg */
if ((insn[2] & 0xff) == 0x46)
fixup = FIXUP_BRANCH_NOT_TAKEN;
break;
+ case 0xec:
+ switch (insn[2] & 0xff) {
+ case 0xe5: /* clgrb */
+ case 0xe6: /* cgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
+ case 0xfd: /* cglib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
}
return fixup;
}
+static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
+{
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+static void __kprobes copy_instruction(struct kprobe *p)
+{
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
+ if (!is_insn_relative_long(p->ainsn.insn))
+ return;
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
+
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+ BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+ if (addr < (void *)MODULES_VADDR)
+ return 0;
+ if (addr > (void *)MODULES_END)
+ return 0;
+#endif
+ return 1;
+}
+
+static int __kprobes s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area like the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+
+static void __kprobes s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
if ((unsigned long) p->addr & 0x01)
return -EINVAL;
-
/* Make sure the probe isn't going on a difficult instruction */
if (is_prohibited_opcode(p->addr))
return -EINVAL;
-
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
p->opcode = *p->addr;
- memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
-
+ copy_instruction(p);
return 0;
}
@@ -169,6 +315,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
+ s390_free_insn_slot(p);
}
static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ac2178161ec3..719e27b2cf22 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -50,7 +50,7 @@ static void add_elf_notes(int cpu)
/*
* Initialize CPU ELF notes
*/
-void setup_regs(void)
+static void setup_regs(void)
{
unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
int cpu, this_cpu;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 504175ebf8b0..c4c033819879 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
: "0", "cc");
#endif
/* Revalidate clock comparator register */
- if (S390_lowcore.clock_comparator == -1)
- set_clock_comparator(S390_lowcore.mcck_clock);
- else
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(S390_lowcore.clock_comparator);
/* Check if old PSW is valid */
if (!mci->wp)
/*
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 390d9ae57bb2..1105502bf6e9 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -274,7 +274,7 @@ static int reserve_pmc_hardware(void)
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_register();
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
return 0;
}
@@ -285,7 +285,7 @@ static void release_pmc_hardware(void)
int flags = PMC_RELEASE;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
}
/* Release the PMU if event is the last perf event */
@@ -639,8 +639,8 @@ static struct pmu cpumf_pmu = {
.cancel_txn = cpumf_pmu_cancel_txn,
};
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (long) hcpu;
int flags;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index f58f37f66824..2343c218b8f9 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <asm/irq.h>
@@ -39,6 +40,58 @@ int perf_num_counters(void)
}
EXPORT_SYMBOL(perf_num_counters);
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+ struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+ if (!stack)
+ return NULL;
+
+ return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return false;
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ return is_in_guest(regs) ? instruction_pointer_guest(regs)
+ : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+ return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ if (is_in_guest(regs))
+ return perf_misc_guest_flags(regs);
+
+ return user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+}
+
void perf_event_print_debug(void)
{
struct cpumf_ctr_info cf_info;
@@ -52,13 +105,10 @@ void perf_event_print_debug(void)
cpu = smp_processor_id();
memset(&cf_info, 0, sizeof(cf_info));
- if (!qctri(&cf_info)) {
+ if (!qctri(&cf_info))
pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
cpu, cf_info.cfvn, cf_info.csvn,
cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
- print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET,
- &cf_info, sizeof(cf_info));
- }
local_irq_restore(flags);
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 2bc3eddae34a..c5dbb335716d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -71,6 +71,7 @@ void arch_cpu_idle(void)
}
/* Halt the cpu and keep track of cpu time accounting. */
vtime_stop_cpu();
+ local_irq_enable();
}
void arch_cpu_idle_exit(void)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 753c41d0ffd3..24612029f450 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
/*
* cpu_init - initializes state that is per-CPU.
*/
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
struct cpuid *id = &__get_cpu_var(cpu_id);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index a314c57f4e94..9556905bd3ce 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -47,7 +47,7 @@ enum s390_regset {
REGSET_GENERAL_EXTENDED,
};
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
@@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task)
#ifdef CONFIG_64BIT
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr0, cr0_new;
+ unsigned long cr[3], cr_new[3];
- __ctl_store(cr0, 0, 0);
- /* set or clear transaction execution bits 8 and 9. */
+ __ctl_store(cr, 0, 2);
+ cr_new[1] = cr[1];
+ /* Set or clear transaction execution TXC bit 8. */
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr0_new = cr0 & ~(3UL << 54);
+ cr_new[0] = cr[0] & ~(1UL << 55);
else
- cr0_new = cr0 | (3UL << 54);
- /* Only load control register 0 if necessary. */
- if (cr0 != cr0_new)
- __ctl_load(cr0_new, 0, 0);
+ cr_new[0] = cr[0] | (1UL << 55);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ cr_new[2] = cr[2] & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new[2] |= 1UL;
+ else
+ cr_new[2] |= 2UL;
+ }
+ if (memcmp(&cr_new, &cr, sizeof(cr)))
+ __ctl_load(cr_new, 0, 2);
}
#endif
/* Copy user specified PER registers */
@@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task)
{
set_tsk_thread_flag(task, TIF_SINGLE_STEP);
if (task == current)
- update_per_regs(task);
+ update_cr_regs(task);
}
void user_disable_single_step(struct task_struct *task)
{
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
if (task == current)
- update_per_regs(task);
+ update_cr_regs(task);
}
/*
@@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request,
if (!MACHINE_HAS_TE)
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
default:
/* Removing high order bit from addr (only for 31 bit). */
@@ -1271,7 +1299,7 @@ int regs_query_register_offset(const char *name)
if (!name || *name != 'r')
return -EINVAL;
- if (strict_strtoul(name + 1, 10, &offset))
+ if (kstrtoul(name + 1, 10, &offset))
return -EINVAL;
if (offset >= NUM_GPRS)
return -EINVAL;
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 077a99389b07..e1c9d1c292fa 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -139,10 +139,10 @@ static int __init runtime_instr_init(void)
if (!runtime_instr_avail())
return 0;
- measurement_alert_subclass_register();
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
if (rc)
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
else
pr_info("Runtime instrumentation facility initialized\n");
return rc;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9bdbcef1da9e..3bac589844a7 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount);
#endif
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
#endif
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index b6506ee32a36..29bd7bec4176 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -225,7 +225,7 @@ _sclp_print:
ahi %r2,1
ltr %r0,%r0 # end of string?
jz .LfinalizemtoS4
- chi %r0,0x15 # end of line (NL)?
+ chi %r0,0x0a # end of line (NL)?
jz .LfinalizemtoS4
stc %r0,0(%r6,%r7) # copy to mto
la %r11,0(%r6,%r7)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 0a49095104c9..aeed8a61fa0d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -719,10 +719,6 @@ static void reserve_oldmem(void)
}
create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
- if (OLDMEM_BASE + OLDMEM_SIZE == real_size)
- saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
- else
- saved_max_pfn = PFN_DOWN(real_size) - 1;
#endif
}
@@ -998,6 +994,7 @@ static void __init setup_hwcaps(void)
strcpy(elf_platform, "z196");
break;
case 0x2827:
+ case 0x2828:
strcpy(elf_platform, "zEC12");
break;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 4f977d0d25c2..1a4313a1b60f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -49,7 +49,6 @@
enum {
ec_schedule = 0,
- ec_call_function,
ec_call_function_single,
ec_stop_cpu,
};
@@ -166,7 +165,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
pcpu_sigp_retry(pcpu, order, 0);
}
-static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
struct _lowcore *lc;
@@ -363,7 +362,7 @@ void smp_yield_cpu(int cpu)
* Send cpus emergency shutdown signal. This gives the cpus the
* opportunity to complete outstanding interrupts.
*/
-void smp_emergency_stop(cpumask_t *cpumask)
+static void smp_emergency_stop(cpumask_t *cpumask)
{
u64 end;
int cpu;
@@ -438,8 +437,6 @@ static void smp_handle_ext_call(void)
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
scheduler_ipi();
- if (test_bit(ec_call_function, &bits))
- generic_smp_call_function_interrupt();
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
}
@@ -456,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
@@ -619,10 +616,9 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
return info;
}
-static int __cpuinit smp_add_present_cpu(int cpu);
+static int smp_add_present_cpu(int cpu);
-static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info,
- int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
@@ -688,7 +684,7 @@ static void __init smp_detect_cpus(void)
/*
* Activate a secondary processor.
*/
-static void __cpuinit smp_start_secondary(void *cpuvoid)
+static void smp_start_secondary(void *cpuvoid)
{
S390_lowcore.last_update_clock = get_tod_clock();
S390_lowcore.restart_stack = (unsigned long) restart_stack;
@@ -711,7 +707,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)
}
/* Upping and downing of CPUs */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
struct pcpu *pcpu;
int rc;
@@ -967,8 +963,8 @@ static struct attribute_group cpu_online_attr_group = {
.attrs = cpu_online_attrs,
};
-static int __cpuinit smp_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
struct cpu *c = &pcpu_devices[cpu].cpu;
@@ -986,7 +982,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
return notifier_from_errno(err);
}
-static int __cpuinit smp_add_present_cpu(int cpu)
+static int smp_add_present_cpu(int cpu)
{
struct cpu *c = &pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index c479d2f9605b..a7a7537ce1e7 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -10,6 +10,10 @@
#include <linux/suspend.h>
#include <linux/mm.h>
#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
+#include "entry.h"
/*
* References to section boundaries
@@ -211,3 +215,11 @@ void restore_processor_state(void)
__ctl_set_bit(0,28);
local_mcck_enable();
}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index c487be4cfc81..6b09fdffbd2f 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -281,11 +281,8 @@ restore_registers:
lghi %r2,0
brasl %r14,arch_set_page_states
- /* Log potential guest relocation */
- brasl %r14,lgr_info_log
-
- /* Reinitialize the channel subsystem */
- brasl %r14,channel_subsystem_reinit
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
/* Return 0 */
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 62f89d98e880..811f542b8ed4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -418,7 +418,7 @@ void s390_adjust_jiffies(void)
/*
* calibrate the delay loop
*/
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
{
s390_adjust_jiffies();
/* Print the good old Bogomips line .. */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 876546b9cfa1..064c3082ab33 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -92,7 +92,6 @@ void clock_comparator_work(void)
struct clock_event_device *cd;
S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
cd = &__get_cpu_var(comparators);
cd->event_handler(cd);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d7776281cb60..05d75c413137 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -63,7 +63,7 @@ static int __init vdso_setup(char *s)
else if (strncmp(s, "off", 4) == 0)
vdso_enabled = 0;
else {
- rc = strict_strtoul(s, 0, &val);
+ rc = kstrtoul(s, 0, &val);
vdso_enabled = rc ? 0 : !!val;
}
return !rc;
@@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
PAGE_SIZE << SEGMENT_ORDER);
- clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) page_table, _PAGE_INVALID,
256*sizeof(unsigned long));
*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
- *(unsigned long *) page_table = _PAGE_RO + page_frame;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
psal = (u32 *) (page_table + 256*sizeof(unsigned long));
aste = psal + 32;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 3fb09359eda6..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
+#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"
@@ -371,14 +372,14 @@ EXPORT_SYMBOL(del_virt_timer);
/*
* Start the virtual CPU timer on the current CPU.
*/
-void __cpuinit init_cpu_vtimer(void)
+void init_cpu_vtimer(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
}
-static int __cpuinit s390_nohz_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct s390_idle_data *idle;
long cpu = (long) hcpu;
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1c01a9912989..3a74d8af0d69 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
* - gpr 3 contains the virtqueue index (passed as datamatch)
+ * - gpr 4 contains the index on the bus (optionally)
*/
- ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
- vcpu->run->s.regs.gprs[2],
- 8, &vcpu->run->s.regs.gprs[3]);
+ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ vcpu->run->s.regs.gprs[2],
+ 8, &vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
- /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
+
+ /*
+ * Return cookie in gpr 2, but don't overwrite the register if the
+ * diagnose will be handled by userspace.
+ */
+ if (ret != -EOPNOTSUPP)
+ vcpu->run->s.regs.gprs[2] = ret;
+ /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
return ret < 0 ? ret : 0;
}
@@ -132,6 +141,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
trace_kvm_s390_handle_diag(vcpu, code);
switch (code) {
case 0x10:
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 302e0e52b009..99d789e8a018 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret = PTR_RET((void __force *)__uptr); \
+ int __ret; \
\
- if (!__ret) { \
+ if (IS_ERR((void __force *)__uptr)) { \
+ __ret = PTR_ERR((void __force *)__uptr); \
+ } else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = get_user(x, __uptr); \
} \
@@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret = PTR_RET((void __force *)__uptr); \
+ int __ret; \
\
- if (!__ret) { \
+ if (IS_ERR((void __force *)__uptr)) { \
+ __ret = PTR_ERR((void __force *)__uptr); \
+ } else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = put_user(x, __uptr); \
} \
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b7d1b2edeeb3..5ee56e5acc23 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,87 +22,6 @@
#include "trace.h"
#include "trace-s390.h"
-static int handle_lctlg(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- u64 useraddr;
- int reg, rc;
-
- vcpu->stat.instruction_lctlg++;
-
- useraddr = kvm_s390_get_base_disp_rsy(vcpu);
-
- if (useraddr & 7)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- reg = reg1;
-
- VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
- useraddr);
- trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
-
- do {
- rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
- (u64 __user *) useraddr);
- if (rc)
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- useraddr += 8;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static int handle_lctl(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- u64 useraddr;
- u32 val = 0;
- int reg, rc;
-
- vcpu->stat.instruction_lctl++;
-
- useraddr = kvm_s390_get_base_disp_rs(vcpu);
-
- if (useraddr & 3)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
- useraddr);
- trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
-
- reg = reg1;
- do {
- rc = get_guest(vcpu, val, (u32 __user *) useraddr);
- if (rc)
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
- vcpu->arch.sie_block->gcr[reg] |= val;
- useraddr += 4;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static const intercept_handler_t eb_handlers[256] = {
- [0x2f] = handle_lctlg,
- [0x8a] = kvm_s390_handle_priv_eb,
-};
-
-static int handle_eb(struct kvm_vcpu *vcpu)
-{
- intercept_handler_t handler;
-
- handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
- if (handler)
- return handler(vcpu);
- return -EOPNOTSUPP;
-}
static const intercept_handler_t instruction_handlers[256] = {
[0x01] = kvm_s390_handle_01,
@@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = {
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
- [0xb7] = handle_lctl,
+ [0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
[0xe5] = kvm_s390_handle_e5,
- [0xeb] = handle_eb,
+ [0xeb] = kvm_s390_handle_eb,
};
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
static int handle_validity(struct kvm_vcpu *vcpu)
{
- unsigned long vmaddr;
int viwhy = vcpu->arch.sie_block->ipb >> 16;
- int rc;
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- if (viwhy == 0x37) {
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- } else
- rc = -EOPNOTSUPP;
-
-out:
- if (rc)
- VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
- viwhy);
- return rc;
+ WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+ return -EOPNOTSUPP;
}
static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c948177529e..7f35cb33e510 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
no_timer:
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ add_wait_queue(&vcpu->wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
list_empty(&vcpu->arch.local_int.float_int->list) &&
(!vcpu->arch.local_int.timer_due) &&
@@ -452,7 +452,7 @@ no_timer:
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
@@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm)
spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->arch.local_int.wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
spin_unlock(&vcpu->arch.local_int.lock);
}
@@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
- BUG_ON(waitqueue_active(&li->wq));
+ BUG_ON(waitqueue_active(li->wq));
spin_unlock_bh(&li->lock);
return 0;
}
@@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
li = fi->local_int[sigcpu];
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
@@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c1c7c683fa26..776dafe918db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
+#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -59,6 +60,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
@@ -83,7 +85,14 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-static unsigned long long *facilities;
+unsigned long *vfacilities;
+static struct gmap_notifier gmap_notifier;
+
+/* test availability of vfacility */
+static inline int test_vfacility(unsigned long nr)
+{
+ return __test_facility(nr, (void *) vfacilities);
+}
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
@@ -96,13 +105,18 @@ void kvm_arch_hardware_disable(void *garbage)
{
}
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
int kvm_arch_hardware_setup(void)
{
+ gmap_notifier.notifier_call = kvm_gmap_notifier;
+ gmap_register_ipte_notifier(&gmap_notifier);
return 0;
}
void kvm_arch_hardware_unsetup(void)
{
+ gmap_unregister_ipte_notifier(&gmap_notifier);
}
void kvm_arch_check_processor_compat(void *rtn)
@@ -239,6 +253,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.gmap = gmap_alloc(current->mm);
if (!kvm->arch.gmap)
goto out_nogmap;
+ kvm->arch.gmap->private = kvm;
}
kvm->arch.css_support = 0;
@@ -270,7 +285,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -309,6 +324,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
return -ENOMEM;
+ vcpu->arch.gmap->private = vcpu->kvm;
return 0;
}
@@ -373,10 +389,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED);
+ CPUSTAT_STOPPED |
+ CPUSTAT_GED);
vcpu->arch.sie_block->ecb = 6;
+ vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
- vcpu->arch.sie_block->fac = (int) (long) facilities;
+ vcpu->arch.sie_block->fac = (int) (long) vfacilities;
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
@@ -397,7 +415,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
rc = -ENOMEM;
- vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
goto out;
@@ -427,7 +445,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
spin_lock(&kvm->arch.float_int.lock);
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
- init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
spin_unlock(&kvm->arch.float_int.lock);
@@ -442,7 +460,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(rc);
}
@@ -454,6 +472,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return 0;
}
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+ s390_vcpu_block(vcpu);
+ exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+ int i;
+ struct kvm *kvm = gmap->private;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* match against both prefix pages */
+ if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+ VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ exit_sie_sync(vcpu);
+ }
+ }
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
@@ -606,6 +668,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+ * This ensures that the ipte instruction for this request has
+ * already finished. We might race against a second unmapper that
+ * wants to set the blocking bit. Lets just retry the request loop.
+ */
+ while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ int rc;
+ rc = gmap_ipte_notify(vcpu->arch.gmap,
+ vcpu->arch.sie_block->prefix,
+ PAGE_SIZE * 2);
+ if (rc)
+ return rc;
+ s390_vcpu_unblock(vcpu);
+ }
+ return 0;
+}
+
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc;
@@ -621,16 +704,33 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
if (!kvm_is_ucontrol(vcpu->kvm))
kvm_s390_deliver_pending_interrupts(vcpu);
+ rc = kvm_s390_handle_requests(vcpu);
+ if (rc)
+ return rc;
+
vcpu->arch.sie_block->icptcode = 0;
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
atomic_read(&vcpu->arch.sie_block->cpuflags));
trace_kvm_s390_sie_enter(vcpu,
atomic_read(&vcpu->arch.sie_block->cpuflags));
+
+ /*
+ * As PF_VCPU will be used in fault handler, between guest_enter
+ * and guest_exit should be no uaccess.
+ */
+ preempt_disable();
+ kvm_guest_enter();
+ preempt_enable();
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
- if (rc) {
+ kvm_guest_exit();
+
+ VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+ vcpu->arch.sie_block->icptcode);
+ trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
+ if (rc > 0)
+ rc = 0;
+ if (rc < 0) {
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = SIE_INTERCEPT_UCONTROL;
} else {
@@ -639,10 +739,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
}
- VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
- vcpu->arch.sie_block->icptcode);
- trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
- kvm_guest_exit();
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
return rc;
@@ -974,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
return 0;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1040,22 +1140,31 @@ static int __init kvm_s390_init(void)
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
*/
- facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
- if (!facilities) {
+ vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!vfacilities) {
kvm_exit();
return -ENOMEM;
}
- memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
- facilities[0] &= 0xff00fff3f47c0000ULL;
- facilities[1] &= 0x001c000000000000ULL;
+ memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
+ vfacilities[0] &= 0xff82fff3f47c0000UL;
+ vfacilities[1] &= 0x001c000000000000UL;
return 0;
}
static void __exit kvm_s390_exit(void)
{
- free_page((unsigned long) facilities);
+ free_page((unsigned long) vfacilities);
kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index efc14f687265..dc99f1ca4267 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,6 +24,9 @@
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+/* declare vfacilities extern */
+extern unsigned long *vfacilities;
+
/* negativ values are error codes, positive values for internal conditions */
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
#define SIE_INTERCEPT_UCONTROL (1<<1)
@@ -63,6 +66,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
}
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
@@ -85,6 +89,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
{
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
@@ -105,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+ vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+ vcpu->arch.sie_block->gpsw.mask |= cc << 44;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
@@ -125,7 +142,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -133,6 +151,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
unsigned long addr);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 6bbd7b5a0bbe..59200ee275e5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,7 +1,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -16,10 +16,14 @@
#include <linux/errno.h>
#include <linux/compat.h>
#include <asm/asm-offsets.h>
+#include <asm/facility.h>
#include <asm/current.h>
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
#include "gaccess.h"
@@ -34,6 +38,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_spx++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
@@ -65,6 +72,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stpx++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
@@ -89,6 +99,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stap++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
useraddr = kvm_s390_get_base_disp_s(vcpu);
if (useraddr & 1)
@@ -105,7 +118,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
static int handle_skey(struct kvm_vcpu *vcpu)
{
vcpu->stat.instruction_storage_key++;
- vcpu->arch.sie_block->gpsw.addr -= 4;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@@ -129,9 +147,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
* Store the two-word I/O interruption code into the
* provided area.
*/
- put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
- put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
- put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
+ if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
+ || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
+ || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
} else {
/*
* Store the three-word I/O interruption code into
@@ -145,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
kfree(inti);
no_interrupt:
/* Set condition code and we're done. */
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
+ kvm_s390_set_psw_cc(vcpu, cc);
return 0;
}
@@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
if (vcpu->kvm->arch.css_support) {
/*
* Most I/O instructions will be handled by userspace.
@@ -198,27 +219,27 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
* Set condition code 3 to stop the guest from issueing channel
* I/O instructions.
*/
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ kvm_s390_set_psw_cc(vcpu, 3);
return 0;
}
}
static int handle_stfl(struct kvm_vcpu *vcpu)
{
- unsigned int facility_list;
int rc;
vcpu->stat.instruction_stfl++;
- /* only pass the facility bits, which we can handle */
- facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
- &facility_list, sizeof(facility_list));
+ vfacilities, 4);
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
- trace_kvm_s390_handle_stfl(vcpu, facility_list);
+ VCPU_EVENT(vcpu, 5, "store facility list value %x",
+ *(unsigned int *) vfacilities);
+ trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
return 0;
}
@@ -255,8 +276,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
u64 addr;
if (gpsw->mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -278,6 +299,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
psw_t new_psw;
u64 addr;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -296,6 +320,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stidp++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
if (operand2 & 7)
@@ -351,16 +378,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (fc > 3) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
- if (operand2 & 0xfff && fc > 0)
+ if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+ || vcpu->run->s.regs.gprs[1] & 0xffff0000)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- switch (fc) {
- case 0:
+ if (fc == 0) {
vcpu->run->s.regs.gprs[0] = 3 << 28;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ kvm_s390_set_psw_cc(vcpu, 0);
return 0;
+ }
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 0xfff)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
case 1: /* same handling for 1 and 2 */
case 2:
mem = get_zeroed_page(GFP_KERNEL);
@@ -377,8 +418,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
- default:
- goto out_no_data;
}
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
@@ -387,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ kvm_s390_set_psw_cc(vcpu, 0);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
out_no_data:
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+ kvm_s390_set_psw_cc(vcpu, 3);
out_exception:
free_page(mem);
return rc;
@@ -432,20 +470,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
intercept_handler_t handler;
/*
- * a lot of B2 instructions are priviledged. We first check for
- * the privileged ones, that we can handle in the kernel. If the
- * kernel can handle this instruction, we check for the problem
- * state bit and (a) handle the instruction or (b) send a code 2
- * program check.
- * Anything else goes to userspace.*/
+ * A lot of B2 instructions are priviledged. Here we check for
+ * the privileged ones, that we can handle in the kernel.
+ * Anything else goes to userspace.
+ */
handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler) {
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
- else
- return handler(vcpu);
- }
+ if (handler)
+ return handler(vcpu);
+
return -EOPNOTSUPP;
}
@@ -453,23 +485,100 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
{
int reg1, reg2;
- reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
- reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
- vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
if (reg2) {
- vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000;
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
vcpu->run->s.regs.gprs[reg2] |=
- vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff;
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ }
+ return 0;
+}
+
+#define PFMF_RESERVED 0xfffc0101UL
+#define PFMF_SK 0x00020000UL
+#define PFMF_CF 0x00010000UL
+#define PFMF_UI 0x00008000UL
+#define PFMF_FSC 0x00007000UL
+#define PFMF_NQ 0x00000800UL
+#define PFMF_MR 0x00000400UL
+#define PFMF_MC 0x00000200UL
+#define PFMF_KEY 0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+ unsigned long start, end;
+
+ vcpu->stat.instruction_pfmf++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ if (!MACHINE_HAS_PFMF)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Only provide non-quiescing support if the host supports it */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* No support for conditional-SSKE */
+ if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ case 0x00000000:
+ end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ break;
+ case 0x00001000:
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ break;
+ /* We dont support EDAT2
+ case 0x00002000:
+ end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ break;*/
+ default:
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ }
+ while (start < end) {
+ unsigned long useraddr;
+
+ useraddr = gmap_translate(start, vcpu->arch.gmap);
+ if (IS_ERR((void *)useraddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (clear_user((void __user *)useraddr, PAGE_SIZE))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+ if (set_guest_storage_key(current->mm, useraddr,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ start += PAGE_SIZE;
}
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
+ vcpu->run->s.regs.gprs[reg2] = end;
return 0;
}
static const intercept_handler_t b9_handlers[256] = {
[0x8d] = handle_epsw,
[0x9c] = handle_io_inst,
+ [0xaf] = handle_pfmf,
};
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
@@ -478,29 +587,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
/* This is handled just as for the B2 instructions. */
handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler) {
- if ((handler != handle_epsw) &&
- (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
- else
- return handler(vcpu);
- }
+ if (handler)
+ return handler(vcpu);
+
return -EOPNOTSUPP;
}
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 useraddr;
+ u32 val = 0;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ useraddr = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (useraddr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
+ useraddr);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
+
+ reg = reg1;
+ do {
+ rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= val;
+ useraddr += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 useraddr;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctlg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ useraddr = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (useraddr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
+ useraddr);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
+
+ do {
+ rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
+ (u64 __user *) useraddr);
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ useraddr += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
static const intercept_handler_t eb_handlers[256] = {
+ [0x2f] = handle_lctlg,
[0x8a] = handle_io_inst,
};
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
- /* All eb instructions that end up here are privileged. */
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
if (handler)
return handler(vcpu);
@@ -515,6 +691,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_tprot++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
/* we only handle the Linux memory detection case:
@@ -560,8 +739,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
u32 value;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
return kvm_s390_inject_program_int(vcpu,
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1c48ab2845e0..bec398c57acf 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
@@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
@@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
out:
spin_unlock_bh(&li->lock);
@@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
@@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
/* sigp in userspace can exit */
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
order_code = kvm_s390_get_base_disp_rs(vcpu);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index c61b9fad43cc..57c87d7d7ede 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs)
do {
set_clock_comparator(end);
vtime_stop_cpu();
- local_irq_disable();
} while (get_tod_clock() < end);
lockdep_on();
__ctl_load(cr0, 0, 0);
@@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs)
set_clock_comparator(end);
}
vtime_stop_cpu();
- local_irq_disable();
if (clock_saved)
local_tick_enable(clock_saved);
} while (get_tod_clock() < end);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 50ea137a2d3c..1694d738b175 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm,
switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
table = table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x39UL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION2:
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x3aUL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION3:
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return -0x3bUL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_SEGMENT:
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV))
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
return -0x10UL;
if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
- if (write && (*table & _SEGMENT_ENTRY_RO))
+ if (write && (*table & _SEGMENT_ENTRY_PROTECT))
return -0x04UL;
return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
(address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
@@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm,
table = table + ((address >> 12) & 0xff);
if (unlikely(*table & _PAGE_INVALID))
return -0x11UL;
- if (write && (*table & _PAGE_RO))
+ if (write && (*table & _PAGE_PROTECT))
return -0x04UL;
return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
}
@@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm,
unsigned long *table = (unsigned long *)__pa(mm->pgd);
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV))
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
return -0x10UL;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
table = table + ((address >> 12) & 0xff);
if (unlikely(*table & _PAGE_INVALID))
return -0x11UL;
- if (write && (*table & _PAGE_RO))
+ if (write && (*table & _PAGE_PROTECT))
return -0x04UL;
return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 3ad65b04ac15..46d517c3c763 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
seq_printf(m, "I\n");
return;
}
- seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW ");
+ seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " ");
seq_putc(m, '\n');
}
@@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
/*
- * The actual page table walker functions. In order to keep the implementation
- * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
- * flags to note_page() if a region, segment or page table entry is invalid or
- * read-only.
- * After all it's just a hint that the current level being walked contains an
- * invalid or read-only entry.
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
*/
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd, unsigned long addr)
@@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
st->current_address = addr;
pte = pte_offset_kernel(pmd, addr);
- prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+ prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
note_page(m, st, prot, 4);
addr += PAGE_SIZE;
}
}
#ifdef CONFIG_64BIT
-#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO)
+#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
#else
#define _PMD_PROT_MASK 0
#endif
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 047c3e4c59a2..fc6679210d83 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem);
@@ -639,8 +641,8 @@ out:
put_task_struct(tsk);
}
-static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct thread_struct *thread, *next;
struct task_struct *tsk;
@@ -673,7 +675,7 @@ static int __init pfault_irq_init(void)
rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
if (rc)
goto out_pfault;
- service_subclass_irq_register();
+ irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
hotcpu_notifier(pfault_cpu_notify, 0);
return 0;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215c..5d758db27bdc 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
pte_t *ptep, pte;
struct page *page;
- mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+ mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
do {
@@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
struct page *head, *page, *tail;
int refs;
- result = write ? 0 : _SEGMENT_ENTRY_RO;
- mask = result | _SEGMENT_ENTRY_INV;
+ result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+ mask = result | _SEGMENT_ENTRY_INVALID;
if ((pmd_val(pmd) & mask) != result)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 121089d57802..d261c62e40a6 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -8,21 +8,127 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+ int none, young, prot;
+ pmd_t pmd;
+
+ /*
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp ..R...I...y.
+ * empty .10...000000 -> ..0...1...0.
+ * prot-none, clean, old .11...000001 -> ..0...1...1.
+ * prot-none, clean, young .11...000101 -> ..1...1...1.
+ * prot-none, dirty, old .10...001001 -> ..0...1...1.
+ * prot-none, dirty, young .10...001101 -> ..1...1...1.
+ * read-only, clean, old .11...010001 -> ..1...1...0.
+ * read-only, clean, young .01...010101 -> ..1...0...1.
+ * read-only, dirty, old .11...011001 -> ..1...1...0.
+ * read-only, dirty, young .01...011101 -> ..1...0...1.
+ * read-write, clean, old .11...110001 -> ..0...1...0.
+ * read-write, clean, young .01...110101 -> ..0...0...1.
+ * read-write, dirty, old .10...111001 -> ..0...1...0.
+ * read-write, dirty, young .00...111101 -> ..0...0...1.
+ * Huge ptes are dirty by definition, a clean pte is made dirty
+ * by the conversion.
+ */
+ if (pte_present(pte)) {
+ pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+ if (pte_val(pte) & _PAGE_INVALID)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ none = (pte_val(pte) & _PAGE_PRESENT) &&
+ !(pte_val(pte) & _PAGE_READ) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ prot = (pte_val(pte) & _PAGE_PROTECT) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ young = pte_val(pte) & _PAGE_YOUNG;
+ if (none || young)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (prot || (none && young))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else
+ pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+ return pmd;
+}
+
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+ pte_t pte;
+
+ /*
+ * Convert encoding pmd bits pte bits
+ * ..R...I...y. .IR...wrdytp
+ * empty ..0...1...0. -> .10...000000
+ * prot-none, old ..0...1...1. -> .10...001001
+ * prot-none, young ..1...1...1. -> .10...001101
+ * read-only, old ..1...1...0. -> .11...011001
+ * read-only, young ..1...0...1. -> .01...011101
+ * read-write, old ..0...1...0. -> .10...111001
+ * read-write, young ..0...0...1. -> .00...111101
+ * Huge ptes are dirty by definition
+ */
+ if (pmd_present(pmd)) {
+ pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
+ (pmd_val(pmd) & PAGE_MASK);
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
+ pte_val(pte) |= _PAGE_INVALID;
+ if (pmd_prot_none(pmd)) {
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_YOUNG;
+ } else {
+ pte_val(pte) |= _PAGE_READ;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
+ pte_val(pte) |= _PAGE_YOUNG;
+ }
+ } else
+ pte_val(pte) = _PAGE_INVALID;
+ return pte;
+}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *pteptr, pte_t pteval)
+ pte_t *ptep, pte_t pte)
{
- pmd_t *pmdp = (pmd_t *) pteptr;
- unsigned long mask;
+ pmd_t pmd;
+ pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
- pteptr = (pte_t *) pte_page(pteval)[1].index;
- mask = pte_val(pteval) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= pte_page(pte)[1].index;
+ } else
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+ *(pmd_t *) ptep = pmd;
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long origin;
+ pmd_t pmd;
+
+ pmd = *(pmd_t *) ptep;
+ if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+ origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= *(unsigned long *) origin;
}
+ return __pmd_to_pte(pmd);
+}
- pmd_val(*pmdp) = pte_val(pteval);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pmd_t *pmdp = (pmd_t *) ptep;
+ pte_t pte = huge_ptep_get(ptep);
+
+ if (MACHINE_HAS_IDTE)
+ __pmd_idte(addr, pmdp);
+ else
+ __pmd_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ return pte;
}
int arch_prepare_hugepage(struct page *page)
@@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
- clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) ptep, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
@@ -117,6 +223,11 @@ int pud_huge(pud_t pud)
return 0;
}
+int pmd_huge_support(void)
+{
+ return 1;
+}
+
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmdp, int write)
{
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 89ebae4008f2..ad446b0c55b6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -69,6 +69,7 @@ static void __init setup_zero_pages(void)
order = 2;
break;
case 0x2827: /* zEC12 */
+ case 0x2828: /* zEC12 */
default:
order = 5;
break;
@@ -135,30 +136,17 @@ void __init paging_init(void)
void __init mem_init(void)
{
- unsigned long codesize, reservedpages, datasize, initsize;
-
- max_mapnr = num_physpages = max_low_pfn;
+ max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* Setup guest page hinting */
cmma_init();
/* this will put all low memory onto the freelists */
- totalram_pages += free_all_bootmem();
+ free_all_bootmem();
setup_zero_pages(); /* Setup zeroed pages. */
- reservedpages = 0;
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
- nr_free_pages() << (PAGE_SHIFT-10),
- max_mapnr << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >>10,
- initsize >> 10);
+ mem_init_print_info(NULL);
printk("Write protected kernel read-only data: %#lx - %#lx\n",
(unsigned long)&_stext,
PFN_ALIGN((unsigned long)&_eshared) - 1);
@@ -166,13 +154,14 @@ void __init mem_init(void)
void free_initmem(void)
{
- free_initmem_default(0);
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
}
#endif
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 921fa541dc04..d1e0e0c7a7e2 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -14,6 +14,7 @@
#include <linux/gfp.h>
#include <linux/cpu.h>
#include <asm/ctl_reg.h>
+#include <asm/io.h>
/*
* This function writes to kernel memory bypassing DAT and possible
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
index 3cbd3b8bf311..cca388253a39 100644
--- a/arch/s390/mm/mem_detect.c
+++ b/arch/s390/mm/mem_detect.c
@@ -123,7 +123,8 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
continue;
} else if ((addr <= chunk->addr) &&
(addr + size >= chunk->addr + chunk->size)) {
- memset(chunk, 0 , sizeof(*chunk));
+ memmove(chunk, chunk + 1, (MEMORY_CHUNKS-i-1) * sizeof(*chunk));
+ memset(&mem_chunk[MEMORY_CHUNKS-1], 0, sizeof(*chunk));
} else if (addr + size < chunk->addr + chunk->size) {
chunk->size = chunk->addr + chunk->size - addr - size;
chunk->addr = addr + size;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 06bafec00278..40023290ee5b 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = s390_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 80adfbf75065..990397420e6b 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
pte = pte_offset_kernel(pmd, address);
if (!enable) {
__ptep_ipte(address, pte);
- pte_val(*pte) = _PAGE_TYPE_EMPTY;
+ pte_val(*pte) = _PAGE_INVALID;
continue;
}
pte_val(*pte) = __pa(address);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a938b548f07e..de8cbc30dcd1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
struct gmap_rmap *rmap;
struct page *page;
- if (*table & _SEGMENT_ENTRY_INV)
+ if (*table & _SEGMENT_ENTRY_INVALID)
return 0;
page = pfn_to_page(*table >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
kfree(rmap);
break;
}
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
return 1;
}
@@ -245,7 +245,9 @@ EXPORT_SYMBOL_GPL(gmap_disable);
* gmap_alloc_table is assumed to be called with mmap_sem held
*/
static int gmap_alloc_table(struct gmap *gmap,
- unsigned long *table, unsigned long init)
+ unsigned long *table, unsigned long init)
+ __releases(&gmap->mm->page_table_lock)
+ __acquires(&gmap->mm->page_table_lock)
{
struct page *page;
unsigned long *new;
@@ -258,7 +260,7 @@ static int gmap_alloc_table(struct gmap *gmap,
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
- if (*table & _REGION_ENTRY_INV) {
+ if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +294,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the guest addr space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 20) & 0x7ff);
/* Clear segment table entry in guest address space. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV;
+ *table = _SEGMENT_ENTRY_INVALID;
}
out:
spin_unlock(&gmap->mm->page_table_lock);
@@ -335,7 +337,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > PGDIR_SIZE ||
+ if (len == 0 || from + len > TASK_MAX_SIZE ||
from + len < from || to + len < to)
return -EINVAL;
@@ -345,17 +347,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the gmap address space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +365,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
/* Store 'from' address in an invalid segment table entry. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+ *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
}
spin_unlock(&gmap->mm->page_table_lock);
up_read(&gmap->mm->mmap_sem);
@@ -384,15 +387,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
unsigned long *table;
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
@@ -422,11 +425,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
return PTR_ERR(segment_ptr);
/* Convert the gmap address to an mm address. */
segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INV)) {
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
page = pfn_to_page(segment >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_RO) {
+ } else if (segment & _SEGMENT_ENTRY_PROTECT) {
vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
return vmaddr | (address & ~PMD_MASK);
}
@@ -517,8 +520,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry =
- _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
list_del(&rmap->list);
kfree(rmap);
flush = 1;
@@ -545,13 +548,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
/* Convert the gmap address to an mm address. */
while (1) {
segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INV)) {
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
/* Page table is present */
page = pfn_to_page(segment >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
return mp->vmaddr | (address & ~PMD_MASK);
}
- if (!(segment & _SEGMENT_ENTRY_RO))
+ if (!(segment & _SEGMENT_ENTRY_PROTECT))
/* Nothing mapped in the gmap address space. */
break;
rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
@@ -586,25 +589,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
while (address < to) {
/* Walk the gmap address space page table */
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -687,9 +690,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
continue;
/* Set notification bit in the pgste of the pte */
entry = *ptep;
- if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+ if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= RCP_IN_BIT;
+ pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
start += PAGE_SIZE;
len -= PAGE_SIZE;
@@ -731,6 +734,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
spin_unlock(&gmap_notifier_lock);
}
+static inline int page_table_with_pgste(struct page *page)
+{
+ return atomic_read(&page->_mapcount) == 0;
+}
+
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -750,10 +758,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
mp->vmaddr = vmaddr & PMD_MASK;
INIT_LIST_HEAD(&mp->mapper);
page->index = (unsigned long) mp;
- atomic_set(&page->_mapcount, 3);
+ atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
+ PAGE_SIZE/2);
return table;
}
@@ -771,8 +780,56 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned long key, bool nq)
+{
+ spinlock_t *ptl;
+ pgste_t old, new;
+ pte_t *ptep;
+
+ down_read(&mm->mmap_sem);
+ ptep = get_locked_pte(current->mm, addr, &ptl);
+ if (unlikely(!ptep)) {
+ up_read(&mm->mmap_sem);
+ return -EFAULT;
+ }
+
+ new = old = pgste_get_lock(ptep);
+ pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+ PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ unsigned long address, bits, skey;
+
+ address = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+ /* Set storage key ACC and FP */
+ page_set_storage_key(address, skey, !nq);
+ /* Merge host changed & referenced into pgste */
+ pgste_val(new) |= bits << 52;
+ }
+ /* changing the guest storage key is considered a change of the page */
+ if ((pgste_val(new) ^ pgste_val(old)) &
+ (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+ pgste_val(new) |= PGSTE_HC_BIT;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(*ptep, ptl);
+ up_read(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
#else /* CONFIG_PGSTE */
+static inline int page_table_with_pgste(struct page *page)
+{
+ return 0;
+}
+
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -830,7 +887,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
pgtable_page_ctor(page);
atomic_set(&page->_mapcount, 1);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
} else {
@@ -849,12 +906,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned int bit, mask;
- if (mm_has_pgste(mm)) {
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
gmap_disconnect_pgtable(mm, table);
return page_table_free_pgste(table);
}
/* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -892,14 +949,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
unsigned int bit, mask;
mm = tlb->mm;
- if (mm_has_pgste(mm)) {
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
gmap_disconnect_pgtable(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
list_del(&page->lru);
@@ -911,7 +968,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
tlb_remove_table(tlb, table);
}
-void __tlb_remove_table(void *_table)
+static void __tlb_remove_table(void *_table)
{
const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
void *table = (void *)((unsigned long) _table & ~mask);
@@ -959,7 +1016,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
struct mmu_table_batch **batch = &tlb->batch;
if (*batch) {
- __tlb_flush_mm(tlb->mm);
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
}
@@ -969,11 +1025,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb->mm->context.flush_mm = 1;
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)
__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- __tlb_flush_mm(tlb->mm);
+ __tlb_flush_mm_lazy(tlb->mm);
tlb_remove_table_one(table);
return;
}
@@ -981,40 +1038,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
}
(*batch)->tables[(*batch)->nr++] = table;
if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
{
unsigned long addr;
- struct page *page;
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
- page = follow_page(vma, addr, FOLL_SPLIT);
- }
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
}
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
{
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
- while (vma != NULL) {
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
thp_split_vma(vma);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
- vma = vma->vm_next;
}
+ mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+ struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next, *table, *new;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+again:
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ table = (unsigned long *) pmd_deref(*pmd);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page))
+ continue;
+ /* Allocate new page table with pgstes */
+ new = page_table_alloc_pgste(mm, addr);
+ if (!new) {
+ mm->context.has_pgste = 0;
+ continue;
+ }
+ spin_lock(&mm->page_table_lock);
+ if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+ /* Nuke pmd entry pointing to the "short" page table */
+ pmdp_flush_lazy(mm, addr, pmd);
+ pmd_clear(pmd);
+ /* Copy ptes from old table to new table */
+ memcpy(new, table, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ /* Establish new table */
+ pmd_populate(mm, pmd, (pte_t *) new);
+ /* Free old table with rcu, there might be a walker! */
+ page_table_free_rcu(tlb, table);
+ new = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (new) {
+ page_table_free_pgste(new);
+ goto again;
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+ struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm, *old_mm;
+ struct mm_struct *mm = tsk->mm;
+ struct mmu_gather tlb;
/* Do we have switched amode? If no, we cannot do sie */
if (s390_user_mode == HOME_SPACE_MODE)
@@ -1024,57 +1165,16 @@ int s390_enable_sie(void)
if (mm_has_pgste(tsk->mm))
return 0;
- /* lets check if we are allowed to replace the mm */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- task_unlock(tsk);
- return -EINVAL;
- }
- task_unlock(tsk);
-
- /* we copy the mm and let dup_mm create the page tables with_pgstes */
- tsk->mm->context.alloc_pgste = 1;
- /* make sure that both mms have a correct rss state */
- sync_mm_rss(tsk->mm);
- mm = dup_mm(tsk);
- tsk->mm->context.alloc_pgste = 0;
- if (!mm)
- return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ down_write(&mm->mmap_sem);
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- mm->def_flags |= VM_NOHUGEPAGE;
-#endif
-
- /* Now lets check again if something happened */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- mmput(mm);
- task_unlock(tsk);
- return -EINVAL;
- }
-
- /* ok, we are alone. No ptrace, no threads, etc. */
- old_mm = tsk->mm;
- tsk->mm = tsk->active_mm = mm;
- preempt_disable();
- update_mm(mm, tsk);
- atomic_inc(&mm->context.attach_count);
- atomic_dec(&old_mm->context.attach_count);
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- preempt_enable();
- task_unlock(tsk);
- mmput(old_mm);
- return 0;
+ /* Reallocate the page tables with pgstes */
+ mm->context.has_pgste = 1;
+ tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+ page_table_realloc(&tlb, mm, 0, TASK_SIZE);
+ tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+ up_write(&mm->mmap_sem);
+ return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
@@ -1117,7 +1217,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
}
}
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
@@ -1131,7 +1232,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
mm->pmd_huge_pte = pgtable;
}
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
struct list_head *lh;
pgtable_t pgtable;
@@ -1149,9 +1250,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
list_del(lh);
}
ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
ptep++;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8b268fcc4612..bcfb70b60be6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) pte, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
pud_val(*pu_dir) = __pa(address) |
_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
- (ro ? _REGION_ENTRY_RO : 0);
+ (ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
@@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
!(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
- (ro ? _SEGMENT_ENTRY_RO : 0);
+ _SEGMENT_ENTRY_YOUNG |
+ (ro ? _SEGMENT_ENTRY_PROTECT : 0);
address += PMD_SIZE;
continue;
}
@@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
}
pt_dir = pte_offset_kernel(pm_dir, address);
- pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
+ pte_val(*pt_dir) = __pa(address) |
+ pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
address += PAGE_SIZE;
}
ret = 0;
@@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
pte_t *pt_dir;
pte_t pte;
- pte_val(pte) = _PAGE_TYPE_EMPTY;
+ pte_val(pte) = _PAGE_INVALID;
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
new_page =__pa(vmem_alloc_pages(0));
if (!new_page)
goto out;
- pte_val(*pt_dir) = __pa(new_page);
+ pte_val(*pt_dir) =
+ __pa(new_page) | pgprot_val(PAGE_KERNEL);
}
address += PAGE_SIZE;
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 82f165f8078c..709239285869 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -9,6 +9,8 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/facility.h>
@@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
EMIT2(0x07fe);
}
+/* Helper to find the offset of pkt_type in sk_buff
+ * Make sure its still a 3bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+ struct sk_buff skb_probe = {
+ .pkt_type = ~0,
+ };
+ char *ct = (char *)&skb_probe;
+ int off;
+
+ pkt_type_offset = -1;
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (!ct[off])
+ continue;
+ if (ct[off] == PKT_TYPE_MAX)
+ pkt_type_offset = off;
+ else {
+ /* Found non matching bit pattern, fix needed. */
+ WARN_ON_ONCE(1);
+ pkt_type_offset = -1;
+ return -1;
+ }
+ }
+ return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
/*
* make sure we dont leak kernel information to user
*/
@@ -720,6 +753,16 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
EMIT4_DISP(0x88500000, 12);
}
break;
+ case BPF_S_ANC_PKTTYPE:
+ if (pkt_type_offset < 0)
+ goto out;
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* ic %r5,<d(pkt_type_offset)>(%r2) */
+ EMIT4_DISP(0x43502000, pkt_type_offset);
+ /* srl %r5,5 */
+ EMIT4_DISP(0x88500000, 5);
+ break;
case BPF_S_ANC_CPU: /* A = smp_processor_id() */
#ifdef CONFIG_SMP
/* l %r5,<d(cpu_nr)> */
@@ -738,8 +781,41 @@ out:
return -1;
}
+/*
+ * Note: for security reasons, bpf code will follow a randomly
+ * sized amount of illegal instructions.
+ */
+struct bpf_binary_header {
+ unsigned int pages;
+ u8 image[];
+};
+
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+ u8 **image_ptr)
+{
+ struct bpf_binary_header *header;
+ unsigned int sz, hole;
+
+ /* Most BPF filters are really small, but if some of them fill a page,
+ * allow at least 128 extra bytes for illegal instructions.
+ */
+ sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
+ header = module_alloc(sz);
+ if (!header)
+ return NULL;
+ memset(header, 0, sz);
+ header->pages = sz / PAGE_SIZE;
+ hole = sz - (bpfsize + sizeof(*header));
+ /* Insert random number of illegal instructions before BPF code
+ * and make sure the first instruction starts at an even address.
+ */
+ *image_ptr = &header->image[(prandom_u32() % hole) & -2];
+ return header;
+}
+
void bpf_jit_compile(struct sk_filter *fp)
{
+ struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
struct bpf_jit jit, cjit;
unsigned int *addrs;
@@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp)
} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
prg_len = jit.prg - jit.start;
lit_len = jit.lit - jit.mid;
- size = max_t(unsigned long, prg_len + lit_len,
- sizeof(struct work_struct));
+ size = prg_len + lit_len;
if (size >= BPF_SIZE_MAX)
goto out;
- jit.start = module_alloc(size);
- if (!jit.start)
+ header = bpf_alloc_binary(size, &jit.start);
+ if (!header)
goto out;
jit.prg = jit.mid = jit.start + prg_len;
jit.lit = jit.end = jit.start + prg_len + lit_len;
@@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp)
cjit = jit;
}
if (bpf_jit_enable > 1) {
- pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
- fp->len, jit.end - jit.start, pass, jit.start);
- if (jit.start) {
- printk(KERN_ERR "JIT code:\n");
+ bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+ if (jit.start)
print_fn_code(jit.start, jit.mid - jit.start);
- print_hex_dump(KERN_ERR, "JIT literals:\n",
- DUMP_PREFIX_ADDRESS, 16, 1,
- jit.mid, jit.end - jit.mid, false);
- }
}
- if (jit.start)
+ if (jit.start) {
+ set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.start;
+ }
out:
kfree(addrs);
}
-static void jit_free_defer(struct work_struct *arg)
-{
- module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
void bpf_jit_free(struct sk_filter *fp)
{
- struct work_struct *work;
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
if (fp->bpf_func == sk_run_filter)
return;
- work = (struct work_struct *)fp->bpf_func;
- INIT_WORK(work, jit_free_defer);
- schedule_work(work);
+ set_memory_rw(addr, header->pages);
+ module_free(NULL, header);
}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index b5b2916895e0..231cecafc2f1 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -1001,7 +1001,7 @@ int hwsampler_deallocate(void)
if (hws_state != HWS_STOPPED)
goto deallocate_exit;
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
deallocate_sdbt();
hws_state = HWS_DEALLOCATED;
@@ -1115,7 +1115,7 @@ int hwsampler_shutdown(void)
mutex_lock(&hws_sem);
if (hws_state == HWS_STOPPED) {
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
deallocate_sdbt();
}
if (hws_wq) {
@@ -1190,7 +1190,7 @@ start_all_exit:
hws_oom = 1;
hws_flush_all = 0;
/* now let them in, 1407 CPUMF external interrupts */
- measurement_alert_subclass_register();
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
return 0;
}
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 1912f3bb190c..0022e1ebfbde 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -81,8 +81,8 @@ struct hws_data_entry {
unsigned int:16;
unsigned int prim_asn:16; /* primary ASN */
unsigned long long ia; /* Instruction Address */
- unsigned long long lpp; /* Logical-Partition Program Param. */
- unsigned long long vpp; /* Virtual-Machine Program Param. */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
};
struct hws_trailer_entry {
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index ffeb17ce7f31..04e1b6a85362 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -346,16 +346,15 @@ static const struct file_operations timer_enabled_fops = {
};
-static int oprofile_create_hwsampling_files(struct super_block *sb,
- struct dentry *root)
+static int oprofile_create_hwsampling_files(struct dentry *root)
{
struct dentry *dir;
- dir = oprofilefs_mkdir(sb, root, "timer");
+ dir = oprofilefs_mkdir(root, "timer");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+ oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
if (!hwsampler_available)
return 0;
@@ -376,17 +375,17 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
* and can only be set to 0.
*/
- dir = oprofilefs_mkdir(sb, root, "0");
+ dir = oprofilefs_mkdir(root, "0");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
- oprofilefs_create_file(sb, dir, "event", &zero_fops);
- oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
- oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
- oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
- oprofilefs_create_file(sb, dir, "user", &user_fops);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
+ oprofilefs_create_file(dir, "event", &zero_fops);
+ oprofilefs_create_file(dir, "count", &hw_interval_fops);
+ oprofilefs_create_file(dir, "unit_mask", &zero_fops);
+ oprofilefs_create_file(dir, "kernel", &kernel_fops);
+ oprofilefs_create_file(dir, "user", &user_fops);
+ oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
&oprofile_sdbt_blocks);
} else {
@@ -396,19 +395,19 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
* space tools. The /dev/oprofile/hwsampling fs is
* provided in that case.
*/
- dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ dir = oprofilefs_mkdir(root, "hwsampling");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "hwsampler",
+ oprofilefs_create_file(dir, "hwsampler",
&hwsampler_fops);
- oprofilefs_create_file(sb, dir, "hw_interval",
+ oprofilefs_create_file(dir, "hw_interval",
&hw_interval_fops);
- oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+ oprofilefs_create_ro_ulong(dir, "hw_min_interval",
&oprofile_min_interval);
- oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+ oprofilefs_create_ro_ulong(dir, "hw_max_interval",
&oprofile_max_interval);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
&oprofile_sdbt_blocks);
}
return 0;
@@ -440,7 +439,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
switch (id.machine) {
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
- case 0x2827: ops->cpu_type = "s390/zEC12"; break;
+ case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
default: return -ENODEV;
}
}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 086a2e37935d..a9e1dc4ae442 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -2,5 +2,5 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e6f15b5d8b7d..f17a8343e360 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -42,78 +42,43 @@
#define SIC_IRQ_MODE_SINGLE 1
#define ZPCI_NR_DMA_SPACES 1
-#define ZPCI_MSI_VEC_BITS 6
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
/* list of all detected zpci devices */
-LIST_HEAD(zpci_list);
-EXPORT_SYMBOL_GPL(zpci_list);
-DEFINE_MUTEX(zpci_list_lock);
-EXPORT_SYMBOL_GPL(zpci_list_lock);
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
-static struct pci_hp_callback_ops *hotplug_ops;
+static void zpci_enable_irq(struct irq_data *data);
+static void zpci_disable_irq(struct irq_data *data);
+
+static struct irq_chip zpci_irq_chip = {
+ .name = "zPCI",
+ .irq_unmask = zpci_enable_irq,
+ .irq_mask = zpci_disable_irq,
+};
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
static DEFINE_SPINLOCK(zpci_domain_lock);
-struct callback {
- irq_handler_t handler;
- void *data;
-};
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
-struct zdev_irq_map {
- unsigned long aibv; /* AI bit vector */
- int msi_vecs; /* consecutive MSI-vectors used */
- int __unused;
- struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
- spinlock_t lock; /* protect callbacks against de-reg */
-};
+/* Adapter interrupt definitions */
+static void zpci_irq_handler(struct airq_struct *airq);
-struct intr_bucket {
- /* amap of adapters, one bit per dev, corresponds to one irq nr */
- unsigned long *alloc;
- /* AI summary bit, global page for all devices */
- unsigned long *aisb;
- /* pointer to aibv and callback data in zdev */
- struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
- /* protects the whole bucket struct */
- spinlock_t lock;
+static struct airq_struct zpci_airq = {
+ .handler = zpci_irq_handler,
+ .isc = PCI_ISC,
};
-static struct intr_bucket *bucket;
-
-/* Adapter local summary indicator */
-static u8 *zpci_irq_si;
-
-static atomic_t irq_retries = ATOMIC_INIT(0);
-
/* I/O Map */
static DEFINE_SPINLOCK(zpci_iomap_lock);
static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
-/* highest irq summary bit */
-static int __read_mostly aisb_max;
-
-static struct kmem_cache *zdev_irq_cache;
static struct kmem_cache *zdev_fmb_cache;
-static inline int irq_to_msi_nr(unsigned int irq)
-{
- return irq & ZPCI_MSI_MASK;
-}
-
-static inline int irq_to_dev_nr(unsigned int irq)
-{
- return irq >> ZPCI_MSI_VEC_BITS;
-}
-
-static inline struct zdev_irq_map *get_imap(unsigned int irq)
-{
- return bucket->imap[irq_to_dev_nr(irq)];
-}
-
struct zpci_dev *get_zdev(struct pci_dev *pdev)
{
return (struct zpci_dev *) pdev->sysdata;
@@ -123,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_for_each_entry(tmp, &zpci_list, entry) {
if (tmp->fid == fid) {
zdev = tmp;
break;
}
}
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
return zdev;
}
-bool zpci_fid_present(u32 fid)
-{
- return (get_zdev_by_fid(fid) != NULL) ? true : false;
-}
-
static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
{
return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -157,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus)
EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register adapter interruptions */
-static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
- u64 aibv)
+static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
struct zpci_fib *fib;
@@ -169,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
return -ENOMEM;
fib->isc = PCI_ISC;
- fib->noi = zdev->irq_map->msi_vecs;
fib->sum = 1; /* enable summary notifications */
- fib->aibv = aibv;
- fib->aibvo = 0; /* every function has its own page */
- fib->aisb = (u64) bucket->aisb + aisb / 8;
- fib->aisbo = aisb & ZPCI_MSI_MASK;
+ fib->noi = airq_iv_end(zdev->aibv);
+ fib->aibv = (unsigned long) zdev->aibv->vector;
+ fib->aibvo = 0; /* each zdev has its own interrupt vector */
+ fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+ fib->aisbo = zdev->aisb & 63;
- rc = s390pci_mod_fc(req, fib);
+ rc = zpci_mod_fc(req, fib);
pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
free_page((unsigned long) fib);
@@ -206,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args
fib->iota = args->iota;
fib->fmb_addr = args->fmb_addr;
- rc = s390pci_mod_fc(req, fib);
+ rc = zpci_mod_fc(req, fib);
free_page((unsigned long) fib);
return rc;
}
@@ -231,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
}
/* Modify PCI: Unregister adapter interruptions */
-static int zpci_unregister_airq(struct zpci_dev *zdev)
+static int zpci_clear_airq(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
@@ -280,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
u64 data;
int rc;
- rc = s390pci_load(&data, req, offset);
+ rc = zpci_load(&data, req, offset);
if (!rc) {
data = data << ((8 - len) * 8);
data = le64_to_cpu(data);
@@ -298,58 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
data = cpu_to_le64(data);
data = data >> ((8 - len) * 8);
- rc = s390pci_store(data, req, offset);
+ rc = zpci_store(data, req, offset);
return rc;
}
-void synchronize_irq(unsigned int irq)
+static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
{
- /*
- * Not needed, the handler is protected by a lock and IRQs that occur
- * after the handler is deleted are just NOPs.
- */
-}
-EXPORT_SYMBOL_GPL(synchronize_irq);
+ int offset, pos;
+ u32 mask_bits;
-void enable_irq(unsigned int irq)
-{
- struct msi_desc *msi = irq_get_msi_desc(irq);
-
- zpci_msi_set_mask_bits(msi, 1, 0);
-}
-EXPORT_SYMBOL_GPL(enable_irq);
-
-void disable_irq(unsigned int irq)
-{
- struct msi_desc *msi = irq_get_msi_desc(irq);
+ if (msi->msi_attrib.is_msix) {
+ offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+ msi->masked = readl(msi->mask_base + offset);
+ writel(flag, msi->mask_base + offset);
+ } else if (msi->msi_attrib.maskbit) {
+ pos = (long) msi->mask_base;
+ pci_read_config_dword(msi->dev, pos, &mask_bits);
+ mask_bits &= ~(mask);
+ mask_bits |= flag & mask;
+ pci_write_config_dword(msi->dev, pos, mask_bits);
+ } else
+ return 0;
- zpci_msi_set_mask_bits(msi, 1, 1);
+ msi->msi_attrib.maskbit = !!flag;
+ return 1;
}
-EXPORT_SYMBOL_GPL(disable_irq);
-void disable_irq_nosync(unsigned int irq)
+static void zpci_enable_irq(struct irq_data *data)
{
- disable_irq(irq);
-}
-EXPORT_SYMBOL_GPL(disable_irq_nosync);
+ struct msi_desc *msi = irq_get_msi_desc(data->irq);
-unsigned long probe_irq_on(void)
-{
- return 0;
+ zpci_msi_set_mask_bits(msi, 1, 0);
}
-EXPORT_SYMBOL_GPL(probe_irq_on);
-int probe_irq_off(unsigned long val)
+static void zpci_disable_irq(struct irq_data *data)
{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_off);
+ struct msi_desc *msi = irq_get_msi_desc(data->irq);
-unsigned int probe_irq_mask(unsigned long val)
-{
- return val;
+ zpci_msi_set_mask_bits(msi, 1, 1);
}
-EXPORT_SYMBOL_GPL(probe_irq_mask);
void pcibios_fixup_bus(struct pci_bus *bus)
{
@@ -434,156 +381,165 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-/* store the last handled bit to implement fair scheduling of devices */
-static DEFINE_PER_CPU(unsigned long, next_sbit);
-
-static void zpci_irq_handler(void *dont, void *need)
+static void zpci_irq_handler(struct airq_struct *airq)
{
- unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
- int rescan = 0, max = aisb_max;
- struct zdev_irq_map *imap;
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
inc_irq_stat(IRQIO_PCI);
- sbit = start;
-
-scan:
- /* find summary_bit */
- for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
- clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
- last = sbit;
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ si = 0;
+ continue;
+ }
- /* find vector bit */
- imap = bucket->imap[sbit];
- for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_aibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
inc_irq_stat(IRQIO_MSI);
- clear_bit(63 - mbit, &imap->aibv);
-
- spin_lock(&imap->lock);
- if (imap->cb[mbit].handler)
- imap->cb[mbit].handler(mbit,
- imap->cb[mbit].data);
- spin_unlock(&imap->lock);
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
}
}
-
- if (rescan)
- goto out;
-
- /* scan the skipped bits */
- if (start > 0) {
- sbit = 0;
- max = start;
- start = 0;
- goto scan;
- }
-
- /* enable interrupts again */
- set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
-
- /* check again to not lose initiative */
- rmb();
- max = aisb_max;
- sbit = find_first_bit_left(bucket->aisb, max);
- if (sbit != max) {
- atomic_inc(&irq_retries);
- rescan++;
- goto scan;
- }
-out:
- /* store next device bit to scan */
- __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
}
-/* msi_vecs - number of requested interrupts, 0 place function to error state */
-static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct zpci_dev *zdev = get_zdev(pdev);
- unsigned int aisb, msi_nr;
+ unsigned int hwirq, irq, msi_vecs;
+ unsigned long aisb;
struct msi_desc *msi;
+ struct msi_msg msg;
int rc;
- /* store the number of used MSI vectors */
- zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
-
- spin_lock(&bucket->lock);
- aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
- /* alloc map exhausted? */
- if (aisb == PAGE_SIZE) {
- spin_unlock(&bucket->lock);
- return -EIO;
- }
- set_bit(aisb, bucket->alloc);
- spin_unlock(&bucket->lock);
+ pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+ if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+ return -EINVAL;
+ msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
+ msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
+ /* Allocate adapter summary indicator bit */
+ rc = -EIO;
+ aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+ if (aisb == -1UL)
+ goto out;
zdev->aisb = aisb;
- if (aisb + 1 > aisb_max)
- aisb_max = aisb + 1;
- /* wire up IRQ shortcut pointer */
- bucket->imap[zdev->aisb] = zdev->irq_map;
- pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+ /* Create adapter interrupt vector */
+ rc = -ENOMEM;
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ goto out_si;
- /* TODO: irq number 0 wont be found if we return less than requested MSIs.
- * ignore it for now and fix in common code.
- */
- msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+ /* Wire up shortcut pointer */
+ zpci_aibv[aisb] = zdev->aibv;
+ /* Request MSI interrupts */
+ hwirq = 0;
list_for_each_entry(msi, &pdev->msi_list, list) {
- rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
- aisb << ZPCI_MSI_VEC_BITS);
+ rc = -EIO;
+ irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
+ if (irq == NO_IRQ)
+ goto out_msi;
+ rc = irq_set_msi_desc(irq, msi);
if (rc)
- return rc;
- msi_nr++;
+ goto out_msi;
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_simple_irq);
+ msg.data = hwirq;
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ msg.address_hi = zdev->msi_addr >> 32;
+ write_msi_msg(irq, &msg);
+ airq_iv_set_data(zdev->aibv, hwirq, irq);
+ hwirq++;
}
- rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
- if (rc) {
- clear_bit(aisb, bucket->alloc);
- dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
- return rc;
+ /* Enable adapter interrupts */
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ goto out_msi;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ if (hwirq-- == 0)
+ break;
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
}
- return (zdev->irq_map->msi_vecs == msi_vecs) ?
- 0 : zdev->irq_map->msi_vecs;
+ zpci_aibv[aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+out_si:
+ airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+ dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
+ return rc;
}
-static void zpci_teardown_msi(struct pci_dev *pdev)
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
struct msi_desc *msi;
- int aisb, rc;
+ int rc;
- rc = zpci_unregister_airq(zdev);
+ pr_info("%s: on pdev: %p\n", __func__, pdev);
+
+ /* Disable adapter interrupts */
+ rc = zpci_clear_airq(zdev);
if (rc) {
dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
return;
}
- msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
- aisb = irq_to_dev_nr(msi->irq);
-
- list_for_each_entry(msi, &pdev->msi_list, list)
- zpci_teardown_msi_irq(zdev, msi);
+ /* Release MSI interrupts */
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ zpci_msi_set_mask_bits(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
- clear_bit(aisb, bucket->alloc);
- if (aisb + 1 == aisb_max)
- aisb_max--;
+ zpci_aibv[zdev->aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static void zpci_map_resources(struct zpci_dev *zdev)
{
- pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
- if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
- return -EINVAL;
- return zpci_setup_msi(pdev, nvec);
-}
+ struct pci_dev *pdev = zdev->pdev;
+ resource_size_t len;
+ int i;
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
- pr_info("%s: on pdev: %p\n", __func__, pdev);
- zpci_teardown_msi(pdev);
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ len = pci_resource_len(pdev, i);
+ if (!len)
+ continue;
+ pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
+ pdev->resource[i].end = pdev->resource[i].start + len - 1;
+ }
}
-static void zpci_map_resources(struct zpci_dev *zdev)
+static void zpci_unmap_resources(struct zpci_dev *zdev)
{
struct pci_dev *pdev = zdev->pdev;
resource_size_t len;
@@ -593,12 +549,9 @@ static void zpci_map_resources(struct zpci_dev *zdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
- pdev->resource[i].end = pdev->resource[i].start + len - 1;
- pr_debug("BAR%i: -> start: %Lx end: %Lx\n",
- i, pdev->resource[i].start, pdev->resource[i].end);
+ pci_iounmap(pdev, (void *) pdev->resource[i].start);
}
-};
+}
struct zpci_dev *zpci_alloc_device(void)
{
@@ -606,178 +559,47 @@ struct zpci_dev *zpci_alloc_device(void)
/* Alloc memory for our private pci device data */
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
- if (!zdev)
- return ERR_PTR(-ENOMEM);
-
- /* Alloc aibv & callback space */
- zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
- if (!zdev->irq_map)
- goto error;
- WARN_ON((u64) zdev->irq_map & 0xff);
- return zdev;
-
-error:
- kfree(zdev);
- return ERR_PTR(-ENOMEM);
+ return zdev ? : ERR_PTR(-ENOMEM);
}
void zpci_free_device(struct zpci_dev *zdev)
{
- kmem_cache_free(zdev_irq_cache, zdev->irq_map);
kfree(zdev);
}
-/*
- * Too late for any s390 specific setup, since interrupts must be set up
- * already which requires DMA setup too and the pci scan will access the
- * config space, which only works if the function handle is enabled.
- */
-int pcibios_enable_device(struct pci_dev *pdev, int mask)
-{
- struct resource *res;
- u16 cmd;
- int i;
-
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
-
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- res = &pdev->resource[i];
-
- if (res->flags & IORESOURCE_IO)
- return -EINVAL;
-
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- pci_write_config_word(pdev, PCI_COMMAND, cmd);
- return 0;
-}
-
int pcibios_add_platform_entries(struct pci_dev *pdev)
{
return zpci_sysfs_add_device(&pdev->dev);
}
-int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
-{
- int msi_nr = irq_to_msi_nr(irq);
- struct zdev_irq_map *imap;
- struct msi_desc *msi;
-
- msi = irq_get_msi_desc(irq);
- if (!msi)
- return -EIO;
-
- imap = get_imap(irq);
- spin_lock_init(&imap->lock);
-
- pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
- imap->cb[msi_nr].handler = handler;
- imap->cb[msi_nr].data = data;
-
- /*
- * The generic MSI code returns with the interrupt disabled on the
- * card, using the MSI mask bits. Firmware doesn't appear to unmask
- * at that level, so we do it here by hand.
- */
- zpci_msi_set_mask_bits(msi, 1, 0);
- return 0;
-}
-
-void zpci_free_irq(unsigned int irq)
-{
- struct zdev_irq_map *imap = get_imap(irq);
- int msi_nr = irq_to_msi_nr(irq);
- unsigned long flags;
-
- pr_debug("%s: for irq: %d\n", __func__, irq);
-
- spin_lock_irqsave(&imap->lock, flags);
- imap->cb[msi_nr].handler = NULL;
- imap->cb[msi_nr].data = NULL;
- spin_unlock_irqrestore(&imap->lock, flags);
-}
-
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n",
- __func__, irq, handler, irqflags, devname);
-
- return zpci_request_irq(irq, handler, dev_id);
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- zpci_free_irq(irq);
-}
-EXPORT_SYMBOL_GPL(free_irq);
-
static int __init zpci_irq_init(void)
{
- int cpu, rc;
-
- bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
- if (!bucket)
- return -ENOMEM;
-
- bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->aisb) {
- rc = -ENOMEM;
- goto out_aisb;
- }
-
- bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->alloc) {
- rc = -ENOMEM;
- goto out_alloc;
- }
+ int rc;
- isc_register(PCI_ISC);
- zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC);
- if (IS_ERR(zpci_irq_si)) {
- rc = PTR_ERR(zpci_irq_si);
- zpci_irq_si = NULL;
- goto out_ai;
- }
+ rc = register_adapter_interrupt(&zpci_airq);
+ if (rc)
+ goto out;
+ /* Set summary to 1 to be called every time for the ISC. */
+ *zpci_airq.lsi_ptr = 1;
- for_each_online_cpu(cpu)
- per_cpu(next_sbit, cpu) = 0;
+ rc = -ENOMEM;
+ zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ if (!zpci_aisb_iv)
+ goto out_airq;
- spin_lock_init(&bucket->lock);
- /* set summary to 1 to be called every time for the ISC */
- *zpci_irq_si = 1;
- set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
return 0;
-out_ai:
- isc_unregister(PCI_ISC);
- free_page((unsigned long) bucket->alloc);
-out_alloc:
- free_page((unsigned long) bucket->aisb);
-out_aisb:
- kfree(bucket);
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);
+out:
return rc;
}
static void zpci_irq_exit(void)
{
- free_page((unsigned long) bucket->alloc);
- free_page((unsigned long) bucket->aisb);
- s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC);
- isc_unregister(PCI_ISC);
- kfree(bucket);
-}
-
-void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m)
-{
- if (!zdev)
- return;
-
- seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries));
- seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n",
- get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb);
+ airq_iv_release(zpci_aisb_iv);
+ unregister_adapter_interrupt(&zpci_airq);
}
static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
@@ -834,15 +656,58 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
int pcibios_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
+ struct resource *res;
+ int i;
+
+ zdev->pdev = pdev;
+ zpci_map_resources(zdev);
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+ if (res->parent || !res->flags)
+ continue;
+ pci_claim_resource(pdev, i);
+ }
+
+ return 0;
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ struct resource *res;
+ u16 cmd;
+ int i;
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
zpci_fmb_enable_device(zdev);
zpci_map_resources(zdev);
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+
+ if (res->flags & IORESOURCE_IO)
+ return -EINVAL;
+
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
return 0;
}
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+
+ zpci_unmap_resources(zdev);
+ zpci_fmb_disable_device(zdev);
+ zpci_debug_exit_device(zdev);
+ zdev->pdev = NULL;
+}
+
static int zpci_scan_bus(struct zpci_dev *zdev)
{
struct resource *res;
@@ -921,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev)
rc = zpci_dma_init_device(zdev);
if (rc)
goto out_dma;
+
+ zdev->state = ZPCI_FN_STATE_ONLINE;
return 0;
out_dma:
@@ -949,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
goto out_free;
-
- zdev->state = ZPCI_FN_STATE_ONLINE;
}
rc = zpci_scan_bus(zdev);
if (rc)
goto out_disable;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
- if (hotplug_ops)
- hotplug_ops->create_slot(zdev);
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
+
+ zpci_init_slot(zdev);
return 0;
@@ -983,25 +848,6 @@ void zpci_stop_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_stop_device);
-int zpci_scan_device(struct zpci_dev *zdev)
-{
- zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN);
- if (!zdev->pdev) {
- pr_err("pci_scan_single_device failed for fid: 0x%x\n",
- zdev->fid);
- goto out;
- }
-
- pci_bus_add_devices(zdev->bus);
-
- return 0;
-out:
- zpci_dma_exit_device(zdev);
- clp_disable_fh(zdev);
- return -EIO;
-}
-EXPORT_SYMBOL_GPL(zpci_scan_device);
-
static inline int barsize(u8 size)
{
return (size) ? (1 << size) >> 10 : 0;
@@ -1009,15 +855,10 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
- zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
- L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
- if (!zdev_irq_cache)
- goto error_zdev;
-
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
16, 0, NULL);
if (!zdev_fmb_cache)
- goto error_fmb;
+ goto error_zdev;
/* TODO: use realloc */
zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
@@ -1028,8 +869,6 @@ static int zpci_mem_init(void)
error_iomap:
kmem_cache_destroy(zdev_fmb_cache);
-error_fmb:
- kmem_cache_destroy(zdev_irq_cache);
error_zdev:
return -ENOMEM;
}
@@ -1037,28 +876,10 @@ error_zdev:
static void zpci_mem_exit(void)
{
kfree(zpci_iomap_start);
- kmem_cache_destroy(zdev_irq_cache);
kmem_cache_destroy(zdev_fmb_cache);
}
-void zpci_register_hp_ops(struct pci_hp_callback_ops *ops)
-{
- mutex_lock(&zpci_list_lock);
- hotplug_ops = ops;
- mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_register_hp_ops);
-
-void zpci_deregister_hp_ops(void)
-{
- mutex_lock(&zpci_list_lock);
- hotplug_ops = NULL;
- mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops);
-
-unsigned int s390_pci_probe;
-EXPORT_SYMBOL_GPL(s390_pci_probe);
+static unsigned int s390_pci_probe;
char * __init pcibios_setup(char *str)
{
@@ -1086,16 +907,12 @@ static int __init pci_base_init(void)
rc = zpci_debug_init();
if (rc)
- return rc;
+ goto out;
rc = zpci_mem_init();
if (rc)
goto out_mem;
- rc = zpci_msihash_init();
- if (rc)
- goto out_hash;
-
rc = zpci_irq_init();
if (rc)
goto out_irq;
@@ -1104,7 +921,7 @@ static int __init pci_base_init(void)
if (rc)
goto out_dma;
- rc = clp_find_pci_devices();
+ rc = clp_scan_pci_devices();
if (rc)
goto out_find;
@@ -1115,11 +932,15 @@ out_find:
out_dma:
zpci_irq_exit();
out_irq:
- zpci_msihash_exit();
-out_hash:
zpci_mem_exit();
out_mem:
zpci_debug_exit();
+out:
return rc;
}
-subsys_initcall(pci_base_init);
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)
+{
+ clp_rescan_pci_devices_simple();
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd34359d1546..475563c3d1e4 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data)
return cc;
}
-static void *clp_alloc_block(void)
+static void *clp_alloc_block(gfp_t gfp_mask)
{
- return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+ return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
}
static void clp_free_block(void *ptr)
@@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
struct clp_req_rsp_query_pci_grp *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
struct clp_req_rsp_query_pci *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -179,9 +179,9 @@ error:
static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
- int rc, retries = 1000;
+ int rc, retries = 100;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
retries--;
if (retries < 0)
break;
- msleep(1);
+ msleep(20);
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
@@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev)
if (!zdev_enabled(zdev))
return 0;
- dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh);
rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
if (!rc)
/* Success -> store disabled handle in zdev */
@@ -246,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev)
return rc;
}
-static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry)
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+ void (*cb)(struct clp_fh_list_entry *entry))
{
- int present, rc;
-
- if (!entry->vendor_id)
- return;
-
- /* TODO: be a little bit more scalable */
- present = zpci_fid_present(entry->fid);
-
- if (present)
- pr_debug("%s: device %x already present\n", __func__, entry->fid);
-
- /* skip already used functions */
- if (present && entry->config_state)
- return;
-
- /* aev 306: function moved to stand-by state */
- if (present && !entry->config_state) {
- /*
- * The handle is already disabled, that means no iota/irq freeing via
- * the firmware interfaces anymore. Need to free resources manually
- * (DMA memory, debug, sysfs)...
- */
- zpci_stop_device(get_zdev_by_fid(entry->fid));
- return;
- }
-
- rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
- if (rc)
- pr_err("Failed to add fid: 0x%x\n", entry->fid);
-}
-
-int clp_find_pci_devices(void)
-{
- struct clp_req_rsp_list_pci *rrb;
u64 resume_token = 0;
int entries, i, rc;
- rrb = clp_alloc_block();
- if (!rrb)
- return -ENOMEM;
-
do {
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
@@ -317,12 +279,101 @@ int clp_find_pci_devices(void)
resume_token = rrb->response.resume_token;
for (i = 0; i < entries; i++)
- clp_check_pcifn_entry(&rrb->response.fh_list[i]);
+ cb(&rrb->response.fh_list[i]);
} while (resume_token);
pr_debug("Maximum number of supported PCI functions: %u\n",
rrb->response.max_fn);
out:
+ return rc;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry)
+{
+ if (!entry->vendor_id)
+ return;
+
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+static void __clp_rescan(struct clp_fh_list_entry *entry)
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev) {
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+ return;
+ }
+
+ if (!entry->config_state) {
+ /*
+ * The handle is already disabled, that means no iota/irq freeing via
+ * the firmware interfaces anymore. Need to free resources manually
+ * (DMA memory, debug, sysfs)...
+ */
+ zpci_stop_device(zdev);
+ }
+}
+
+static void __clp_update(struct clp_fh_list_entry *entry)
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev)
+ return;
+
+ zdev->fh = entry->fh;
+}
+
+int clp_scan_pci_devices(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_add);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_rescan_pci_devices(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_rescan);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_rescan_pci_devices_simple(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_NOWAIT);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_update);
+
clp_free_block(rrb);
return rc;
}
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 771b82359af4..75c69b402e05 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = {
.release = single_release,
};
-static int pci_debug_show(struct seq_file *m, void *v)
-{
- struct zpci_dev *zdev = m->private;
-
- zpci_debug_info(zdev, m);
- return 0;
-}
-
-static int pci_debug_seq_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, pci_debug_show,
- file_inode(filp)->i_private);
-}
-
-static const struct file_operations debugfs_pci_debug_fops = {
- .open = pci_debug_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
void zpci_debug_init_device(struct zpci_dev *zdev)
{
zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
@@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev)
&debugfs_pci_perf_fops);
if (IS_ERR(zdev->debugfs_perf))
zdev->debugfs_perf = NULL;
-
- zdev->debugfs_debug = debugfs_create_file("debug",
- S_IFREG | S_IRUGO | S_IWUSR,
- zdev->debugfs_dev, zdev,
- &debugfs_pci_debug_fops);
- if (IS_ERR(zdev->debugfs_debug))
- zdev->debugfs_debug = NULL;
}
void zpci_debug_exit_device(struct zpci_dev *zdev)
{
debugfs_remove(zdev->debugfs_perf);
- debugfs_remove(zdev->debugfs_debug);
debugfs_remove(zdev->debugfs_dev);
}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f8e69d5bc0a9..7e5573acb063 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>
@@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
*/
goto no_refresh;
- rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
- nr_pages * PAGE_SIZE);
+ rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
+ nr_pages * PAGE_SIZE);
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -263,7 +264,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
unsigned long nr_pages, iommu_page_index;
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
@@ -304,7 +305,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
unsigned long iommu_page_index;
int npages;
@@ -323,7 +324,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
struct page *page;
unsigned long pa;
dma_addr_t map;
@@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int zpci_dma_init_device(struct zpci_dev *zdev)
{
- unsigned int bitmap_order;
int rc;
spin_lock_init(&zdev->iommu_bitmap_lock);
@@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- bitmap_order = get_order(zdev->iommu_pages / 8);
- pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n",
- zdev->iommu_size, zdev->iommu_pages, bitmap_order);
-
- zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- bitmap_order);
+ zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->iommu_bitmap) {
rc = -ENOMEM;
goto out_reg;
@@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
{
zpci_unregister_ioat(zdev, 0);
dma_cleanup_tables(zdev);
- free_pages((unsigned long) zdev->iommu_bitmap,
- get_order(zdev->iommu_pages / 8));
+ vfree(zdev->iommu_bitmap);
zdev->iommu_bitmap = NULL;
zdev->next_bit = 0;
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ec62e3a0dc09..0aecaf954845 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf)
clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
break;
case 0x0306:
- clp_find_pci_devices();
+ clp_rescan_pci_devices();
break;
default:
break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 22eeb9d7ffeb..85267c058af8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
return cc;
}
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib)
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
{
u8 cc, status;
@@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
return cc;
}
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
{
u8 cc, status;
@@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
{
asm volatile (
" .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
@@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_load(u64 *data, u64 req, u64 offset)
+int zpci_load(u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_load);
+EXPORT_SYMBOL_GPL(zpci_load);
/* PCI Store */
static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
@@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_store(u64 data, u64 req, u64 offset)
+int zpci_store(u64 data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_store);
+EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
@@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int s390pci_store_block(const u64 *data, u64 req, u64 offset)
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset)
__func__, cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(s390pci_store_block);
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
deleted file mode 100644
index b097aed05a9b..000000000000
--- a/arch/s390/pci/pci_msi.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/rculist.h>
-#include <linux/hash.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <asm/hw_irq.h>
-
-/* mapping of irq numbers to msi_desc */
-static struct hlist_head *msi_hash;
-static const unsigned int msi_hash_bits = 8;
-#define MSI_HASH_BUCKETS (1U << msi_hash_bits)
-#define msi_hashfn(nr) hash_long(nr, msi_hash_bits)
-
-static DEFINE_SPINLOCK(msi_map_lock);
-
-struct msi_desc *__irq_get_msi_desc(unsigned int irq)
-{
- struct msi_map *map;
-
- hlist_for_each_entry_rcu(map,
- &msi_hash[msi_hashfn(irq)], msi_chain)
- if (map->irq == irq)
- return map->msi;
- return NULL;
-}
-
-int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
-{
- if (msi->msi_attrib.is_msix) {
- int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL;
- msi->masked = readl(msi->mask_base + offset);
- writel(flag, msi->mask_base + offset);
- } else {
- if (msi->msi_attrib.maskbit) {
- int pos;
- u32 mask_bits;
-
- pos = (long) msi->mask_base;
- pci_read_config_dword(msi->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(msi->dev, pos, mask_bits);
- } else {
- return 0;
- }
- }
-
- msi->msi_attrib.maskbit = !!flag;
- return 1;
-}
-
-int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
- unsigned int nr, int offset)
-{
- struct msi_map *map;
- struct msi_msg msg;
- int rc;
-
- map = kmalloc(sizeof(*map), GFP_KERNEL);
- if (map == NULL)
- return -ENOMEM;
-
- map->irq = nr;
- map->msi = msi;
- zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
- INIT_HLIST_NODE(&map->msi_chain);
-
- pr_debug("%s hashing irq: %u to bucket nr: %llu\n",
- __func__, nr, msi_hashfn(nr));
- hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
-
- spin_lock(&msi_map_lock);
- rc = irq_set_msi_desc(nr, msi);
- if (rc) {
- spin_unlock(&msi_map_lock);
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
- return rc;
- }
- spin_unlock(&msi_map_lock);
-
- msg.data = nr - offset;
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- msg.address_hi = zdev->msi_addr >> 32;
- write_msi_msg(nr, &msg);
- return 0;
-}
-
-void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
-{
- int irq = msi->irq & ZPCI_MSI_MASK;
- struct msi_map *map;
-
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- zpci_msi_set_mask_bits(msi, 1, 1);
-
- spin_lock(&msi_map_lock);
- map = zdev->msi_map[irq];
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[irq] = NULL;
- spin_unlock(&msi_map_lock);
-}
-
-/*
- * The msi hash table has 256 entries which is good for 4..20
- * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
- * the hash table size adjustable later.
- */
-int __init zpci_msihash_init(void)
-{
- unsigned int i;
-
- msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL);
- if (!msi_hash)
- return -ENOMEM;
-
- for (i = 0; i < MSI_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&msi_hash[i]);
- return 0;
-}
-
-void __init zpci_msihash_exit(void)
-{
- kfree(msi_hash);
-}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a42cce69d0a0..cf8a12ff733b 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -15,48 +15,71 @@
static ssize_t show_fid(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
- sprintf(buf, "0x%08x\n", zdev->fid);
- return strlen(buf);
+ return sprintf(buf, "0x%08x\n", zdev->fid);
}
static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL);
static ssize_t show_fh(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
- sprintf(buf, "0x%08x\n", zdev->fh);
- return strlen(buf);
+ return sprintf(buf, "0x%08x\n", zdev->fh);
}
static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL);
static ssize_t show_pchid(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
- sprintf(buf, "0x%04x\n", zdev->pchid);
- return strlen(buf);
+ return sprintf(buf, "0x%04x\n", zdev->pchid);
}
static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL);
static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
- sprintf(buf, "0x%02x\n", zdev->pfgid);
- return strlen(buf);
+ return sprintf(buf, "0x%02x\n", zdev->pfgid);
}
static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
+static void recover_callback(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
+ int ret;
+
+ pci_stop_and_remove_bus_device(pdev);
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ return;
+
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ return;
+
+ pci_rescan_bus(zdev->bus);
+}
+
+static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc = device_schedule_callback(dev, recover_callback);
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);
+
static struct device_attribute *zpci_dev_attrs[] = {
&dev_attr_function_id,
&dev_attr_function_handle,
&dev_attr_pchid,
&dev_attr_pfgid,
+ &dev_attr_recover,
NULL,
};