diff options
Diffstat (limited to 'arch/s390')
115 files changed, 2967 insertions, 2182 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index da183c5a103c..7143793859fa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,6 +62,7 @@ config S390 def_bool y select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH @@ -91,8 +92,8 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 @@ -102,7 +103,6 @@ config S390 select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT @@ -118,6 +118,7 @@ config S390 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ @@ -133,15 +134,15 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select VIRT_TO_BUS select INIT_ALL_POSSIBLE select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA - select OLD_SIGSUSPEND3 select OLD_SIGACTION + select OLD_SIGSUSPEND3 select SYSCTL_EXCEPTION_TRACE select USE_GENERIC_SMP_HELPERS if SMP select VIRT_CPU_ACCOUNTING + select VIRT_TO_BUS config SCHED_OMIT_FRAME_POINTER def_bool y @@ -227,11 +228,12 @@ config MARCH_Z196 not work on older machines. config MARCH_ZEC12 - bool "IBM zEC12" + bool "IBM zBC12 and zEC12" select HAVE_MARCH_ZEC12_FEATURES if 64BIT help - Select this to enable optimizations for IBM zEC12 (2827 series). The - kernel will be slightly faster but will not work on older machines. + Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and + 2827 series). The kernel will be slightly faster but will not work on + older machines. endchoice @@ -301,7 +303,6 @@ config HOTPLUG_CPU def_bool y prompt "Support for hot-pluggable CPUs" depends on SMP - select HOTPLUG help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. @@ -429,7 +430,6 @@ menuconfig PCI bool "PCI support" default n depends on 64BIT - select ARCH_SUPPORTS_MSI select PCI_MSI help Enable PCI support. @@ -444,6 +444,16 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few drivers will fail to allocate MSI interrupts for all + PCI devices. + source "drivers/pci/Kconfig" source "drivers/pci/pcie/Kconfig" source "drivers/pci/hotplug/Kconfig" @@ -515,6 +525,7 @@ config CRASH_DUMP bool "kernel crash dumps" depends on 64BIT && SMP select KEXEC + select ZFCPDUMP help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools @@ -525,7 +536,7 @@ config CRASH_DUMP config ZFCPDUMP def_bool n prompt "zfcpdump support" - select SMP + depends on SMP help Select this option if you want to build an zfcpdump enabled kernel. Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. @@ -710,6 +721,7 @@ config S390_GUEST def_bool y prompt "s390 support for virtio devices" depends on 64BIT + select TTY select VIRTUALIZATION select VIRTIO select VIRTIO_CONSOLE diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 7ef60b52d6e0..42be53743133 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -32,7 +32,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_mem_data { +struct appldata_mem_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -63,7 +63,7 @@ static struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; /* @@ -118,7 +118,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -131,7 +130,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -142,6 +151,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 2d224b945355..66037d2622b4 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -29,7 +29,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_net_sum_data { +struct appldata_net_sum_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -51,7 +51,7 @@ static struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; /* @@ -121,7 +121,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -134,7 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -145,6 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 3ad8f61c9985..866ecbe670e4 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,9 +6,9 @@ BITS := $(if $(CONFIG_64BIT),64,31) -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ - vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \ - sizes.h head$(BITS).o +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += misc.o piggy.o sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -48,6 +48,7 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZ4) := lz4 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_XZ) := xz @@ -56,6 +57,8 @@ $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) + $(call if_changed,lz4) $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) $(call if_changed,lzma) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index c4c6a1cf221b..57cbaff1f397 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -47,6 +47,10 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_bunzip2.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + #ifdef CONFIG_KERNEL_LZMA #include "../../../../lib/decompress_unlzma.c" #endif diff --git a/arch/s390/defconfig b/arch/s390/defconfig index b74400e3e035..d204c65bf722 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,14 +1,13 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -27,6 +26,7 @@ CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y +CONFIG_RD_LZ4=y CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -38,11 +38,13 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y +# CONFIG_EFI_PARTITION is not set CONFIG_DEFAULT_DEADLINE=y CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -92,40 +94,49 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=y CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_EQUALIZER=m CONFIG_TUN=m CONFIG_VIRTIO_NET=y +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set CONFIG_RAW_DRIVER=m CONFIG_VIRTIO_BALLOON=y -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_FANOTIFY=y +CONFIG_FUSE_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_PAGEALLOC=y CONFIG_TIMER_STATS=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_NOTIFIERS=y +CONFIG_PROVE_RCU=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_RCU_TRACE=y -CONFIG_KPROBES_SANITY_TEST=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_PAGEALLOC=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KPROBES_SANITY_TEST=y # CONFIG_STRICT_DEVMEM is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m @@ -137,8 +148,10 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m @@ -165,6 +178,8 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index f41e0ef7fdf9..79f2ac55253f 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -18,26 +18,23 @@ #define UPDATE_FILE_MODE 0220 #define DIR_MODE 0550 -extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name); +extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); -extern struct dentry *hypfs_create_u64(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value); -extern struct dentry *hypfs_create_str(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string); /* LPAR Hypervisor */ extern int hypfs_diag_init(void); extern void hypfs_diag_exit(void); -extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_diag_create_files(struct dentry *root); /* VM Hypervisor */ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); -extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_vm_create_files(struct dentry *root); /* debugfs interface */ struct hypfs_dbfs_file; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index bb5dd496614f..17ab8b7b53cc 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - return PTR_RET(dbfs_dir); + return PTR_ERR_OR_ZERO(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 7fd3690b6760..5eeffeefae06 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -623,8 +623,7 @@ void hypfs_diag_exit(void) * ******************************************* */ -static int hypfs_create_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -632,32 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_info_type, cpu_info) - cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - rc = hypfs_create_u64(sb, cpu_dir, "cputime", + rc = hypfs_create_u64(cpu_dir, "cputime", cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); if (diag204_info_type == INFO_EXT) { - rc = hypfs_create_u64(sb, cpu_dir, "onlinetime", + rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); } -static void *hypfs_create_lpar_files(struct super_block *sb, - struct dentry *systems_dir, void *part_hdr) +static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; @@ -667,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb, part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); lpar_name[LPAR_NAME_LEN] = 0; - lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name); + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; - cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus"); + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = part_hdr + part_hdr__size(diag204_info_type); for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { int rc; - rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += cpu_info__size(diag204_info_type); @@ -684,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb, return cpu_info; } -static int hypfs_create_phys_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -693,34 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); if (IS_ERR(cpu_dir)) return PTR_ERR(cpu_dir); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + rc = hypfs_create_u64(cpu_dir, "mgmtime", phys_cpu__mgm_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); } -static void *hypfs_create_phys_files(struct super_block *sb, - struct dentry *parent_dir, void *phys_hdr) +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) { int i; void *cpu_info; struct dentry *cpus_dir; - cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus"); + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { int rc; - rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += phys_cpu__size(diag204_info_type); @@ -728,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb, return cpu_info; } -int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) +int hypfs_diag_create_files(struct dentry *root) { struct dentry *systems_dir, *hyp_dir; void *time_hdr, *part_hdr; @@ -739,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) if (IS_ERR(buffer)) return PTR_ERR(buffer); - systems_dir = hypfs_mkdir(sb, root, "systems"); + systems_dir = hypfs_mkdir(root, "systems"); if (IS_ERR(systems_dir)) { rc = PTR_ERR(systems_dir); goto err_out; @@ -747,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) time_hdr = (struct x_info_blk_hdr *)buffer; part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { - part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr); + part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); if (IS_ERR(part_hdr)) { rc = PTR_ERR(part_hdr); goto err_out; } } if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { - ptr = hypfs_create_phys_files(sb, root, part_hdr); + ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; } } - hyp_dir = hypfs_mkdir(sb, root, "hyp"); + hyp_dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(hyp_dir)) { rc = PTR_ERR(hyp_dir); goto err_out; } - ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor"); + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index f364dcf77e8e..24908ce149f1 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -107,16 +107,15 @@ static void diag2fc_free(const void *data) vfree(data); } -#define ATTRIBUTE(sb, dir, name, member) \ +#define ATTRIBUTE(dir, name, member) \ do { \ void *rc; \ - rc = hypfs_create_u64(sb, dir, name, member); \ + rc = hypfs_create_u64(dir, name, member); \ if (IS_ERR(rc)) \ return PTR_ERR(rc); \ } while(0) -static int hpyfs_vm_create_guest(struct super_block *sb, - struct dentry *systems_dir, +static int hpyfs_vm_create_guest(struct dentry *systems_dir, struct diag2fc_data *data) { char guest_name[NAME_LEN + 1] = {}; @@ -130,46 +129,46 @@ static int hpyfs_vm_create_guest(struct super_block *sb, memcpy(guest_name, data->guest_name, NAME_LEN); EBCASC(guest_name, NAME_LEN); strim(guest_name); - guest_dir = hypfs_mkdir(sb, systems_dir, guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); if (IS_ERR(guest_dir)) return PTR_ERR(guest_dir); - ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); /* logical cpu information */ - cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus"); + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); if (IS_ERR(cpus_dir)) return PTR_ERR(cpus_dir); - ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu); - ATTRIBUTE(sb, cpus_dir, "capped", capped_value); - ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag); - ATTRIBUTE(sb, cpus_dir, "count", data->vcpus); - ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min); - ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max); - ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + ATTRIBUTE(cpus_dir, "weight_min", data->cpu_min); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); /* memory information */ - mem_dir = hypfs_mkdir(sb, guest_dir, "mem"); + mem_dir = hypfs_mkdir(guest_dir, "mem"); if (IS_ERR(mem_dir)) return PTR_ERR(mem_dir); - ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb); - ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb); - ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb); - ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); /* samples */ - samples_dir = hypfs_mkdir(sb, guest_dir, "samples"); + samples_dir = hypfs_mkdir(guest_dir, "samples"); if (IS_ERR(samples_dir)) return PTR_ERR(samples_dir); - ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp); - ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp); - ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp); - ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp); - ATTRIBUTE(sb, samples_dir, "other", data->other_samp); - ATTRIBUTE(sb, samples_dir, "total", data->total_samp); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); return 0; } -int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) +int hypfs_vm_create_files(struct dentry *root) { struct dentry *dir, *file; struct diag2fc_data *data; @@ -181,38 +180,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) return PTR_ERR(data); /* Hpervisor Info */ - dir = hypfs_mkdir(sb, root, "hyp"); + dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor"); + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* physical cpus */ - dir = hypfs_mkdir(sb, root, "cpus"); + dir = hypfs_mkdir(root, "cpus"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_u64(sb, dir, "count", data->lcpus); + file = hypfs_create_u64(dir, "count", data->lcpus); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* guests */ - dir = hypfs_mkdir(sb, root, "systems"); + dir = hypfs_mkdir(root, "systems"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } for (i = 0; i < count; i++) { - rc = hpyfs_vm_create_guest(sb, dir, &(data[i])); + rc = hpyfs_vm_create_guest(dir, &(data[i])); if (rc) goto failed; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 7a539f4f5e30..ddfe09b45134 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -28,8 +28,7 @@ #define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ #define TMP_SIZE 64 /* size of temporary buffers */ -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir); +static struct dentry *hypfs_create_update_file(struct dentry *dir); struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ @@ -193,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, } hypfs_delete_tree(sb->s_root); if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, sb->s_root); + rc = hypfs_vm_create_files(sb->s_root); else - rc = hypfs_diag_create_files(sb, sb->s_root); + rc = hypfs_diag_create_files(sb->s_root); if (rc) { pr_err("Updating the hypfs tree failed\n"); hypfs_delete_tree(sb->s_root); @@ -302,12 +301,12 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) if (!root_dentry) return -ENOMEM; if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, root_dentry); + rc = hypfs_vm_create_files(root_dentry); else - rc = hypfs_diag_create_files(sb, root_dentry); + rc = hypfs_diag_create_files(root_dentry); if (rc) return rc; - sbi->update_file = hypfs_create_update_file(sb, root_dentry); + sbi->update_file = hypfs_create_update_file(root_dentry); if (IS_ERR(sbi->update_file)) return PTR_ERR(sbi->update_file); hypfs_update_update(sb); @@ -334,8 +333,7 @@ static void hypfs_kill_super(struct super_block *sb) kill_litter_super(sb); } -static struct dentry *hypfs_create_file(struct super_block *sb, - struct dentry *parent, const char *name, +static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, char *data, umode_t mode) { struct dentry *dentry; @@ -347,7 +345,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, dentry = ERR_PTR(-ENOMEM); goto fail; } - inode = hypfs_make_inode(sb, mode); + inode = hypfs_make_inode(parent->d_sb, mode); if (!inode) { dput(dentry); dentry = ERR_PTR(-ENOMEM); @@ -373,24 +371,22 @@ fail: return dentry; } -struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name) +struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) { struct dentry *dentry; - dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE); + dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE); if (IS_ERR(dentry)) return dentry; hypfs_add_dentry(dentry); return dentry; } -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir) +static struct dentry *hypfs_create_update_file(struct dentry *dir) { struct dentry *dentry; - dentry = hypfs_create_file(sb, dir, "update", NULL, + dentry = hypfs_create_file(dir, "update", NULL, S_IFREG | UPDATE_FILE_MODE); /* * We do not put the update file on the 'delete' list with @@ -400,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb, return dentry; } -struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value) { char *buffer; @@ -412,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, if (!buffer) return ERR_PTR(-ENOMEM); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); @@ -421,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, return dentry; } -struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string) { char *buffer; @@ -432,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, return ERR_PTR(-ENOMEM); sprintf(buffer, "%s\n", string); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 9819891ed7a2..4bbb5957ed1b 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,9 +9,85 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H -typedef void (*adapter_int_handler_t)(void *, void *); +#include <linux/bit_spinlock.h> -void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); -void s390_unregister_adapter_interrupt(void *, u8); +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; + +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); + +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc_bit(struct airq_iv *iv); +void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 4d8604e311f3..10135a38673c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc"); } static inline void @@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -693,7 +693,7 @@ static inline int find_next_bit_left(const unsigned long *addr, size -= offset; p = addr + offset / BITS_PER_LONG; if (bit) { - set = __flo_word(0, *p & (~0UL << bit)); + set = __flo_word(0, *p & (~0UL >> bit)); if (set >= size) return size + offset; if (set < BITS_PER_LONG) diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ffb898961c8d..d42625053c37 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } +void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d2ff41370c0c..f65bd3634519 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -13,9 +13,6 @@ #include <asm/div64.h> -#define __ARCH_HAS_VTIME_ACCOUNT -#define __ARCH_HAS_VTIME_TASK_SWITCH - /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 886ac7d4937a..3fbc67d9e197 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -50,9 +50,10 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *dma_ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); - return (dma_addr == 0UL); + return dma_addr == DMA_ERROR_CODE; } static inline void *dma_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 2ee66a65f2d4..0aa6a7ed95a3 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -13,6 +13,16 @@ #define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + /* * The test_facility function uses the bit odering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as @@ -20,12 +30,7 @@ */ static inline int test_facility(unsigned long nr) { - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; + return __test_facility(nr, &S390_lowcore.stfle_fac_list); } /** diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 0c82ba86e997..a908d2941c5d 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -20,4 +20,9 @@ #define HARDIRQ_BITS 8 +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bd90359d6d22..11eae5f55b70 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -17,6 +17,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); /* * If the arch doesn't supply something else, assume that hugepage @@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t huge_pte_wrprotect(pte_t pte) +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - pte_val(pte) |= _PAGE_RO; - return pte; + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } -static inline int huge_pte_none(pte_t pte) +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - return (pte_val(pte) & _SEGMENT_ENTRY_INV) && - !(pte_val(pte) & _SEGMENT_ENTRY_RO); + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) { - pte_t pte = *ptep; - unsigned long mask; - - if (!MACHINE_HAS_HPAGE) { - ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); - if (ptep) { - mask = pte_val(pte) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte = pte_mkhuge(*ptep); - pte_val(pte) |= mask; - } + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } - return pte; + return changed; } -static inline void __pmd_csp(pmd_t *pmdp) +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INV; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } -static inline void huge_ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - pmd_t *pmdp = (pmd_t *) ptep; - - if (MACHINE_HAS_IDTE) - __pmd_idte(address, pmdp); - else - __pmd_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - huge_ptep_invalidate(mm, addr, ptep); - return pte; -} - -#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ - if (__changed) { \ - huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) - -#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = huge_ptep_get(__ptep); \ - if (huge_pte_write(__pte)) { \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ - set_huge_pte_at(__mm, __addr, __ptep, \ - huge_pte_wrprotect(__pte)); \ - } \ -}) - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) { - huge_ptep_invalidate(vma->vm_mm, address, ptep); + return mk_pte(page, pgprot); } -static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +static inline int huge_pte_none(pte_t pte) { - pte_t pte; - pmd_t pmd; - - pmd = mk_pmd_phys(page_to_phys(page), pgprot); - pte_val(pte) = pmd_val(pmd); - return pte; + return pte_none(pte); } static inline int huge_pte_write(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - return pmd_write(pmd); + return pte_write(pte); } static inline int huge_pte_dirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return 0; + return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); - return pte; + return pte_mkwrite(pte); } static inline pte_t huge_pte_mkdirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return pte; + return pte_mkdirty(pte); } -static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t huge_pte_wrprotect(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); - return pte; + return pte_wrprotect(pte); } -static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { - pmd_clear((pmd_t *) ptep); + return pte_modify(pte, newprot); } #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h index 7e3d2586c1ff..ee96a8b697f9 100644 --- a/arch/s390/include/asm/hw_irq.h +++ b/arch/s390/include/asm/hw_irq.h @@ -4,19 +4,8 @@ #include <linux/msi.h> #include <linux/pci.h> -static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) -{ - return __irq_get_msi_desc(irq); -} - -/* Must be called with msi map lock held */ -static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi) -{ - if (!msi) - return -EINVAL; - - msi->irq = irq; - return 0; -} +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); #endif diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fd9be010f9b2..cd6b9ee7b69c 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -13,28 +13,6 @@ #include <asm/page.h> #include <asm/pci_io.h> -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void * address) -{ - unsigned long real_address; - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_address) : "a" (address) : "cc"); - return real_address; -} -#define virt_to_phys virt_to_phys - -static inline void * phys_to_virt(unsigned long address) -{ - return (void *) address; -} - void *xlate_dev_mem_ptr(unsigned long phys); #define xlate_dev_mem_ptr xlate_dev_mem_ptr void unxlate_dev_mem_ptr(unsigned long phys, void *addr); diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 87c17bfb2968..5f8bcc5fe423 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,17 +1,28 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +#ifndef __ASSEMBLY__ + #include <linux/hardirq.h> #include <linux/percpu.h> #include <linux/cache.h> #include <linux/types.h> -enum interruption_main_class { - EXTERNAL_INTERRUPT, - IO_INTERRUPT, - NR_IRQS -}; - enum interruption_class { IRQEXT_CLK, IRQEXT_EXC, @@ -67,19 +78,17 @@ typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); int register_external_interrupt(u16 code, ext_int_handler_t handler); int unregister_external_interrupt(u16 code, ext_int_handler_t handler); -void service_subclass_irq_register(void); -void service_subclass_irq_unregister(void); -void measurement_alert_subclass_register(void); -void measurement_alert_subclass_unregister(void); - -#ifdef CONFIG_LOCKDEP -# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) -# define disable_irq_nosync_lockdep_irqsave(irq, flags) \ - disable_irq_nosync(irq) -# define disable_irq_lockdep(irq) disable_irq(irq) -# define enable_irq_lockdep(irq) enable_irq(irq) -# define enable_irq_lockdep_irqrestore(irq, flags) \ - enable_irq(irq) -#endif + +enum irq_subclass { + IRQ_SUBCLASS_MEASUREMENT_ALERT = 5, + IRQ_SUBCLASS_SERVICE_SIGNAL = 9, +}; + +void irq_subclass_register(enum irq_subclass subclass); +void irq_subclass_unregister(enum irq_subclass subclass); + +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index dcf6948a875c..4176dfe0fba1 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -31,6 +31,8 @@ #include <linux/ptrace.h> #include <linux/percpu.h> +#define __ARCH_WANT_KPROBES_INSN_SLOT + struct pt_regs; struct kprobe; @@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t; /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ - kprobe_opcode_t insn[MAX_INSN_SIZE]; + kprobe_opcode_t *insn; }; struct prev_kprobe { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..e87ecaa2c569 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -62,13 +62,20 @@ struct sca_block { #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 #define CPUSTAT_P 0x00000001 struct kvm_s390_sie_block { atomic_t cpuflags; /* 0x0000 */ __u32 prefix; /* 0x0004 */ - __u8 reserved8[32]; /* 0x0008 */ + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ @@ -90,7 +97,8 @@ struct kvm_s390_sie_block { __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ __u8 ecb; /* 0x0061 */ - __u8 reserved62[2]; /* 0x0062 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ @@ -130,6 +138,7 @@ struct kvm_vcpu_stat { u32 deliver_program_int; u32 deliver_io_int; u32 exit_wait_state; + u32 instruction_pfmf; u32 instruction_stidp; u32 instruction_spx; u32 instruction_stpx; @@ -166,7 +175,7 @@ struct kvm_s390_ext_info { }; #define PGM_OPERATION 0x01 -#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 #define PGM_PROTECTION 0x04 #define PGM_ADDRESSING 0x05 @@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt { atomic_t active; struct kvm_s390_float_interrupt *float_int; int timer_due; /* event indicator for waitqueue below */ - wait_queue_head_t wq; + wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; }; @@ -265,5 +274,14 @@ struct kvm_arch{ int css_support; }; +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6340178748bf..ff132ac64ddd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -12,8 +12,6 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* Cloned contexts will be created with extended page tables. */ - unsigned int alloc_pgste:1; /* The mmu context has extended page tables. */ unsigned int has_pgste:1; } mm_context_t; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 084e7755ed9b..9f973d8de90e 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm && current->mm->context.alloc_pgste) { - /* - * alloc_pgste indicates, that any NEW context will be created - * with extended page tables. The old context is unchanged. The - * page table allocation and the page table operations will - * look at has_pgste to distinguish normal and extended page - * tables. The only way to create extended page tables is to - * set alloc_pgste and then create a new context (e.g. dup_mm). - * The page table allocation is called after init_new_context - * and if has_pgste is set, it will create extended page - * tables. - */ - mm->context.has_pgste = 1; - mm->context.alloc_pgste = 1; - } else { - mm->context.has_pgste = 0; - mm->context.alloc_pgste = 0; - } + mm->context.has_pgste = 0; mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; @@ -77,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, WARN_ON(atomic_read(&prev->context.attach_count) < 0); atomic_inc(&next->context.attach_count); /* Check for TLBs not flushed yet */ - if (next->context.flush_mm) - __tlb_flush_mm(next); + __tlb_flush_mm_lazy(next); } #define enter_lazy_tlb(mm,tsk) do { } while (0) diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include <asm-generic/mutex-dec.h> - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5d64fb7619cc..1e51f2915b2e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -32,16 +32,6 @@ void storage_key_init_range(unsigned long start, unsigned long end); -static inline unsigned long pfmf(unsigned long function, unsigned long address) -{ - asm volatile( - " .insn rre,0xb9af0000,%[function],%[address]" - : [address] "+a" (address) - : [function] "d" (function) - : "memory"); - return address; -} - static inline void clear_page(void *page) { register unsigned long reg1 asm ("1") = 0; @@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(unsigned long pfn) -{ - return page_reset_referenced(pfn << PAGE_SHIFT); -} - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6c1801235db9..1cc185da9d38 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -6,6 +6,7 @@ /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 +#include <linux/pci.h> #include <asm-generic/pci.h> #include <asm-generic/pci-dma-compat.h> #include <asm/pci_clp.h> @@ -21,10 +22,6 @@ void pci_iounmap(struct pci_dev *, void __iomem *); int pci_domain_nr(struct pci_bus *); int pci_proc_domain(struct pci_bus *); -/* MSI arch hooks */ -#define arch_setup_msi_irqs arch_setup_msi_irqs -#define arch_teardown_msi_irqs arch_teardown_msi_irqs - #define ZPCI_BUS_NR 0 /* default bus number */ #define ZPCI_DEVFN 0 /* default device number */ @@ -53,14 +50,9 @@ struct zpci_fmb { atomic64_t unmapped_pages; } __packed __aligned(16); -struct msi_map { - unsigned long irq; - struct msi_desc *msi; - struct hlist_node msi_chain; -}; - -#define ZPCI_NR_MSI_VECS 64 -#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1) +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -91,8 +83,7 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ - struct zdev_irq_map *irq_map; - struct msi_map *msi_map[ZPCI_NR_MSI_VECS]; + struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ /* DMA stuff */ @@ -120,12 +111,6 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; - struct dentry *debugfs_debug; -}; - -struct pci_hp_callback_ops { - int (*create_slot) (struct zpci_dev *zdev); - void (*remove_slot) (struct zpci_dev *zdev); }; static inline bool zdev_enabled(struct zpci_dev *zdev) @@ -143,37 +128,42 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); -int zpci_scan_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); /* CLP */ -int clp_find_pci_devices(void); +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); -/* MSI */ -struct msi_desc *__irq_get_msi_desc(unsigned int); -int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32); -int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int); -void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *); -int zpci_msihash_init(void); -void zpci_msihash_exit(void); - #ifdef CONFIG_PCI /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); +void zpci_rescan(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + /* Helpers */ struct zpci_dev *get_zdev(struct pci_dev *); struct zpci_dev *get_zdev_by_fid(u32); -bool zpci_fid_present(u32); /* sysfs */ int zpci_sysfs_add_device(struct device *); @@ -183,14 +173,6 @@ void zpci_sysfs_remove_device(struct device *); int zpci_dma_init(void); void zpci_dma_exit(void); -/* Hotplug */ -extern struct mutex zpci_list_lock; -extern struct list_head zpci_list; -extern unsigned int s390_pci_probe; - -void zpci_register_hp_ops(struct pci_hp_callback_ops *); -void zpci_deregister_hp_ops(void); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e6a2bdd4d705..df6eac9f0cb4 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -79,11 +79,11 @@ struct zpci_fib { } __packed; -int s390pci_mod_fc(u64 req, struct zpci_fib *fib); -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); -int s390pci_load(u64 *data, u64 req, u64 offset); -int s390pci_store(u64 data, u64 req, u64 offset); -int s390pci_store_block(const u64 *data, u64 req, u64 offset); -void set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 83a9caa6ae53..d194d544d694 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - s390pci_store(data, req, ZPCI_OFFSET(addr)); \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) @@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return s390pci_store(val, req, offset); + return zpci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) @@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) u64 data; int cc; - cc = s390pci_load(&data, req, offset); + cc = zpci_load(&data, req, offset); if (cc) goto out; @@ -115,7 +115,7 @@ out: static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return s390pci_store_block(data, req, offset); + return zpci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 5f0173a31693..1141fb3e7b21 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -14,3 +14,13 @@ /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 + +#ifdef CONFIG_64BIT + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 590c3219c634..e1408ddb94f8 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); + static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ac01463038f1..9b60a36c348d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -58,9 +58,6 @@ extern unsigned long zero_page_mask; #define __HAVE_COLOR_ZERO_PAGE /* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #endif /* !__ASSEMBLY__ */ /* @@ -220,63 +217,57 @@ extern unsigned long MODULES_END; /* Hardware bits in the page table entry */ #define _PAGE_CO 0x100 /* HW Change-bit override */ -#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ /* Software bits in the page table entry */ -#define _PAGE_SWT 0x001 /* SW pte type bit t */ -#define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SWC 0x004 /* SW pte changed bit */ -#define _PAGE_SWR 0x008 /* SW pte referenced bit */ -#define _PAGE_SWW 0x010 /* SW pte write bit */ -#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ +#define _PAGE_PRESENT 0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_SWC | _PAGE_SWR) - -/* Six different types of pages. */ -#define _PAGE_TYPE_EMPTY 0x400 -#define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x403 -#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ -#define _PAGE_TYPE_RO 0x200 -#define _PAGE_TYPE_RW 0x000 - -/* - * Only four types for huge pages, using the invalid bit and protection bit - * of a segment table entry. - */ -#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ -#define _HPAGE_TYPE_NONE 0x220 -#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ -#define _HPAGE_TYPE_RW 0x000 + _PAGE_DIRTY | _PAGE_YOUNG) /* - * PTE type bits are rather complicated. handle_pte_fault uses pte_present, - * pte_none and pte_file to find out the pte type WITHOUT holding the page - * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to - * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs - * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. - * This change is done while holding the lock, but the intermediate step - * of a previously valid pte with the hw invalid bit set can be observed by - * handle_pte_fault. That makes it necessary that all valid pte types with - * the hw invalid bit set must be distinguishable from the four pte types - * empty, none, swap and file. + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. + * + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: * - * irxt ipte irxt - * _PAGE_TYPE_EMPTY 1000 -> 1000 - * _PAGE_TYPE_NONE 1001 -> 1001 - * _PAGE_TYPE_SWAP 1011 -> 1011 - * _PAGE_TYPE_FILE 11?1 -> 11?1 - * _PAGE_TYPE_RO 0100 -> 1100 - * _PAGE_TYPE_RW 0000 -> 1000 + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 * - * pte_none is true for bits combinations 1000, 1010, 1100, 1110 - * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 - * pte_file is true for bits combinations 1101, 1111 - * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. + * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ #ifndef CONFIG_64BIT @@ -289,28 +280,35 @@ extern unsigned long MODULES_END; #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf0000000UL -#define RCP_FP_BIT 0x08000000UL -#define RCP_PCL_BIT 0x00800000UL -#define RCP_HR_BIT 0x00400000UL -#define RCP_HC_BIT 0x00200000UL -#define RCP_GR_BIT 0x00040000UL -#define RCP_GC_BIT 0x00020000UL -#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x00008000UL -#define KVM_UC_BIT 0x00004000UL +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -329,8 +327,8 @@ extern unsigned long MODULES_END; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ -#define _REGION_ENTRY_RO 0x200 /* region protection bit */ -#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ #define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ @@ -338,47 +336,61 @@ extern unsigned long MODULES_END; #define _REGION_ENTRY_LENGTH 0x03 /* region third length */ #define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) -#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) -#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) -#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ + #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ -#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Set of bits not changed in pmd_modify */ #define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL -#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -393,14 +405,18 @@ extern unsigned long MODULES_END; /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) -#define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) -#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) - -#define PAGE_KERNEL PAGE_RWC -#define PAGE_SHARED PAGE_KERNEL -#define PAGE_COPY PAGE_RO +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) /* * On s390 the page table entry has an invalid bit and a read-only bit. @@ -409,35 +425,31 @@ extern unsigned long MODULES_END; */ /*xwr*/ #define __P000 PAGE_NONE -#define __P001 PAGE_RO -#define __P010 PAGE_RO -#define __P011 PAGE_RO -#define __P100 PAGE_RO -#define __P101 PAGE_RO -#define __P110 PAGE_RO -#define __P111 PAGE_RO +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ #define __S000 PAGE_NONE -#define __S001 PAGE_RO -#define __S010 PAGE_RW -#define __S011 PAGE_RW -#define __S100 PAGE_RO -#define __S101 PAGE_RO -#define __S110 PAGE_RW -#define __S111 PAGE_RW +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -static inline int mm_exclusive(struct mm_struct *mm) -{ - return likely(mm == current->active_mm && - atomic_read(&mm->context.attach_count) <= 1); -} +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -474,7 +486,7 @@ static inline int pgd_none(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) return 0; - return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pgd_bad(pgd_t pgd) @@ -485,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } @@ -501,7 +513,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pud_large(pud_t pud) @@ -519,7 +531,7 @@ static inline int pud_bad(pud_t pud) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } @@ -528,30 +540,36 @@ static inline int pud_bad(pud_t pud) static inline int pmd_present(pmd_t pmd) { - unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO; - return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE || - !(pmd_val(pmd) & _SEGMENT_ENTRY_INV); + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; } static inline int pmd_none(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) && - !(pmd_val(pmd) & _SEGMENT_ENTRY_RO); + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } static inline int pmd_large(pmd_t pmd) { #ifdef CONFIG_64BIT - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; #else return 0; #endif } +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); +} + static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; - return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH @@ -570,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; + if (pmd_prot_none(pmd)) + return 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; } static inline int pmd_young(pmd_t pmd) { - return 0; + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; } -static inline int pte_none(pte_t pte) +static inline int pte_present(pte_t pte) { - return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; } -static inline int pte_present(pte_t pte) +static inline int pte_none(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; - return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || - (!(pte_val(pte) & _PAGE_INVALID) && - !(pte_val(pte) & _PAGE_SWT)); + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; } static inline int pte_file(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; - return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); } static inline int pte_special(pte_t pte) @@ -618,12 +645,12 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) asm( " lg %0,%2\n" "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ - " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ " csg %0,%1,%2\n" " jl 0b\n" : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc"); + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); #endif return __pgste(new); } @@ -632,44 +659,56 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE asm( - " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " nihh %1,0xff7f\n" /* clear PCL bit */ " stg %1,%0\n" : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); preempt_enable(); #endif } +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} + static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; if (pte_val(*ptep) & _PAGE_INVALID) return pgste; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Clear page changed & referenced bit in the storage key */ - if (bits & _PAGE_CHANGED) + if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) { + /* Transfer dirty + referenced bit to host bits in pgste */ + pgste_val(pgste) |= bits << 52; page_set_storage_key(address, skey ^ bits, 0); - else if (bits) + } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) && + (bits & _PAGE_REFERENCED)) { + /* Transfer referenced bit to host bit in pgste */ + pgste_val(pgste) |= PGSTE_HR_BIT; page_reset_referenced(address); + } /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ - /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; - /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ - /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); - pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) |= - (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - /* Transfer referenced bit to pte */ - pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1; + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; #endif return pgste; @@ -678,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - int young; - if (pte_val(*ptep) & _PAGE_INVALID) return pgste; /* Get referenced bit from storage key */ - young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); - if (young) - pgste_val(pgste) |= RCP_GR_BIT; - /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & RCP_HR_BIT) { - pgste_val(pgste) &= ~RCP_HR_BIT; - young = 1; - } - /* Transfer referenced bit to kvm user bits and pte */ - if (young) { - pgste_val(pgste) |= KVM_UR_BIT; - pte_val(*ptep) |= _PAGE_SWR; - } + if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK)) + pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT; #endif return pgste; } @@ -704,29 +730,31 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; - unsigned long okey, nkey; + unsigned long nkey; if (pte_val(entry) & _PAGE_INVALID) return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); address = pte_val(entry) & PAGE_MASK; - okey = nkey = page_get_storage_key(address); - nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); - /* Set page access key and fetch protection bit from pgste */ - nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; - if (okey != nkey) - page_set_storage_key(address, nkey, 0); + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + page_set_storage_key(address, nkey, 0); #endif } static inline void pgste_set_pte(pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { + if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. */ - pte_val(entry) |= _PAGE_SWC; - pte_val(entry) &= ~_PAGE_RO; + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } *ptep = entry; } @@ -743,6 +771,7 @@ struct gmap { struct mm_struct *mm; unsigned long *table; unsigned long asce; + void *private; struct list_head crst_list; }; @@ -801,8 +830,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - if (pgste_val(pgste) & RCP_IN_BIT) { - pgste_val(pgste) &= ~RCP_IN_BIT; + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; gmap_do_ipte_notify(mm, addr, ptep); } #endif @@ -837,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_SWW) != 0; + return (pte_val(pte) & _PAGE_WRITE) != 0; } static inline int pte_dirty(pte_t pte) { - return (pte_val(pte) & _PAGE_SWC) != 0; + return (pte_val(pte) & _PAGE_DIRTY) != 0; } static inline int pte_young(pte_t pte) { -#ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_SWR) - return 1; -#endif - return 0; + return (pte_val(pte) & _PAGE_YOUNG) != 0; } /* @@ -876,12 +901,12 @@ static inline void pud_clear(pud_t *pud) static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; } /* @@ -892,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); - if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) - pte_val(pte) &= ~_PAGE_RO; + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWW; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_SWW; - if (pte_val(pte) & _PAGE_SWC) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWC; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_SWC; - if (pte_val(pte) & _PAGE_SWW) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkold(pte_t pte) { -#ifdef CONFIG_PGSTE - pte_val(pte) &= ~_PAGE_SWR; -#endif + pte_val(pte) &= ~_PAGE_YOUNG; + pte_val(pte) |= _PAGE_INVALID; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; return pte; } @@ -953,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + pte_val(pte) |= _PAGE_LARGE; return pte; } #endif @@ -970,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & KVM_UC_BIT); - pgste_val(pgste) &= ~KVM_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT); + pgste_val(pgste) &= ~PGSTE_HC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -990,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & KVM_UR_BIT); - pgste_val(pgste) &= ~KVM_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_HR_BIT); + pgste_val(pgste) &= ~PGSTE_HR_BIT; pgste_set_unlock(ptep, pgste); } return young; } +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + if (!(pte_val(*ptep) & _PAGE_INVALID)) { +#ifndef CONFIG_64BIT + /* pto must point to the start of the segment table */ + pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); +#else + /* ipte in zarch mode can do the math */ + pte_t *pto = ptep; +#endif + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), + "a" (pto), "a" (address)); + } +} + +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if (atomic_read(&mm->context.attach_count) > active) + __ptep_ipte(address, ptep); + else + mm->context.flush_mm = 1; +} + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pgste_t pgste; pte_t pte; + int young; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_update_young(ptep, pgste); - pte = *ptep; - *ptep = pte_mkold(pte); - pgste_set_unlock(ptep, pgste); - return pte_young(pte); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } - return 0; + + pte = *ptep; + __ptep_ipte(addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste_set_pte(ptep, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + + return young; } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH static inline int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB - * With virtualization we handle the reference bit, without we - * we can simply return */ return ptep_test_and_clear_young(vma, address, ptep); } -static inline void __ptep_ipte(unsigned long address, pte_t *ptep) -{ - if (!(pte_val(*ptep) & _PAGE_INVALID)) { -#ifndef CONFIG_64BIT - /* pto must point to the start of the segment table */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); -#else - /* ipte in zarch mode can do the math */ - pte_t *pto = ptep; -#endif - asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address)); - } -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -1063,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1089,18 +1136,19 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) |= _PAGE_INVALID; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); + pgste_set(ptep, pgste); + } return pte; } @@ -1111,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, pgste_t pgste; if (mm_has_pgste(mm)) { - pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); + pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte); pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, pgste); @@ -1133,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte = *ptep; __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1157,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - if (!full) - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; if (!full) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); pgste_set_unlock(ptep, pgste); } @@ -1183,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, pte_t pte = *ptep; if (pte_write(pte)) { - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte = pte_wrprotect(pte); if (mm_has_pgste(mm)) { @@ -1234,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); - return __pte; + return pte_mkyoung(__pte); } static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) @@ -1242,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) unsigned long physpage = page_to_phys(page); pte_t __pte = mk_pte_phys(physpage, pgprot); - if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { - pte_val(__pte) |= _PAGE_SWC; - pte_val(__pte) &= ~_PAGE_RO; - } + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); return __pte; } @@ -1307,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) unsigned long sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { asm volatile( " .insn rrf,0xb98e0000,%2,%3,0,0" : "=m" (*pmdp) @@ -1318,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } +static inline void __pmd_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* - * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx) + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) * Convert to segment table entry format. */ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_RO)) - return pgprot_val(SEGMENT_RO); - return pgprot_val(SEGMENT_RW); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { + int young; + + young = pmd_young(pmd); pmd_val(pmd) &= _SEGMENT_CHG_MASK; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); return pmd; } @@ -1343,25 +1430,37 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return pmd_mkyoung(__pmd); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - /* Do not clobber _HPAGE_TYPE_NONE pages! */ - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) + __pmd_idte(address, pmdp); + else + mm->context.flush_mm = 1; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { @@ -1371,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } @@ -1384,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1394,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } -static inline pmd_t pmd_mkold(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - -static inline pmd_t pmd_mkyoung(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; - long tmp, rc; - int counter; + pmd_t pmd; - rc = 0; - if (MACHINE_HAS_RRBM) { - counter = PTRS_PER_PTE >> 6; - asm volatile( - "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ - " ogr %1,%0\n" - " la %3,0(%4,%3)\n" - " brct %2,0b\n" - : "=&d" (tmp), "+&d" (rc), "+d" (counter), - "+a" (pmd_addr) - : "a" (64 * 4096UL) : "cc"); - rc = !!rc; - } else { - counter = PTRS_PER_PTE; - asm volatile( - "0: rrbe 0,%2\n" - " la %2,0(%3,%2)\n" - " brc 12,1f\n" - " lhi %0,1\n" - "1: brct %1,0b\n" - : "+d" (rc), "+d" (counter), "+a" (pmd_addr) - : "a" (4096UL) : "cc"); - } - return rc; + pmd = *pmdp; + __pmd_idte(address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); } #define __HAVE_ARCH_PMDP_GET_AND_CLEAR @@ -1503,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 21 and bit 22 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 30 and 31 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 1-19 and bits 24-29 to store type and offset. * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 * plus 24 for the offset. @@ -1520,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 53 and bit 54 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 62 and 63 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 0-51 and bits 56-61 to store type and offset. * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 * plus 56 for the offset. @@ -1540,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -1563,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_TYPE_FILE }) + | _PAGE_INVALID | _PAGE_PROTECT }) #endif /* !__ASSEMBLY__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6b499870662f..ca7821f07260 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -43,6 +43,7 @@ extern void execve_tail(void); #ifndef CONFIG_64BIT #define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) #define TASK_UNMAPPED_BASE (1UL << 30) #else /* CONFIG_64BIT */ @@ -51,6 +52,7 @@ extern void execve_tail(void); #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) #endif /* CONFIG_64BIT */ @@ -91,7 +93,15 @@ struct thread_struct { #endif }; -#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */ +/* Flag to disable transactions. */ +#define PER_FLAG_NO_TE 1UL +/* Flag to enable random transaction aborts. */ +#define PER_FLAG_TE_ABORT_RAND 2UL +/* Flag to specify random transaction abort mode: + * - abort each transaction at a random instruction before TEND if set. + * - abort random transactions at a random instruction if cleared. + */ +#define PER_FLAG_TE_ABORT_RAND_TEND 4UL typedef struct thread_struct thread_struct; @@ -188,6 +198,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 559512a455da..52b56533c57c 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,6 +24,7 @@ struct pt_regs unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; unsigned int int_code; + unsigned int int_parm; unsigned long int_parm_long; }; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 06a136136047..7dc7f9c63b65 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -56,5 +56,6 @@ bool sclp_has_linemode(void); bool sclp_has_vt220(void); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); +int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 000000000000..5b3e48ef534b --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 701fe8c59e1f..83e5d216105e 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); extern int arch_spin_trylock_retry(arch_spinlock_t *); extern void arch_spin_relax(arch_spinlock_t *lock); +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner_cpu == 0; +} + static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f3a9e0f92704..6dbd559763c9 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,9 +8,10 @@ #define __ASM_SWITCH_TO_H #include <linux/thread_info.h> +#include <asm/ptrace.h> extern struct task_struct *__switch_to(void *, void *); -extern void update_per_regs(struct task_struct *task); +extern void update_cr_regs(struct task_struct *task); static inline void save_fp_regs(s390_fp_regs *fpregs) { @@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs) static inline void save_access_regs(unsigned int *acrs) { - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); } static inline void restore_access_regs(unsigned int *acrs) { - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); } #define switch_to(prev,next,last) do { \ @@ -86,7 +91,7 @@ static inline void restore_access_regs(unsigned int *acrs) restore_fp_regs(&next->thread.fp_regs); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - update_per_regs(next); \ + update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b75d7d686684..2cb846c4b37f 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,6 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; + unsigned long start, end; }; struct mmu_table_batch { @@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned int full_mm_flush) + unsigned long start, + unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; if (tlb->fullmm) __tlb_flush_mm(mm); @@ -59,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, static inline void tlb_flush_mmu(struct mmu_gather *tlb) { + __tlb_flush_mm_lazy(tlb->mm); tlb_table_flush(tlb); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6b32af30878c..f9fef0425fee 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) __tlb_flush_full(mm); } -static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { if (mm->context.flush_mm) { __tlb_flush_mm(mm); @@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm) static inline void flush_tlb_mm(struct mm_struct *mm) { - __tlb_flush_mm_cond(mm); + __tlb_flush_mm_lazy(mm); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __tlb_flush_mm_cond(vma->vm_mm); + __tlb_flush_mm_lazy(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h new file mode 100644 index 000000000000..af9896c53eb3 --- /dev/null +++ b/arch/s390/include/asm/vtime.h @@ -0,0 +1,7 @@ +#ifndef _S390_VTIME_H +#define _S390_VTIME_H + +#define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH + +#endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 9ccd1905bdad..6a9a9eb645f5 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -35,6 +35,7 @@ header-y += siginfo.h header-y += signal.h header-y += socket.h header-y += sockios.h +header-y += sclp_ctl.h header-y += stat.h header-y += statfs.h header-y += swab.h diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index 1c6a7f85a581..65dc694725a8 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -29,6 +29,16 @@ struct chsc_async_area { __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; } __attribute__ ((packed)); +struct chsc_header { + __u16 length; + __u16 code; +} __attribute__ ((packed)); + +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); + struct chsc_response_struct { __u16 length; __u16 code; @@ -126,5 +136,8 @@ struct chsc_cpd_info { #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) #endif diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 38eca3ba40e2..5812a3b2df9e 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data { #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ #define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) /* retrieve API version number */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 3aa9f1ec5b29..7a84619e315e 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -400,6 +400,7 @@ typedef struct #define PTRACE_POKE_SYSTEM_CALL 0x5008 #define PTRACE_ENABLE_TE 0x5009 #define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 000000000000..f2818613ee41 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 2dacb306835c..92494494692e 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_BUSY_POLL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 7a82f9f70100..2416138ebd3e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C #include <linux/kbuild.h> +#include <linux/kvm_host.h> #include <linux/sched.h> #include <asm/cputime.h> #include <asm/vdso.h> @@ -47,6 +48,7 @@ int main(void) DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -161,6 +163,8 @@ int main(void) DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 64b24650e4f8..dd62071624be 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -173,7 +173,7 @@ error: } } -static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu) +static struct cache_dir *cache_create_cache_dir(int cpu) { struct cache_dir *cache_dir; struct kobject *kobj = NULL; @@ -289,9 +289,8 @@ static struct kobj_type cache_index_type = { .default_attrs = cache_index_default_attrs, }; -static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir, - struct cache *cache, int index, - int cpu) +static int cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, int cpu) { struct cache_index_dir *index_dir; int rc; @@ -313,7 +312,7 @@ out: return rc; } -static int __cpuinit cache_add_cpu(int cpu) +static int cache_add_cpu(int cpu) { struct cache_dir *cache_dir; struct cache *cache; @@ -335,7 +334,7 @@ static int __cpuinit cache_add_cpu(int cpu) return 0; } -static void __cpuinit cache_remove_cpu(int cpu) +static void cache_remove_cpu(int cpu) { struct cache_index_dir *index, *next; struct cache_dir *cache_dir; @@ -354,8 +353,8 @@ static void __cpuinit cache_remove_cpu(int cpu) cache_dir_cpu[cpu] = NULL; } -static int __cpuinit cache_hotplug(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int cache_hotplug(struct notifier_block *nfb, unsigned long action, + void *hcpu) { int cpu = (long)hcpu; int rc = 0; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 8b6e4f5288a2..1f1b8c70ab97 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -221,25 +221,26 @@ static int groups16_from_user(struct group_info *group_info, u16 __user *groupli asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + get_group_info(cred->group_info); + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); + put_group_info(cred->group_info); return i; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index c439ac9ced09..1389b637dae5 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -332,9 +332,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, (u16 __force __user *)(frame->retcode))) goto give_sigsegv; @@ -400,9 +400,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, (u16 __force __user *)(frame->retcode)); } @@ -417,7 +417,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, regs->psw.mask = PSW_MASK_BA | (psw_user_bits & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->psw.addr = (__u64 __force) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__force __u64) &frame->info; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f703d91bf720..c84f33d51f7b 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -16,38 +16,187 @@ #include <asm/os_info.h> #include <asm/elf.h> #include <asm/ipl.h> +#include <asm/sclp.h> #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + /* - * Copy one page from "oldmem" + * Return physical address for virtual address + */ +static inline void *load_real_addr(void *addr) +{ + unsigned long real_addr; + + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_addr) : "a" (addr) : "cc"); + return (void *)real_addr; +} + +/* + * Copy up to one page to vmalloc or real memory + */ +static ssize_t copy_page_real(void *buf, void *src, size_t csize) +{ + size_t size; + + if (is_vmalloc_addr(buf)) { + BUG_ON(csize >= PAGE_SIZE); + /* If buf is not page aligned, copy first part */ + size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize); + if (size) { + if (memcpy_real(load_real_addr(buf), src, size)) + return -EFAULT; + buf += size; + src += size; + } + /* Copy second part */ + size = csize - size; + return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0; + } else { + return memcpy_real(buf, src, csize); + } +} + +/* + * Pointer to ELF header in new kernel + */ +static void *elfcorehdr_newmem; + +/* + * Copy one page from zfcpdump "oldmem" + * + * For pages below ZFCPDUMP_HSA_SIZE memory from the HSA is copied. Otherwise + * real memory copy is used. + */ +static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, + unsigned long src, int userbuf) +{ + int rc; + + if (src < ZFCPDUMP_HSA_SIZE) { + rc = memcpy_hsa(buf, src, csize, userbuf); + } else { + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = memcpy_real(buf, (void *) src, csize); + } + return rc ? rc : csize; +} + +/* + * Copy one page from kdump "oldmem" * * For the kdump reserved memory this functions performs a swap operation: * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - unsigned long src; +static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, + unsigned long src, int userbuf) - if (!csize) - return 0; +{ + int rc; - src = (pfn << PAGE_SHIFT) + offset; if (src < OLDMEM_SIZE) src += OLDMEM_BASE; else if (src > OLDMEM_BASE && src < OLDMEM_BASE + OLDMEM_SIZE) src -= OLDMEM_BASE; if (userbuf) - copy_to_user_real((void __force __user *) buf, (void *) src, - csize); + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = copy_page_real(buf, (void *) src, csize); + return (rc == 0) ? rc : csize; +} + +/* + * Copy one page from "oldmem" + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + src = (pfn << PAGE_SHIFT) + offset; + if (OLDMEM_BASE) + return copy_oldmem_page_kdump(buf, csize, src, userbuf); + else + return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); +} + +/* + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_old; + int rc; + + if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { + size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (OLDMEM_BASE >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for zfcpdump + * + * We only map available memory above ZFCPDUMP_HSA_SIZE. Memory below + * ZFCPDUMP_HSA_SIZE is read on demand using the copy_oldmem_page() function. + */ +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_hsa; + + if (pfn < ZFCPDUMP_HSA_SIZE >> PAGE_SHIFT) { + size_hsa = min(size, ZFCPDUMP_HSA_SIZE - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for kdump or zfcpdump + */ +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + if (OLDMEM_BASE) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); else - memcpy_real(buf, (void *) src, csize); - return csize; + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); } /* @@ -58,11 +207,21 @@ int copy_from_oldmem(void *dest, void *src, size_t count) unsigned long copied = 0; int rc; - if ((unsigned long) src < OLDMEM_SIZE) { - copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = memcpy_real(dest, src + OLDMEM_BASE, copied); - if (rc) - return rc; + if (OLDMEM_BASE) { + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + } else { + if ((unsigned long) src < ZFCPDUMP_HSA_SIZE) { + copied = min(count, + ZFCPDUMP_HSA_SIZE - (unsigned long) src); + rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); + if (rc) + return rc; + } } return memcpy_real(dest + copied, src + copied, count - copied); } @@ -325,14 +484,6 @@ static int get_mem_chunk_cnt(void) } /* - * Relocate pointer in order to allow vmcore code access the data - */ -static inline unsigned long relocate(unsigned long addr) -{ - return OLDMEM_BASE + addr; -} - -/* * Initialize ELF loads (new kernel) */ static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) @@ -383,7 +534,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; - phdr->p_offset = relocate(notes_offset); + phdr->p_offset = notes_offset; phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); phdr->p_memsz = phdr->p_filesz; return ptr; @@ -392,7 +543,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) /* * Create ELF core header (new kernel) */ -static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) { Elf64_Phdr *phdr_notes, *phdr_loads; int mem_chunk_cnt; @@ -400,6 +551,12 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) u32 alloc_size; u64 hdr_off; + /* If we are not in kdump or zfcpdump mode return */ + if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + return 0; + /* If elfcorehdr= has been passed via cmdline, we use that one */ + if (elfcorehdr_addr != ELFCORE_ADDR_MAX) + return 0; mem_chunk_cnt = get_mem_chunk_cnt(); alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + @@ -417,27 +574,52 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); /* Init loads */ hdr_off = PTR_DIFF(ptr, hdr); - loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); - *elfcorebuf_sz = hdr_off; - *elfcorebuf = (void *) relocate((unsigned long) hdr); - BUG_ON(*elfcorebuf_sz > alloc_size); + loads_init(phdr_loads, hdr_off); + *addr = (unsigned long long) hdr; + elfcorehdr_newmem = hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; } /* - * Create kdump ELF core header in new kernel, if it has not been passed via - * the "elfcorehdr" kernel parameter + * Free ELF core header (new kernel) */ -static int setup_kdump_elfcorehdr(void) +void elfcorehdr_free(unsigned long long addr) { - size_t elfcorebuf_sz; - char *elfcorebuf; + if (!elfcorehdr_newmem) + return; + kfree((void *)(unsigned long)addr); +} - if (!OLDMEM_BASE || is_kdump_kernel()) - return -EINVAL; - s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); - elfcorehdr_addr = (unsigned long long) elfcorebuf; - elfcorehdr_size = elfcorebuf_sz; - return 0; +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; + memcpy(buf, src, count); + *ppos += count; + return count; } -subsys_initcall(setup_kdump_elfcorehdr); +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + int rc; + + if (elfcorehdr_newmem) { + memcpy(buf, src, count); + } else { + rc = copy_from_oldmem(buf, src, count); + if (rc) + return rc; + } + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 298297477257..99e7f6035895 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -40,14 +40,15 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) { struct stack_frame *sf; struct pt_regs *regs; + unsigned long addr; while (1) { sp = sp & PSW_ADDR_INSN; if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { low = sp; @@ -57,16 +58,16 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); + addr = regs->psw.addr & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); low = sp; sp = regs->gprs[15]; } @@ -74,6 +75,8 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) static void show_trace(struct task_struct *task, unsigned long *stack) { + const unsigned long frame_size = + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); register unsigned long __r15 asm ("15"); unsigned long sp; @@ -82,11 +85,13 @@ static void show_trace(struct task_struct *task, unsigned long *stack) sp = task ? task->thread.ksp : __r15; printk("Call Trace:\n"); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); + sp = __show_trace(sp, + S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + sp = __show_trace(sp, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); if (task) __show_trace(sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); @@ -124,8 +129,7 @@ static void show_last_breaking_event(struct pt_regs *regs) { #ifdef CONFIG_64BIT printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); - print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); + printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); #endif } @@ -139,10 +143,10 @@ void show_registers(struct pt_regs *regs) char *mode; mode = user_mode(regs) ? "User" : "Krnl"; - printk("%s PSW : %p %p", + printk("%s PSW : %p %p (%pSR)\n", mode, (void *) regs->psw.mask, + (void *) regs->psw.addr, (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4d5e6f8a7978..cc30d1fb000c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -18,6 +18,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 4 @@ -429,11 +430,24 @@ io_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -573,12 +587,13 @@ ext_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF + l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - l %r4,__LC_EXT_PARAMS # get external parameters - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 # call do_extint + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ j io_return /* @@ -871,13 +886,13 @@ cleanup_idle: stm %r9,%r10,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - n %r8,BASED(cleanup_idle_wait) # clear wait state bit + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits l %r9,24(%r11) # return from psw_idle br %r14 cleanup_idle_insn: .long psw_idle_lpsw + 0x80000000 cleanup_idle_wait: - .long 0xfffdffff + .long 0xfcfdffff /* * Integer constants @@ -894,7 +909,6 @@ cleanup_idle_wait: .Ldo_machine_check: .long s390_do_machine_check .Lhandle_mcck: .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint .Ldo_signal: .long do_signal .Ldo_notify_resume: .long do_notify_resume .Ldo_per_trap: .long do_per_trap diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index aa0ab02e9595..e9b04c33d383 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -53,27 +53,21 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); void do_notify_resume(struct pt_regs *regs); -struct ext_code; -void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); +void __init init_IRQ(void); +void do_IRQ(struct pt_regs *regs, int irq); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); - +int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); +int pfn_is_nosave(unsigned long); +void s390_early_resume(void); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -long sys_mmap2(struct s390_mmap_arg_struct __user *arg); -long sys_s390_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr); -long sys_s390_personality(unsigned int personality); -long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low, - size_t len, int advice); -long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); -long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, - u32 len_low); long sys_sigreturn(void); long sys_rt_sigreturn(void); long sys32_sigreturn(void); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4c17eece707e..2b2188b97c6a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,6 +19,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -47,7 +48,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -81,23 +81,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #endif .endm - .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck + .macro HANDLE_SIE_INTERCEPT scratch,reason #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) tmhh %r8,0x0001 # interrupting from user ? - jnz .+42 + jnz .+62 lgr \scratch,%r9 - slg \scratch,BASED(.Lsie_loop) - clg \scratch,BASED(.Lsie_length) - .if \pgmcheck + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 # Some program interrupts are suppressing (e.g. protection). # We must also check the instruction after SIE in that case. # do_protection_exception will rewind to rewind_pad - jh .+22 + jh .+42 .else - jhe .+22 + jhe .+42 .endif - lg %r9,BASED(.Lsie_loop) - LPP BASED(.Lhost_id) # set host id + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason #endif .endm @@ -450,7 +454,7 @@ ENTRY(io_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_IO_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,2 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user? jz io_skip @@ -460,10 +464,23 @@ io_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -595,7 +612,7 @@ ENTRY(ext_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_EXT_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,3 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user ? jz ext_skip @@ -605,14 +622,15 @@ ext_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - llgf %r4,__LC_EXT_PARAMS # get external parameter - lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter - brasl %r14,do_extint + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return /* @@ -643,7 +661,7 @@ ENTRY(mcck_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_MCK_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_panic # yes -> rest of mcck code invalid lghi %r14,__LC_CPU_TIMER_SAVE_AREA @@ -911,7 +929,7 @@ cleanup_idle: stg %r9,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - nihh %r8,0xfffd # clear wait state bit + nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle br %r14 cleanup_idle_insn: @@ -937,56 +955,50 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason lmg %r0,%r13,0(%r3) # load guest gprs 0-13 -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions in the sie_loop should not cause program interrupts. So -# lets use a nop (47 00 00 00) as a landing pad. -# See also HANDLE_SIE_INTERCEPT -rewind_pad: - nop 0 -sie_loop: - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - tm __TI_flags+7(%r14),_TIF_EXIT_SIE - jnz sie_exit lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz sie_gmap lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: LPP __SF_EMPTY+16(%r15) # set host id - lg %r14,__LC_THREAD_INFO # pointer thread_info struct -sie_exit: + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions beween sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,0 + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code br %r14 sie_fault: - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,-EFAULT - br %r14 + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit .align 8 -.Lsie_loop: - .quad sie_loop -.Lsie_length: - .quad sie_done - sie_loop -.Lhost_id: - .quad 0 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap EX_TABLE(rewind_pad,sie_fault) - EX_TABLE(sie_loop,sie_fault) + EX_TABLE(sie_exit,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index e3043aef87a9..1014ad5f7693 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -15,6 +15,7 @@ #include <linux/kprobes.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> +#include "entry.h" #ifdef CONFIG_DYNAMIC_FTRACE @@ -177,7 +178,7 @@ int ftrace_enable_ftrace_graph_caller(void) offset = ((void *) prepare_ftrace_return - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write(ftrace_graph_caller + 2, + return probe_kernel_write((void *) ftrace_graph_caller + 2, &offset, sizeof(offset)); } @@ -185,7 +186,7 @@ int ftrace_disable_ftrace_graph_caller(void) { static unsigned short offset = 0x0002; - return probe_kernel_write(ftrace_graph_caller + 2, + return probe_kernel_write((void *) ftrace_graph_caller + 2, &offset, sizeof(offset)); } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d8a6a385d048..feb719d3c851 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -754,9 +754,9 @@ static struct bin_attribute sys_reipl_fcp_scp_data_attr = { .write = reipl_fcp_scpdata_write, }; -DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", reipl_block_fcp->ipl_info.fcp.bootprog); @@ -1323,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ -DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", dump_block_fcp->ipl_info.fcp.bootprog); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f7fb58903f6a..8ac2097f13d4 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -22,6 +22,7 @@ #include <asm/cputime.h> #include <asm/lowcore.h> #include <asm/irq.h> +#include <asm/hw_irq.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -42,9 +43,10 @@ struct irq_class { * Since the external and I/O interrupt fields are already sums we would end * up with having a sum which accounts each interrupt twice. */ -static const struct irq_class irqclass_main_desc[NR_IRQS] = { - [EXTERNAL_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"} +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, }; /* @@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; +void __init init_IRQ(void) +{ + irq_reserve_irqs(0, THIN_INTERRUPT); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} + /* * show_interrupts is needed by /proc/interrupts. */ @@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; seq_printf(p, "%s: ", irqclass_main_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); - goto skip_arch_irqs; + goto out; } for (irq = 0; irq < NR_ARCH_IRQS; irq++) { seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); if (irqclass_sub_desc[irq].desc) seq_printf(p, " %s", irqclass_sub_desc[irq].desc); seq_putc(p, '\n'); } -skip_arch_irqs: +out: put_online_cpus(); return 0; } +int arch_show_interrupts(struct seq_file *p, int prec) +{ + return 0; +} + /* * Switch to the asynchronous interrupt stack for softirq execution. */ @@ -159,41 +192,27 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - if (proc_mkdir("irq", NULL)) - create_prof_cpu_mask(); -} -#endif - /* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index. */ -static struct list_head ext_int_hash[256]; +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; struct ext_int_info { ext_int_handler_t handler; - u16 code; - struct list_head entry; + struct hlist_node entry; struct rcu_head rcu; + u16 code; }; /* ext_int_hash_lock protects the handler lists for external interrupts */ -DEFINE_SPINLOCK(ext_int_hash_lock); - -static void __init init_external_interrupts(void) -{ - int idx; - - for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) - INIT_LIST_HEAD(&ext_int_hash[idx]); -} +static DEFINE_SPINLOCK(ext_int_hash_lock); static inline int ext_hash(u16 code) { - return (code + (code >> 9)) & 0xff; + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); } int register_external_interrupt(u16 code, ext_int_handler_t handler) @@ -210,7 +229,7 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler) index = ext_hash(code); spin_lock_irqsave(&ext_int_hash_lock, flags); - list_add_rcu(&p->entry, &ext_int_hash[index]); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; } @@ -223,9 +242,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) int index = ext_hash(code); spin_lock_irqsave(&ext_int_hash_lock, flags); - list_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { if (p->code == code && p->handler == handler) { - list_del_rcu(&p->entry); + hlist_del_rcu(&p->entry); kfree_rcu(p, rcu); } } @@ -234,80 +253,64 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, - unsigned int param32, unsigned long param64) +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { - struct pt_regs *old_regs; + struct pt_regs *regs = get_irq_regs(); + struct ext_code ext_code; struct ext_int_info *p; int index; - old_regs = set_irq_regs(regs); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { - /* Serve timer interrupts first. */ - clock_comparator_work(); - } - kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); + ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; index = ext_hash(ext_code.code); rcu_read_lock(); - list_for_each_entry_rcu(p, &ext_int_hash[index], entry) - if (likely(p->code == ext_code.code)) - p->handler(ext_code, param32, param64); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); + } rcu_read_unlock(); - irq_exit(); - set_irq_regs(old_regs); -} - -void __init init_IRQ(void) -{ - init_external_interrupts(); + return IRQ_HANDLED; } -static DEFINE_SPINLOCK(sc_irq_lock); -static int sc_irq_refcount; +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; -void service_subclass_irq_register(void) +void __init init_ext_interrupts(void) { - spin_lock(&sc_irq_lock); - if (!sc_irq_refcount) - ctl_set_bit(0, 9); - sc_irq_refcount++; - spin_unlock(&sc_irq_lock); -} -EXPORT_SYMBOL(service_subclass_irq_register); + int idx; -void service_subclass_irq_unregister(void) -{ - spin_lock(&sc_irq_lock); - sc_irq_refcount--; - if (!sc_irq_refcount) - ctl_clear_bit(0, 9); - spin_unlock(&sc_irq_lock); + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } -EXPORT_SYMBOL(service_subclass_irq_unregister); -static DEFINE_SPINLOCK(ma_subclass_lock); -static int ma_subclass_refcount; +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; -void measurement_alert_subclass_register(void) +void irq_subclass_register(enum irq_subclass subclass) { - spin_lock(&ma_subclass_lock); - if (!ma_subclass_refcount) - ctl_set_bit(0, 5); - ma_subclass_refcount++; - spin_unlock(&ma_subclass_lock); + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); } -EXPORT_SYMBOL(measurement_alert_subclass_register); +EXPORT_SYMBOL(irq_subclass_register); -void measurement_alert_subclass_unregister(void) +void irq_subclass_unregister(enum irq_subclass subclass) { - spin_lock(&ma_subclass_lock); - ma_subclass_refcount--; - if (!ma_subclass_refcount) - ctl_clear_bit(0, 5); - spin_unlock(&ma_subclass_lock); + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); } -EXPORT_SYMBOL(measurement_alert_subclass_unregister); +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 3388b2b2a07d..0ce9fb245034 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = { }; +DEFINE_INSN_CACHE_OPS(dmainsn); + +static void *alloc_dmainsn_page(void) +{ + return (void *)__get_free_page(GFP_KERNEL | GFP_DMA); +} + +static void free_dmainsn_page(void *page) +{ + free_page((unsigned long)page); +} + +struct kprobe_insn_cache kprobe_dmainsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), + .alloc = alloc_dmainsn_page, + .free = free_dmainsn_page, + .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), + .insn_size = MAX_INSN_SIZE, +}; + static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) { switch (insn[0] >> 8) { @@ -100,35 +120,161 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) fixup |= FIXUP_RETURN_REGISTER; break; case 0xc0: - if ((insn[0] & 0x0f) == 0x00 || /* larl */ - (insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) fixup = FIXUP_BRANCH_NOT_TAKEN; break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; } return fixup; } +static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} + +static void __kprobes copy_instruction(struct kprobe *p) +{ + s64 disp, new_disp; + u64 addr, new_addr; + + memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); + if (!is_insn_relative_long(p->ainsn.insn)) + return; + /* + * For pc-relative instructions in RIL-b or RIL-c format patch the + * RI2 displacement field. We have already made sure that the insn + * slot for the patched instruction is within the same 2GB area + * as the original instruction (either kernel image or module area). + * Therefore the new displacement will always fit. + */ + disp = *(s32 *)&p->ainsn.insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&p->ainsn.insn[1] = new_disp; +} + +static inline int is_kernel_addr(void *addr) +{ + return addr < (void *)_end; +} + +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; +} + +static int __kprobes s390_get_insn_slot(struct kprobe *p) +{ + /* + * Get an insn slot that is within the same 2GB area like the original + * instruction. That way instructions with a 32bit signed displacement + * field can be patched and executed within the insn slot. + */ + p->ainsn.insn = NULL; + if (is_kernel_addr(p->addr)) + p->ainsn.insn = get_dmainsn_slot(); + if (is_module_addr(p->addr)) + p->ainsn.insn = get_insn_slot(); + return p->ainsn.insn ? 0 : -ENOMEM; +} + +static void __kprobes s390_free_insn_slot(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + if (is_kernel_addr(p->addr)) + free_dmainsn_slot(p->ainsn.insn, 0); + else + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} + int __kprobes arch_prepare_kprobe(struct kprobe *p) { if ((unsigned long) p->addr & 0x01) return -EINVAL; - /* Make sure the probe isn't going on a difficult instruction */ if (is_prohibited_opcode(p->addr)) return -EINVAL; - + if (s390_get_insn_slot(p)) + return -ENOMEM; p->opcode = *p->addr; - memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); - + copy_instruction(p); return 0; } @@ -169,6 +315,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { + s390_free_insn_slot(p); } static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index ac2178161ec3..719e27b2cf22 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -50,7 +50,7 @@ static void add_elf_notes(int cpu) /* * Initialize CPU ELF notes */ -void setup_regs(void) +static void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; int cpu, this_cpu; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 504175ebf8b0..c4c033819879 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 390d9ae57bb2..1105502bf6e9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -274,7 +274,7 @@ static int reserve_pmc_hardware(void) int flags = PMC_INIT; on_each_cpu(setup_pmc_cpu, &flags, 1); - measurement_alert_subclass_register(); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; } @@ -285,7 +285,7 @@ static void release_pmc_hardware(void) int flags = PMC_RELEASE; on_each_cpu(setup_pmc_cpu, &flags, 1); - measurement_alert_subclass_unregister(); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); } /* Release the PMU if event is the last perf event */ @@ -639,8 +639,8 @@ static struct pmu cpumf_pmu = { .cancel_txn = cpumf_pmu_cancel_txn, }; -static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (long) hcpu; int flags; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index f58f37f66824..2343c218b8f9 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> +#include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/export.h> #include <asm/irq.h> @@ -39,6 +40,58 @@ int perf_num_counters(void) } EXPORT_SYMBOL(perf_num_counters); +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + void perf_event_print_debug(void) { struct cpumf_ctr_info cf_info; @@ -52,13 +105,10 @@ void perf_event_print_debug(void) cpu = smp_processor_id(); memset(&cf_info, 0, sizeof(cf_info)); - if (!qctri(&cf_info)) { + if (!qctri(&cf_info)) pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", cpu, cf_info.cfvn, cf_info.csvn, cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); - print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET, - &cf_info, sizeof(cf_info)); - } local_irq_restore(flags); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bc3eddae34a..c5dbb335716d 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -71,6 +71,7 @@ void arch_cpu_idle(void) } /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); + local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 753c41d0ffd3..24612029f450 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); /* * cpu_init - initializes state that is per-CPU. */ -void __cpuinit cpu_init(void) +void cpu_init(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct cpuid *id = &__get_cpu_var(cpu_id); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a314c57f4e94..9556905bd3ce 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -47,7 +47,7 @@ enum s390_regset { REGSET_GENERAL_EXTENDED, }; -void update_per_regs(struct task_struct *task) +void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; @@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr0, cr0_new; + unsigned long cr[3], cr_new[3]; - __ctl_store(cr0, 0, 0); - /* set or clear transaction execution bits 8 and 9. */ + __ctl_store(cr, 0, 2); + cr_new[1] = cr[1]; + /* Set or clear transaction execution TXC bit 8. */ if (task->thread.per_flags & PER_FLAG_NO_TE) - cr0_new = cr0 & ~(3UL << 54); + cr_new[0] = cr[0] & ~(1UL << 55); else - cr0_new = cr0 | (3UL << 54); - /* Only load control register 0 if necessary. */ - if (cr0 != cr0_new) - __ctl_load(cr0_new, 0, 0); + cr_new[0] = cr[0] | (1UL << 55); + /* Set or clear transaction execution TDC bits 62 and 63. */ + cr_new[2] = cr[2] & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new[2] |= 1UL; + else + cr_new[2] |= 2UL; + } + if (memcmp(&cr_new, &cr, sizeof(cr))) + __ctl_load(cr_new, 0, 2); } #endif /* Copy user specified PER registers */ @@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task) { set_tsk_thread_flag(task, TIF_SINGLE_STEP); if (task == current) - update_per_regs(task); + update_cr_regs(task); } void user_disable_single_step(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_SINGLE_STEP); if (task == current) - update_per_regs(task); + update_cr_regs(task); } /* @@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request, if (!MACHINE_HAS_TE) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } return 0; default: /* Removing high order bit from addr (only for 31 bit). */ @@ -1271,7 +1299,7 @@ int regs_query_register_offset(const char *name) if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 077a99389b07..e1c9d1c292fa 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -139,10 +139,10 @@ static int __init runtime_instr_init(void) if (!runtime_instr_avail()) return 0; - measurement_alert_subclass_register(); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); rc = register_external_interrupt(0x1407, runtime_instr_int_handler); if (rc) - measurement_alert_subclass_unregister(); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); else pr_info("Runtime instrumentation facility initialized\n"); return rc; diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9bdbcef1da9e..3bac589844a7 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount); #endif #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index b6506ee32a36..29bd7bec4176 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -225,7 +225,7 @@ _sclp_print: ahi %r2,1 ltr %r0,%r0 # end of string? jz .LfinalizemtoS4 - chi %r0,0x15 # end of line (NL)? + chi %r0,0x0a # end of line (NL)? jz .LfinalizemtoS4 stc %r0,0(%r6,%r7) # copy to mto la %r11,0(%r6,%r7) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095104c9..aeed8a61fa0d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -998,6 +994,7 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z196"); break; case 0x2827: + case 0x2828: strcpy(elf_platform, "zEC12"); break; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4f977d0d25c2..1a4313a1b60f 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,7 +49,6 @@ enum { ec_schedule = 0, - ec_call_function, ec_call_function_single, ec_stop_cpu, }; @@ -166,7 +165,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) pcpu_sigp_retry(pcpu, order, 0); } -static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { struct _lowcore *lc; @@ -363,7 +362,7 @@ void smp_yield_cpu(int cpu) * Send cpus emergency shutdown signal. This gives the cpus the * opportunity to complete outstanding interrupts. */ -void smp_emergency_stop(cpumask_t *cpumask) +static void smp_emergency_stop(cpumask_t *cpumask) { u64 end; int cpu; @@ -438,8 +437,6 @@ static void smp_handle_ext_call(void) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) scheduler_ipi(); - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); } @@ -456,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) @@ -619,10 +616,9 @@ static struct sclp_cpu_info *smp_get_cpu_info(void) return info; } -static int __cpuinit smp_add_present_cpu(int cpu); +static int smp_add_present_cpu(int cpu); -static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info, - int sysfs_add) +static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; @@ -688,7 +684,7 @@ static void __init smp_detect_cpus(void) /* * Activate a secondary processor. */ -static void __cpuinit smp_start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; @@ -711,7 +707,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) } /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { struct pcpu *pcpu; int rc; @@ -967,8 +963,8 @@ static struct attribute_group cpu_online_attr_group = { .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &pcpu_devices[cpu].cpu; @@ -986,7 +982,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, return notifier_from_errno(err); } -static int __cpuinit smp_add_present_cpu(int cpu) +static int smp_add_present_cpu(int cpu) { struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index c479d2f9605b..a7a7537ce1e7 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -10,6 +10,10 @@ #include <linux/suspend.h> #include <linux/mm.h> #include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> +#include "entry.h" /* * References to section boundaries @@ -211,3 +215,11 @@ void restore_processor_state(void) __ctl_set_bit(0,28); local_mcck_enable(); } + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index c487be4cfc81..6b09fdffbd2f 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -281,11 +281,8 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states - /* Log potential guest relocation */ - brasl %r14,lgr_info_log - - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 62f89d98e880..811f542b8ed4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -418,7 +418,7 @@ void s390_adjust_jiffies(void) /* * calibrate the delay loop */ -void __cpuinit calibrate_delay(void) +void calibrate_delay(void) { s390_adjust_jiffies(); /* Print the good old Bogomips line .. */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 876546b9cfa1..064c3082ab33 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,6 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d7776281cb60..05d75c413137 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -63,7 +63,7 @@ static int __init vdso_setup(char *s) else if (strncmp(s, "off", 4) == 0) vdso_enabled = 0; else { - rc = strict_strtoul(s, 0, &val); + rc = kstrtoul(s, 0, &val); vdso_enabled = rc ? 0 : !!val; } return !rc; @@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) page_table, _PAGE_INVALID, 256*sizeof(unsigned long)); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; psal = (u32 *) (page_table + 256*sizeof(unsigned long)); aste = psal + 32; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 3fb09359eda6..abcfab55f99b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -19,6 +19,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/vtimer.h> +#include <asm/vtime.h> #include <asm/irq.h> #include "entry.h" @@ -371,14 +372,14 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void __cpuinit init_cpu_vtimer(void) +void init_cpu_vtimer(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); } -static int __cpuinit s390_nohz_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_nohz_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { struct s390_idle_data *idle; long cpu = (long) hcpu; diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1c01a9912989..3a74d8af0d69 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], - 8, &vcpu->run->s.regs.gprs[3]); + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); srcu_read_unlock(&vcpu->kvm->srcu, idx); - /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ return ret < 0 ? ret : 0; } @@ -132,6 +141,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + trace_kvm_s390_handle_diag(vcpu, code); switch (code) { case 0x10: diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 302e0e52b009..99d789e8a018 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = get_user(x, __uptr); \ } \ @@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = put_user(x, __uptr); \ } \ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b7d1b2edeeb3..5ee56e5acc23 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,87 +22,6 @@ #include "trace.h" #include "trace-s390.h" -static int handle_lctlg(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - int reg, rc; - - vcpu->stat.instruction_lctlg++; - - useraddr = kvm_s390_get_base_disp_rsy(vcpu); - - if (useraddr & 7) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - reg = reg1; - - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); - - do { - rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], - (u64 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - useraddr += 8; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static int handle_lctl(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - u32 val = 0; - int reg, rc; - - vcpu->stat.instruction_lctl++; - - useraddr = kvm_s390_get_base_disp_rs(vcpu); - - if (useraddr & 3) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); - - reg = reg1; - do { - rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - useraddr += 4; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x8a] = kvm_s390_handle_priv_eb, -}; - -static int handle_eb(struct kvm_vcpu *vcpu) -{ - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; -} static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, @@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = { [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, - [0xb7] = handle_lctl, + [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, - [0xeb] = handle_eb, + [0xeb] = kvm_s390_handle_eb, }; static int handle_noop(struct kvm_vcpu *vcpu) @@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { - unsigned long vmaddr; int viwhy = vcpu->arch.sie_block->ipb >> 16; - int rc; vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - if (viwhy == 0x37) { - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - } else - rc = -EOPNOTSUPP; - -out: - if (rc) - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return rc; + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c948177529e..7f35cb33e510 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->arch.local_int.wq, &wait); + add_wait_queue(&vcpu->wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && (!vcpu->arch.local_int.timer_due) && @@ -452,7 +452,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); @@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm) spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->arch.local_int.wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock(&vcpu->arch.local_int.lock); } @@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) spin_lock_bh(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); - BUG_ON(waitqueue_active(&li->wq)); + BUG_ON(waitqueue_active(li->wq)); spin_unlock_bh(&li->lock); return 0; } @@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, li = fi->local_int[sigcpu]; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c683fa26..776dafe918db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -59,6 +60,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, @@ -83,7 +85,14 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static unsigned long long *facilities; +unsigned long *vfacilities; +static struct gmap_notifier gmap_notifier; + +/* test availability of vfacility */ +static inline int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) @@ -96,13 +105,18 @@ void kvm_arch_hardware_disable(void *garbage) { } +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + int kvm_arch_hardware_setup(void) { + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { + gmap_unregister_ipte_notifier(&gmap_notifier); } void kvm_arch_check_processor_compat(void *rtn) @@ -239,6 +253,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = gmap_alloc(current->mm); if (!kvm->arch.gmap) goto out_nogmap; + kvm->arch.gmap->private = kvm; } kvm->arch.css_support = 0; @@ -270,7 +285,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -309,6 +324,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; return 0; } @@ -373,10 +389,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | - CPUSTAT_STOPPED); + CPUSTAT_STOPPED | + CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; - vcpu->arch.sie_block->fac = (int) (long) facilities; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -397,7 +415,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = -ENOMEM; - vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) goto out; @@ -427,7 +445,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; - init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; spin_unlock(&kvm->arch.float_int.lock); @@ -442,7 +460,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); out: return ERR_PTR(rc); } @@ -454,6 +472,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ @@ -606,6 +668,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + vcpu->arch.sie_block->prefix, + PAGE_SIZE * 2); + if (rc) + return rc; + s390_vcpu_unblock(vcpu); + } + return 0; +} + static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc; @@ -621,16 +704,33 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) if (!kvm_is_ucontrol(vcpu->kvm)) kvm_s390_deliver_pending_interrupts(vcpu); + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + vcpu->arch.sie_block->icptcode = 0; - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); trace_kvm_s390_sie_enter(vcpu, atomic_read(&vcpu->arch.sie_block->cpuflags)); + + /* + * As PF_VCPU will be used in fault handler, between guest_enter + * and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - if (rc) { + kvm_guest_exit(); + + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); + + if (rc > 0) + rc = 0; + if (rc < 0) { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; } else { @@ -639,10 +739,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", - vcpu->arch.sie_block->icptcode); - trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); - kvm_guest_exit(); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); return rc; @@ -974,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1040,22 +1140,31 @@ static int __init kvm_s390_init(void) * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. */ - facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!facilities) { + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { kvm_exit(); return -ENOMEM; } - memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff00fff3f47c0000ULL; - facilities[1] &= 0x001c000000000000ULL; + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f47c0000UL; + vfacilities[1] &= 0x001c000000000000UL; return 0; } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) facilities); + free_page((unsigned long) vfacilities); kvm_exit(); } module_init(kvm_s390_init); module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index efc14f687265..dc99f1ca4267 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,6 +24,9 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) #define SIE_INTERCEPT_UCONTROL (1<<1) @@ -63,6 +66,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) @@ -85,6 +89,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) +{ + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} + static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; @@ -105,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); @@ -125,7 +142,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -133,6 +151,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5a0bbe..59200ee275e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,7 +1,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -16,10 +16,14 @@ #include <linux/errno.h> #include <linux/compat.h> #include <asm/asm-offsets.h> +#include <asm/facility.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> #include "gaccess.h" @@ -34,6 +38,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_spx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -65,6 +72,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stpx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -89,6 +99,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stap++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + useraddr = kvm_s390_get_base_disp_s(vcpu); if (useraddr & 1) @@ -105,7 +118,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int handle_skey(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_storage_key++; - vcpu->arch.sie_block->gpsw.addr -= 4; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -129,9 +147,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) + || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) + || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } else { /* * Store the three-word I/O interruption code into @@ -145,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) kfree(inti); no_interrupt: /* Set condition code and we're done. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, cc); return 0; } @@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + if (vcpu->kvm->arch.css_support) { /* * Most I/O instructions will be handled by userspace. @@ -198,27 +219,27 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) * Set condition code 3 to stop the guest from issueing channel * I/O instructions. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, 3); return 0; } } static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; - /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), - &facility_list, sizeof(facility_list)); + vfacilities, 4); if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); return 0; } @@ -255,8 +276,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) u64 addr; if (gpsw->mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -278,6 +299,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -296,6 +320,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stidp++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); if (operand2 & 7) @@ -351,16 +378,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); - operand2 = kvm_s390_get_base_disp_s(vcpu); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (fc > 3) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } - if (operand2 & 0xfff && fc > 0) + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - switch (fc) { - case 0: + if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + kvm_s390_set_psw_cc(vcpu, 0); return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: mem = get_zeroed_page(GFP_KERNEL); @@ -377,8 +418,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; - default: - goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { @@ -387,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; return 0; out_no_data: - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + kvm_s390_set_psw_cc(vcpu, 3); out_exception: free_page(mem); return rc; @@ -432,20 +470,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) intercept_handler_t handler; /* - * a lot of B2 instructions are priviledged. We first check for - * the privileged ones, that we can handle in the kernel. If the - * kernel can handle this instruction, we check for the problem - * state bit and (a) handle the instruction or (b) send a code 2 - * program check. - * Anything else goes to userspace.*/ + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } @@ -453,23 +485,100 @@ static int handle_epsw(struct kvm_vcpu *vcpu) { int reg1, reg2; - reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; - reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ - vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; if (reg2) { - vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg2] |= - vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; + } + return 0; +} + +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr; + + useraddr = gmap_translate(start, vcpu->arch.gmap); + if (IS_ERR((void *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; return 0; } static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, [0x9c] = handle_io_inst, + [0xaf] = handle_pfmf, }; int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) @@ -478,29 +587,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) /* This is handled just as for the B2 instructions. */ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if ((handler != handle_epsw) && - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rs(vcpu); + + if (useraddr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); + + reg = reg1; + do { + rc = get_guest(vcpu, val, (u32 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rsy(vcpu); + + if (useraddr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); + + do { + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, [0x8a] = handle_io_inst, }; -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { intercept_handler_t handler; - /* All eb instructions that end up here are privileged. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; if (handler) return handler(vcpu); @@ -515,6 +691,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu) vcpu->stat.instruction_tprot++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); /* we only handle the Linux memory detection case: @@ -560,8 +739,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) u32 value; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) return kvm_s390_inject_program_int(vcpu, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1c48ab2845e0..bec398c57acf 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); @@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); @@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); out: spin_unlock_bh(&li->lock); @@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); @@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) /* sigp in userspace can exit */ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index c61b9fad43cc..57c87d7d7ede 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs) do { set_clock_comparator(end); vtime_stop_cpu(); - local_irq_disable(); } while (get_tod_clock() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs) set_clock_comparator(end); } vtime_stop_cpu(); - local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock() < end); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 50ea137a2d3c..1694d738b175 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm, switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: table = table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { - if (write && (*table & _SEGMENT_ENTRY_RO)) + if (write && (*table & _SEGMENT_ENTRY_PROTECT)) return -0x04UL; return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); @@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm, table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } @@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm, unsigned long *table = (unsigned long *)__pa(mm->pgd); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3ad65b04ac15..46d517c3c763 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_printf(m, "I\n"); return; } - seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st, } /* - * The actual page table walker functions. In order to keep the implementation - * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO - * flags to note_page() if a region, segment or page table entry is invalid or - * read-only. - * After all it's just a hint that the current level being walked contains an - * invalid or read-only entry. + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. */ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t *pmd, unsigned long addr) @@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { st->current_address = addr; pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); note_page(m, st, prot, 4); addr += PAGE_SIZE; } } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) #else #define _PMD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 047c3e4c59a2..fc6679210d83 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access) address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); @@ -639,8 +641,8 @@ out: put_task_struct(tsk); } -static int __cpuinit pfault_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { struct thread_struct *thread, *next; struct task_struct *tsk; @@ -673,7 +675,7 @@ static int __init pfault_irq_init(void) rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; if (rc) goto out_pfault; - service_subclass_irq_register(); + irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); hotcpu_notifier(pfault_cpu_notify, 0); return 0; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 1f5315d1215c..5d758db27bdc 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, pte_t *ptep, pte; struct page *page; - mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { @@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, struct page *head, *page, *tail; int refs; - result = write ? 0 : _SEGMENT_ENTRY_RO; - mask = result | _SEGMENT_ENTRY_INV; + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; if ((pmd_val(pmd) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 121089d57802..d261c62e40a6 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -8,21 +8,127 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, young, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. + * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) + pte_t *ptep, pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - unsigned long mask; + pmd_t pmd; + pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} + +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; } + return __pmd_to_pte(pmd); +} - pmd_val(*pmdp) = pte_val(pteval); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + if (MACHINE_HAS_IDTE) + __pmd_idte(addr, pmdp); + else + __pmd_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; } int arch_prepare_hugepage(struct page *page) @@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) ptep, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; @@ -117,6 +223,11 @@ int pud_huge(pud_t pud) return 0; } +int pmd_huge_support(void) +{ + return 1; +} + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int write) { diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4008f2..ad446b0c55b6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -69,6 +69,7 @@ static void __init setup_zero_pages(void) order = 2; break; case 0x2827: /* zEC12 */ + case 0x2828: /* zEC12 */ default: order = 5; break; @@ -135,30 +136,17 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); @@ -166,13 +154,14 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 921fa541dc04..d1e0e0c7a7e2 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -14,6 +14,7 @@ #include <linux/gfp.h> #include <linux/cpu.h> #include <asm/ctl_reg.h> +#include <asm/io.h> /* * This function writes to kernel memory bypassing DAT and possible diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index 3cbd3b8bf311..cca388253a39 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -123,7 +123,8 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, continue; } else if ((addr <= chunk->addr) && (addr + size >= chunk->addr + chunk->size)) { - memset(chunk, 0 , sizeof(*chunk)); + memmove(chunk, chunk + 1, (MEMORY_CHUNKS-i-1) * sizeof(*chunk)); + memset(&mem_chunk[MEMORY_CHUNKS-1], 0, sizeof(*chunk)); } else if (addr + size < chunk->addr + chunk->size) { chunk->size = chunk->addr + chunk->size - addr - size; chunk->addr = addr + size; diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 06bafec00278..40023290ee5b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } @@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = s390_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = s390_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 80adfbf75065..990397420e6b 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pte = pte_offset_kernel(pmd, address); if (!enable) { __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_TYPE_EMPTY; + pte_val(*pte) = _PAGE_INVALID; continue; } pte_val(*pte) = __pa(address); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a938b548f07e..de8cbc30dcd1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) struct gmap_rmap *rmap; struct page *page; - if (*table & _SEGMENT_ENTRY_INV) + if (*table & _SEGMENT_ENTRY_INVALID) return 0; page = pfn_to_page(*table >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; @@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) kfree(rmap); break; } - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; return 1; } @@ -245,7 +245,9 @@ EXPORT_SYMBOL_GPL(gmap_disable); * gmap_alloc_table is assumed to be called with mmap_sem held */ static int gmap_alloc_table(struct gmap *gmap, - unsigned long *table, unsigned long init) + unsigned long *table, unsigned long init) + __releases(&gmap->mm->page_table_lock) + __acquires(&gmap->mm->page_table_lock) { struct page *page; unsigned long *new; @@ -258,7 +260,7 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - if (*table & _REGION_ENTRY_INV) { + if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); @@ -292,22 +294,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); /* Clear segment table entry in guest address space. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV; + *table = _SEGMENT_ENTRY_INVALID; } out: spin_unlock(&gmap->mm->page_table_lock); @@ -335,7 +337,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > PGDIR_SIZE || + if (len == 0 || from + len > TASK_MAX_SIZE || from + len < from || to + len < to) return -EINVAL; @@ -345,17 +347,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); @@ -363,7 +365,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, /* Store 'from' address in an invalid segment table entry. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); } spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); @@ -384,15 +387,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) unsigned long *table; table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); @@ -422,11 +425,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. */ segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_RO) { + } else if (segment & _SEGMENT_ENTRY_PROTECT) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; return vmaddr | (address & ~PMD_MASK); } @@ -517,8 +520,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) page = pfn_to_page(__pa(table) >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); list_del(&rmap->list); kfree(rmap); flush = 1; @@ -545,13 +548,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Convert the gmap address to an mm address. */ while (1) { segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { /* Page table is present */ page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } - if (!(segment & _SEGMENT_ENTRY_RO)) + if (!(segment & _SEGMENT_ENTRY_PROTECT)) /* Nothing mapped in the gmap address space. */ break; rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); @@ -586,25 +589,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) while (address < to) { /* Walk the gmap address space page table */ table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -687,9 +690,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) continue; /* Set notification bit in the pgste of the pte */ entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= RCP_IN_BIT; + pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); start += PAGE_SIZE; len -= PAGE_SIZE; @@ -731,6 +734,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) spin_unlock(&gmap_notifier_lock); } +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -750,10 +758,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; - atomic_set(&page->_mapcount, 3); + atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, + PAGE_SIZE/2); return table; } @@ -771,8 +780,56 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits, skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set storage key ACC and FP */ + page_set_storage_key(address, skey, !nq); + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_HC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + #else /* CONFIG_PGSTE */ +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -830,7 +887,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) pgtable_page_ctor(page); atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } else { @@ -849,12 +906,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) @@ -892,14 +949,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) unsigned int bit, mask; mm = tlb->mm; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; } bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) list_del(&page->lru); @@ -911,7 +968,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) tlb_remove_table(tlb, table); } -void __tlb_remove_table(void *_table) +static void __tlb_remove_table(void *_table) { const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; void *table = (void *)((unsigned long) _table & ~mask); @@ -959,7 +1016,6 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - __tlb_flush_mm(tlb->mm); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -969,11 +1025,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; + tlb->mm->context.flush_mm = 1; if (*batch == NULL) { *batch = (struct mmu_table_batch *) __get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { - __tlb_flush_mm(tlb->mm); + __tlb_flush_mm_lazy(tlb->mm); tlb_remove_table_one(table); return; } @@ -981,40 +1038,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) } (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void thp_split_vma(struct vm_area_struct *vma) +static inline void thp_split_vma(struct vm_area_struct *vma) { unsigned long addr; - struct page *page; - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - page = follow_page(vma, addr, FOLL_SPLIT); - } + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); } -void thp_split_mm(struct mm_struct *mm) +static inline void thp_split_mm(struct mm_struct *mm) { - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; - while (vma != NULL) { + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { thp_split_vma(vma); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; - vma = vma->vm_next; } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) { + mm->context.has_pgste = 0; + continue; + } + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! */ + page_table_free_rcu(tlb, table); + new = NULL; + } + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + + return addr; +} + +static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm, *old_mm; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; /* Do we have switched amode? If no, we cannot do sie */ if (s390_user_mode == HOME_SPACE_MODE) @@ -1024,57 +1165,16 @@ int s390_enable_sie(void) if (mm_has_pgste(tsk->mm)) return 0; - /* lets check if we are allowed to replace the mm */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - task_unlock(tsk); - return -EINVAL; - } - task_unlock(tsk); - - /* we copy the mm and let dup_mm create the page tables with_pgstes */ - tsk->mm->context.alloc_pgste = 1; - /* make sure that both mms have a correct rss state */ - sync_mm_rss(tsk->mm); - mm = dup_mm(tsk); - tsk->mm->context.alloc_pgste = 0; - if (!mm) - return -ENOMEM; - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE + down_write(&mm->mmap_sem); /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - mm->def_flags |= VM_NOHUGEPAGE; -#endif - - /* Now lets check again if something happened */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - mmput(mm); - task_unlock(tsk); - return -EINVAL; - } - - /* ok, we are alone. No ptrace, no threads, etc. */ - old_mm = tsk->mm; - tsk->mm = tsk->active_mm = mm; - preempt_disable(); - update_mm(mm, tsk); - atomic_inc(&mm->context.attach_count); - atomic_dec(&old_mm->context.attach_count); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - preempt_enable(); - task_unlock(tsk); - mmput(old_mm); - return 0; + /* Reallocate the page tables with pgstes */ + mm->context.has_pgste = 1; + tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); + page_table_realloc(&tlb, mm, 0, TASK_SIZE); + tlb_finish_mmu(&tlb, 0, TASK_SIZE); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -1117,7 +1217,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1131,7 +1232,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; @@ -1149,9 +1250,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; ptep++; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b268fcc4612..bcfb70b60be6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) pte, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_RO : 0); + (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } @@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - (ro ? _SEGMENT_ENTRY_RO : 0); + _SEGMENT_ENTRY_YOUNG | + (ro ? _SEGMENT_ENTRY_PROTECT : 0); address += PMD_SIZE; continue; } @@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); address += PAGE_SIZE; } ret = 0; @@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pte_t *pt_dir; pte_t pte; - pte_val(pte) = _PAGE_TYPE_EMPTY; + pte_val(pte) = _PAGE_INVALID; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte_val(*pt_dir) = __pa(new_page); + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); } address += PAGE_SIZE; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 82f165f8078c..709239285869 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -9,6 +9,8 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/filter.h> +#include <linux/random.h> +#include <linux/init.h> #include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/facility.h> @@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } +/* Helper to find the offset of pkt_type in sk_buff + * Make sure its still a 3bit field starting at the MSBs within a byte. + */ +#define PKT_TYPE_MAX 0xe0 +static int pkt_type_offset; + +static int __init bpf_pkt_type_offset_init(void) +{ + struct sk_buff skb_probe = { + .pkt_type = ~0, + }; + char *ct = (char *)&skb_probe; + int off; + + pkt_type_offset = -1; + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (!ct[off]) + continue; + if (ct[off] == PKT_TYPE_MAX) + pkt_type_offset = off; + else { + /* Found non matching bit pattern, fix needed. */ + WARN_ON_ONCE(1); + pkt_type_offset = -1; + return -1; + } + } + return 0; +} +device_initcall(bpf_pkt_type_offset_init); + /* * make sure we dont leak kernel information to user */ @@ -720,6 +753,16 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ EMIT4_DISP(0x88500000, 12); } break; + case BPF_S_ANC_PKTTYPE: + if (pkt_type_offset < 0) + goto out; + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* ic %r5,<d(pkt_type_offset)>(%r2) */ + EMIT4_DISP(0x43502000, pkt_type_offset); + /* srl %r5,5 */ + EMIT4_DISP(0x88500000, 5); + break; case BPF_S_ANC_CPU: /* A = smp_processor_id() */ #ifdef CONFIG_SMP /* l %r5,<d(cpu_nr)> */ @@ -738,8 +781,41 @@ out: return -1; } +/* + * Note: for security reasons, bpf code will follow a randomly + * sized amount of illegal instructions. + */ +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + +static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, + u8 **image_ptr) +{ + struct bpf_binary_header *header; + unsigned int sz, hole; + + /* Most BPF filters are really small, but if some of them fill a page, + * allow at least 128 extra bytes for illegal instructions. + */ + sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); + header = module_alloc(sz); + if (!header) + return NULL; + memset(header, 0, sz); + header->pages = sz / PAGE_SIZE; + hole = sz - (bpfsize + sizeof(*header)); + /* Insert random number of illegal instructions before BPF code + * and make sure the first instruction starts at an even address. + */ + *image_ptr = &header->image[(prandom_u32() % hole) & -2]; + return header; +} + void bpf_jit_compile(struct sk_filter *fp) { + struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; struct bpf_jit jit, cjit; unsigned int *addrs; @@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp) } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { prg_len = jit.prg - jit.start; lit_len = jit.lit - jit.mid; - size = max_t(unsigned long, prg_len + lit_len, - sizeof(struct work_struct)); + size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - jit.start = module_alloc(size); - if (!jit.start) + header = bpf_alloc_binary(size, &jit.start); + if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; jit.lit = jit.end = jit.start + prg_len + lit_len; @@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp) cjit = jit; } if (bpf_jit_enable > 1) { - pr_err("flen=%d proglen=%lu pass=%d image=%p\n", - fp->len, jit.end - jit.start, pass, jit.start); - if (jit.start) { - printk(KERN_ERR "JIT code:\n"); + bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) print_fn_code(jit.start, jit.mid - jit.start); - print_hex_dump(KERN_ERR, "JIT literals:\n", - DUMP_PREFIX_ADDRESS, 16, 1, - jit.mid, jit.end - jit.mid, false); - } } - if (jit.start) + if (jit.start) { + set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; + } out: kfree(addrs); } -static void jit_free_defer(struct work_struct *arg) -{ - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ void bpf_jit_free(struct sk_filter *fp) { - struct work_struct *work; + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; if (fp->bpf_func == sk_run_filter) return; - work = (struct work_struct *)fp->bpf_func; - INIT_WORK(work, jit_free_defer); - schedule_work(work); + set_memory_rw(addr, header->pages); + module_free(NULL, header); } diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index b5b2916895e0..231cecafc2f1 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -1001,7 +1001,7 @@ int hwsampler_deallocate(void) if (hws_state != HWS_STOPPED) goto deallocate_exit; - measurement_alert_subclass_unregister(); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1115,7 +1115,7 @@ int hwsampler_shutdown(void) mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - measurement_alert_subclass_unregister(); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); deallocate_sdbt(); } if (hws_wq) { @@ -1190,7 +1190,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - measurement_alert_subclass_register(); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; } diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 1912f3bb190c..0022e1ebfbde 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -81,8 +81,8 @@ struct hws_data_entry { unsigned int:16; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ - unsigned long long lpp; /* Logical-Partition Program Param. */ - unsigned long long vpp; /* Virtual-Machine Program Param. */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ }; struct hws_trailer_entry { diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index ffeb17ce7f31..04e1b6a85362 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -346,16 +346,15 @@ static const struct file_operations timer_enabled_fops = { }; -static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) +static int oprofile_create_hwsampling_files(struct dentry *root) { struct dentry *dir; - dir = oprofilefs_mkdir(sb, root, "timer"); + dir = oprofilefs_mkdir(root, "timer"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); if (!hwsampler_available) return 0; @@ -376,17 +375,17 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, * and can only be set to 0. */ - dir = oprofilefs_mkdir(sb, root, "0"); + dir = oprofilefs_mkdir(root, "0"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(sb, dir, "event", &zero_fops); - oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); - oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); - oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); - oprofilefs_create_file(sb, dir, "user", &user_fops); - oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + oprofilefs_create_file(dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(dir, "event", &zero_fops); + oprofilefs_create_file(dir, "count", &hw_interval_fops); + oprofilefs_create_file(dir, "unit_mask", &zero_fops); + oprofilefs_create_file(dir, "kernel", &kernel_fops); + oprofilefs_create_file(dir, "user", &user_fops); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", &oprofile_sdbt_blocks); } else { @@ -396,19 +395,19 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, * space tools. The /dev/oprofile/hwsampling fs is * provided in that case. */ - dir = oprofilefs_mkdir(sb, root, "hwsampling"); + dir = oprofilefs_mkdir(root, "hwsampling"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "hwsampler", + oprofilefs_create_file(dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_file(sb, dir, "hw_interval", + oprofilefs_create_file(dir, "hw_interval", &hw_interval_fops); - oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + oprofilefs_create_ro_ulong(dir, "hw_min_interval", &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + oprofilefs_create_ro_ulong(dir, "hw_max_interval", &oprofile_max_interval); - oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", &oprofile_sdbt_blocks); } return 0; @@ -440,7 +439,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) switch (id.machine) { case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: ops->cpu_type = "s390/zEC12"; break; + case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; default: return -ENODEV; } } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 086a2e37935d..a9e1dc4ae442 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e6f15b5d8b7d..f17a8343e360 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -42,78 +42,43 @@ #define SIC_IRQ_MODE_SINGLE 1 #define ZPCI_NR_DMA_SPACES 1 -#define ZPCI_MSI_VEC_BITS 6 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ -LIST_HEAD(zpci_list); -EXPORT_SYMBOL_GPL(zpci_list); -DEFINE_MUTEX(zpci_list_lock); -EXPORT_SYMBOL_GPL(zpci_list_lock); +static LIST_HEAD(zpci_list); +static DEFINE_SPINLOCK(zpci_list_lock); -static struct pci_hp_callback_ops *hotplug_ops; +static void zpci_enable_irq(struct irq_data *data); +static void zpci_disable_irq(struct irq_data *data); + +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = zpci_enable_irq, + .irq_mask = zpci_disable_irq, +}; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); static DEFINE_SPINLOCK(zpci_domain_lock); -struct callback { - irq_handler_t handler; - void *data; -}; +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; -struct zdev_irq_map { - unsigned long aibv; /* AI bit vector */ - int msi_vecs; /* consecutive MSI-vectors used */ - int __unused; - struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */ - spinlock_t lock; /* protect callbacks against de-reg */ -}; +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); -struct intr_bucket { - /* amap of adapters, one bit per dev, corresponds to one irq nr */ - unsigned long *alloc; - /* AI summary bit, global page for all devices */ - unsigned long *aisb; - /* pointer to aibv and callback data in zdev */ - struct zdev_irq_map *imap[ZPCI_NR_DEVICES]; - /* protects the whole bucket struct */ - spinlock_t lock; +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, }; -static struct intr_bucket *bucket; - -/* Adapter local summary indicator */ -static u8 *zpci_irq_si; - -static atomic_t irq_retries = ATOMIC_INIT(0); - /* I/O Map */ static DEFINE_SPINLOCK(zpci_iomap_lock); static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); -/* highest irq summary bit */ -static int __read_mostly aisb_max; - -static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -static inline int irq_to_msi_nr(unsigned int irq) -{ - return irq & ZPCI_MSI_MASK; -} - -static inline int irq_to_dev_nr(unsigned int irq) -{ - return irq >> ZPCI_MSI_VEC_BITS; -} - -static inline struct zdev_irq_map *get_imap(unsigned int irq) -{ - return bucket->imap[irq_to_dev_nr(irq)]; -} - struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -123,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; break; } } - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); return zdev; } -bool zpci_fid_present(u32 fid) -{ - return (get_zdev_by_fid(fid) != NULL) ? true : false; -} - static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; @@ -157,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus) EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register adapter interruptions */ -static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, - u64 aibv) +static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib *fib; @@ -169,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, return -ENOMEM; fib->isc = PCI_ISC; - fib->noi = zdev->irq_map->msi_vecs; fib->sum = 1; /* enable summary notifications */ - fib->aibv = aibv; - fib->aibvo = 0; /* every function has its own page */ - fib->aisb = (u64) bucket->aisb + aisb / 8; - fib->aisbo = aisb & ZPCI_MSI_MASK; + fib->noi = airq_iv_end(zdev->aibv); + fib->aibv = (unsigned long) zdev->aibv->vector; + fib->aibvo = 0; /* each zdev has its own interrupt vector */ + fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib->aisbo = zdev->aisb & 63; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -206,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -231,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) } /* Modify PCI: Unregister adapter interruptions */ -static int zpci_unregister_airq(struct zpci_dev *zdev) +static int zpci_clear_airq(struct zpci_dev *zdev) { struct mod_pci_args args = { 0, 0, 0, 0 }; @@ -280,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = s390pci_load(&data, req, offset); + rc = zpci_load(&data, req, offset); if (!rc) { data = data << ((8 - len) * 8); data = le64_to_cpu(data); @@ -298,58 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = s390pci_store(data, req, offset); + rc = zpci_store(data, req, offset); return rc; } -void synchronize_irq(unsigned int irq) +static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) { - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. - */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); + int offset, pos; + u32 mask_bits; -void enable_irq(unsigned int irq) -{ - struct msi_desc *msi = irq_get_msi_desc(irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} -EXPORT_SYMBOL_GPL(enable_irq); - -void disable_irq(unsigned int irq) -{ - struct msi_desc *msi = irq_get_msi_desc(irq); + if (msi->msi_attrib.is_msix) { + offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + msi->masked = readl(msi->mask_base + offset); + writel(flag, msi->mask_base + offset); + } else if (msi->msi_attrib.maskbit) { + pos = (long) msi->mask_base; + pci_read_config_dword(msi->dev, pos, &mask_bits); + mask_bits &= ~(mask); + mask_bits |= flag & mask; + pci_write_config_dword(msi->dev, pos, mask_bits); + } else + return 0; - zpci_msi_set_mask_bits(msi, 1, 1); + msi->msi_attrib.maskbit = !!flag; + return 1; } -EXPORT_SYMBOL_GPL(disable_irq); -void disable_irq_nosync(unsigned int irq) +static void zpci_enable_irq(struct irq_data *data) { - disable_irq(irq); -} -EXPORT_SYMBOL_GPL(disable_irq_nosync); + struct msi_desc *msi = irq_get_msi_desc(data->irq); -unsigned long probe_irq_on(void) -{ - return 0; + zpci_msi_set_mask_bits(msi, 1, 0); } -EXPORT_SYMBOL_GPL(probe_irq_on); -int probe_irq_off(unsigned long val) +static void zpci_disable_irq(struct irq_data *data) { - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_off); + struct msi_desc *msi = irq_get_msi_desc(data->irq); -unsigned int probe_irq_mask(unsigned long val) -{ - return val; + zpci_msi_set_mask_bits(msi, 1, 1); } -EXPORT_SYMBOL_GPL(probe_irq_mask); void pcibios_fixup_bus(struct pci_bus *bus) { @@ -434,156 +381,165 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -/* store the last handled bit to implement fair scheduling of devices */ -static DEFINE_PER_CPU(unsigned long, next_sbit); - -static void zpci_irq_handler(void *dont, void *need) +static void zpci_irq_handler(struct airq_struct *airq) { - unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); - int rescan = 0, max = aisb_max; - struct zdev_irq_map *imap; + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; inc_irq_stat(IRQIO_PCI); - sbit = start; - -scan: - /* find summary_bit */ - for_each_set_bit_left_cont(sbit, bucket->aisb, max) { - clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); - last = sbit; + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } - /* find vector bit */ - imap = bucket->imap[sbit]; - for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { + /* Scan the adapter interrupt vector for this device. */ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; inc_irq_stat(IRQIO_MSI); - clear_bit(63 - mbit, &imap->aibv); - - spin_lock(&imap->lock); - if (imap->cb[mbit].handler) - imap->cb[mbit].handler(mbit, - imap->cb[mbit].data); - spin_unlock(&imap->lock); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } - - if (rescan) - goto out; - - /* scan the skipped bits */ - if (start > 0) { - sbit = 0; - max = start; - start = 0; - goto scan; - } - - /* enable interrupts again */ - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); - - /* check again to not lose initiative */ - rmb(); - max = aisb_max; - sbit = find_first_bit_left(bucket->aisb, max); - if (sbit != max) { - atomic_inc(&irq_retries); - rescan++; - goto scan; - } -out: - /* store next device bit to scan */ - __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last; } -/* msi_vecs - number of requested interrupts, 0 place function to error state */ -static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs) +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); - unsigned int aisb, msi_nr; + unsigned int hwirq, irq, msi_vecs; + unsigned long aisb; struct msi_desc *msi; + struct msi_msg msg; int rc; - /* store the number of used MSI vectors */ - zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); - - spin_lock(&bucket->lock); - aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); - /* alloc map exhausted? */ - if (aisb == PAGE_SIZE) { - spin_unlock(&bucket->lock); - return -EIO; - } - set_bit(aisb, bucket->alloc); - spin_unlock(&bucket->lock); + pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); + if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) + return -EINVAL; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; zdev->aisb = aisb; - if (aisb + 1 > aisb_max) - aisb_max = aisb + 1; - /* wire up IRQ shortcut pointer */ - bucket->imap[zdev->aisb] = zdev->irq_map; - pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; - /* TODO: irq number 0 wont be found if we return less than requested MSIs. - * ignore it for now and fix in common code. - */ - msi_nr = aisb << ZPCI_MSI_VEC_BITS; + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + /* Request MSI interrupts */ + hwirq = 0; list_for_each_entry(msi, &pdev->msi_list, list) { - rc = zpci_setup_msi_irq(zdev, msi, msi_nr, - aisb << ZPCI_MSI_VEC_BITS); + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq == NO_IRQ) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); if (rc) - return rc; - msi_nr++; + goto out_msi; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; } - rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); - if (rc) { - clear_bit(aisb, bucket->alloc); - dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); - return rc; + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + list_for_each_entry(msi, &pdev->msi_list, list) { + if (hwirq-- == 0) + break; + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; } - return (zdev->irq_map->msi_vecs == msi_vecs) ? - 0 : zdev->irq_map->msi_vecs; + zpci_aibv[aisb] = NULL; + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); + return rc; } -static void zpci_teardown_msi(struct pci_dev *pdev) +void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); struct msi_desc *msi; - int aisb, rc; + int rc; - rc = zpci_unregister_airq(zdev); + pr_info("%s: on pdev: %p\n", __func__, pdev); + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); if (rc) { dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); return; } - msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); - aisb = irq_to_dev_nr(msi->irq); - - list_for_each_entry(msi, &pdev->msi_list, list) - zpci_teardown_msi_irq(zdev, msi); + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + zpci_msi_set_mask_bits(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } - clear_bit(aisb, bucket->alloc); - if (aisb + 1 == aisb_max) - aisb_max--; + zpci_aibv[zdev->aisb] = NULL; + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static void zpci_map_resources(struct zpci_dev *zdev) { - pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); - if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) - return -EINVAL; - return zpci_setup_msi(pdev, nvec); -} + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; -void arch_teardown_msi_irqs(struct pci_dev *pdev) -{ - pr_info("%s: on pdev: %p\n", __func__, pdev); - zpci_teardown_msi(pdev); + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); + pdev->resource[i].end = pdev->resource[i].start + len - 1; + } } -static void zpci_map_resources(struct zpci_dev *zdev) +static void zpci_unmap_resources(struct zpci_dev *zdev) { struct pci_dev *pdev = zdev->pdev; resource_size_t len; @@ -593,12 +549,9 @@ static void zpci_map_resources(struct zpci_dev *zdev) len = pci_resource_len(pdev, i); if (!len) continue; - pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); - pdev->resource[i].end = pdev->resource[i].start + len - 1; - pr_debug("BAR%i: -> start: %Lx end: %Lx\n", - i, pdev->resource[i].start, pdev->resource[i].end); + pci_iounmap(pdev, (void *) pdev->resource[i].start); } -}; +} struct zpci_dev *zpci_alloc_device(void) { @@ -606,178 +559,47 @@ struct zpci_dev *zpci_alloc_device(void) /* Alloc memory for our private pci device data */ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - return ERR_PTR(-ENOMEM); - - /* Alloc aibv & callback space */ - zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); - if (!zdev->irq_map) - goto error; - WARN_ON((u64) zdev->irq_map & 0xff); - return zdev; - -error: - kfree(zdev); - return ERR_PTR(-ENOMEM); + return zdev ? : ERR_PTR(-ENOMEM); } void zpci_free_device(struct zpci_dev *zdev) { - kmem_cache_free(zdev_irq_cache, zdev->irq_map); kfree(zdev); } -/* - * Too late for any s390 specific setup, since interrupts must be set up - * already which requires DMA setup too and the pci scan will access the - * config space, which only works if the function handle is enabled. - */ -int pcibios_enable_device(struct pci_dev *pdev, int mask) -{ - struct resource *res; - u16 cmd; - int i; - - pci_read_config_word(pdev, PCI_COMMAND, &cmd); - - for (i = 0; i < PCI_BAR_COUNT; i++) { - res = &pdev->resource[i]; - - if (res->flags & IORESOURCE_IO) - return -EINVAL; - - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - pci_write_config_word(pdev, PCI_COMMAND, cmd); - return 0; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); } -int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) -{ - int msi_nr = irq_to_msi_nr(irq); - struct zdev_irq_map *imap; - struct msi_desc *msi; - - msi = irq_get_msi_desc(irq); - if (!msi) - return -EIO; - - imap = get_imap(irq); - spin_lock_init(&imap->lock); - - pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr); - imap->cb[msi_nr].handler = handler; - imap->cb[msi_nr].data = data; - - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - zpci_msi_set_mask_bits(msi, 1, 0); - return 0; -} - -void zpci_free_irq(unsigned int irq) -{ - struct zdev_irq_map *imap = get_imap(irq); - int msi_nr = irq_to_msi_nr(irq); - unsigned long flags; - - pr_debug("%s: for irq: %d\n", __func__, irq); - - spin_lock_irqsave(&imap->lock, flags); - imap->cb[msi_nr].handler = NULL; - imap->cb[msi_nr].data = NULL; - spin_unlock_irqrestore(&imap->lock, flags); -} - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n", - __func__, irq, handler, irqflags, devname); - - return zpci_request_irq(irq, handler, dev_id); -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - zpci_free_irq(irq); -} -EXPORT_SYMBOL_GPL(free_irq); - static int __init zpci_irq_init(void) { - int cpu, rc; - - bucket = kzalloc(sizeof(*bucket), GFP_KERNEL); - if (!bucket) - return -ENOMEM; - - bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->aisb) { - rc = -ENOMEM; - goto out_aisb; - } - - bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->alloc) { - rc = -ENOMEM; - goto out_alloc; - } + int rc; - isc_register(PCI_ISC); - zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC); - if (IS_ERR(zpci_irq_si)) { - rc = PTR_ERR(zpci_irq_si); - zpci_irq_si = NULL; - goto out_ai; - } + rc = register_adapter_interrupt(&zpci_airq); + if (rc) + goto out; + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; - for_each_online_cpu(cpu) - per_cpu(next_sbit, cpu) = 0; + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; - spin_lock_init(&bucket->lock); - /* set summary to 1 to be called every time for the ISC */ - *zpci_irq_si = 1; - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; -out_ai: - isc_unregister(PCI_ISC); - free_page((unsigned long) bucket->alloc); -out_alloc: - free_page((unsigned long) bucket->aisb); -out_aisb: - kfree(bucket); +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: return rc; } static void zpci_irq_exit(void) { - free_page((unsigned long) bucket->alloc); - free_page((unsigned long) bucket->aisb); - s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC); - isc_unregister(PCI_ISC); - kfree(bucket); -} - -void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m) -{ - if (!zdev) - return; - - seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries)); - seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n", - get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb); + airq_iv_release(zpci_aisb_iv); + unregister_adapter_interrupt(&zpci_airq); } static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, @@ -834,15 +656,58 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) int pcibios_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + u16 cmd; + int i; zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); zpci_map_resources(zdev); + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + + if (res->flags & IORESOURCE_IO) + return -EINVAL; + + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + pci_write_config_word(pdev, PCI_COMMAND, cmd); return 0; } +void pcibios_disable_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; @@ -921,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev) rc = zpci_dma_init_device(zdev); if (rc) goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; return 0; out_dma: @@ -949,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) goto out_free; - - zdev->state = ZPCI_FN_STATE_ONLINE; } rc = zpci_scan_bus(zdev); if (rc) goto out_disable; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); - if (hotplug_ops) - hotplug_ops->create_slot(zdev); - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); + + zpci_init_slot(zdev); return 0; @@ -983,25 +848,6 @@ void zpci_stop_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_stop_device); -int zpci_scan_device(struct zpci_dev *zdev) -{ - zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN); - if (!zdev->pdev) { - pr_err("pci_scan_single_device failed for fid: 0x%x\n", - zdev->fid); - goto out; - } - - pci_bus_add_devices(zdev->bus); - - return 0; -out: - zpci_dma_exit_device(zdev); - clp_disable_fh(zdev); - return -EIO; -} -EXPORT_SYMBOL_GPL(zpci_scan_device); - static inline int barsize(u8 size) { return (size) ? (1 << size) >> 10 : 0; @@ -1009,15 +855,10 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { - zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map), - L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL); - if (!zdev_irq_cache) - goto error_zdev; - zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 16, 0, NULL); if (!zdev_fmb_cache) - goto error_fmb; + goto error_zdev; /* TODO: use realloc */ zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), @@ -1028,8 +869,6 @@ static int zpci_mem_init(void) error_iomap: kmem_cache_destroy(zdev_fmb_cache); -error_fmb: - kmem_cache_destroy(zdev_irq_cache); error_zdev: return -ENOMEM; } @@ -1037,28 +876,10 @@ error_zdev: static void zpci_mem_exit(void) { kfree(zpci_iomap_start); - kmem_cache_destroy(zdev_irq_cache); kmem_cache_destroy(zdev_fmb_cache); } -void zpci_register_hp_ops(struct pci_hp_callback_ops *ops) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = ops; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_register_hp_ops); - -void zpci_deregister_hp_ops(void) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = NULL; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); - -unsigned int s390_pci_probe; -EXPORT_SYMBOL_GPL(s390_pci_probe); +static unsigned int s390_pci_probe; char * __init pcibios_setup(char *str) { @@ -1086,16 +907,12 @@ static int __init pci_base_init(void) rc = zpci_debug_init(); if (rc) - return rc; + goto out; rc = zpci_mem_init(); if (rc) goto out_mem; - rc = zpci_msihash_init(); - if (rc) - goto out_hash; - rc = zpci_irq_init(); if (rc) goto out_irq; @@ -1104,7 +921,7 @@ static int __init pci_base_init(void) if (rc) goto out_dma; - rc = clp_find_pci_devices(); + rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1115,11 +932,15 @@ out_find: out_dma: zpci_irq_exit(); out_irq: - zpci_msihash_exit(); -out_hash: zpci_mem_exit(); out_mem: zpci_debug_exit(); +out: return rc; } -subsys_initcall(pci_base_init); +subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index bd34359d1546..475563c3d1e4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data) return cc; } -static void *clp_alloc_block(void) +static void *clp_alloc_block(gfp_t gfp_mask) { - return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); } static void clp_free_block(void *ptr) @@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) struct clp_req_rsp_query_pci_grp *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) struct clp_req_rsp_query_pci *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -179,9 +179,9 @@ error: static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; - int rc, retries = 1000; + int rc, retries = 100; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) retries--; if (retries < 0) break; - msleep(1); + msleep(20); } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); @@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh); rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); if (!rc) /* Success -> store disabled handle in zdev */ @@ -246,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) { - int present, rc; - - if (!entry->vendor_id) - return; - - /* TODO: be a little bit more scalable */ - present = zpci_fid_present(entry->fid); - - if (present) - pr_debug("%s: device %x already present\n", __func__, entry->fid); - - /* skip already used functions */ - if (present && entry->config_state) - return; - - /* aev 306: function moved to stand-by state */ - if (present && !entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... - */ - zpci_stop_device(get_zdev_by_fid(entry->fid)); - return; - } - - rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - if (rc) - pr_err("Failed to add fid: 0x%x\n", entry->fid); -} - -int clp_find_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; u64 resume_token = 0; int entries, i, rc; - rrb = clp_alloc_block(); - if (!rrb) - return -ENOMEM; - do { memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); @@ -317,12 +279,101 @@ int clp_find_pci_devices(void) resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - clp_check_pcifn_entry(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i]); } while (resume_token); pr_debug("Maximum number of supported PCI functions: %u\n", rrb->response.max_fn); out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + clp_free_block(rrb); return rc; } diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 771b82359af4..75c69b402e05 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -static int pci_debug_show(struct seq_file *m, void *v) -{ - struct zpci_dev *zdev = m->private; - - zpci_debug_info(zdev, m); - return 0; -} - -static int pci_debug_seq_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, pci_debug_show, - file_inode(filp)->i_private); -} - -static const struct file_operations debugfs_pci_debug_fops = { - .open = pci_debug_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void zpci_debug_init_device(struct zpci_dev *zdev) { zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), @@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev) &debugfs_pci_perf_fops); if (IS_ERR(zdev->debugfs_perf)) zdev->debugfs_perf = NULL; - - zdev->debugfs_debug = debugfs_create_file("debug", - S_IFREG | S_IRUGO | S_IWUSR, - zdev->debugfs_dev, zdev, - &debugfs_pci_debug_fops); - if (IS_ERR(zdev->debugfs_debug)) - zdev->debugfs_debug = NULL; } void zpci_debug_exit_device(struct zpci_dev *zdev) { debugfs_remove(zdev->debugfs_perf); - debugfs_remove(zdev->debugfs_debug); debugfs_remove(zdev->debugfs_dev); } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f8e69d5bc0a9..7e5573acb063 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,6 +10,7 @@ #include <linux/export.h> #include <linux/iommu-helper.h> #include <linux/dma-mapping.h> +#include <linux/vmalloc.h> #include <linux/pci.h> #include <asm/pci_dma.h> @@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, */ goto no_refresh; - rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); @@ -263,7 +264,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; @@ -304,7 +305,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long iommu_page_index; int npages; @@ -323,7 +324,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); struct page *page; unsigned long pa; dma_addr_t map; @@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { - unsigned int bitmap_order; int rc; spin_lock_init(&zdev->iommu_bitmap_lock); @@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - bitmap_order = get_order(zdev->iommu_pages / 8); - pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n", - zdev->iommu_size, zdev->iommu_pages, bitmap_order); - - zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - bitmap_order); + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto out_reg; @@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) { zpci_unregister_ioat(zdev, 0); dma_cleanup_tables(zdev); - free_pages((unsigned long) zdev->iommu_bitmap, - get_order(zdev->iommu_pages / 8)); + vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index ec62e3a0dc09..0aecaf954845 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0306: - clp_find_pci_devices(); + clp_rescan_pci_devices(); break; default: break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 22eeb9d7ffeb..85267c058af8 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int s390pci_mod_fc(u64 req, struct zpci_fib *fib) +int zpci_mod_fc(u64 req, struct zpci_fib *fib) { u8 cc, status; @@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) return cc; } -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) { u8 cc, status; @@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void set_irq_ctrl(u16 ctl, char *unused, u8 isc) +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" @@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_load(u64 *data, u64 req, u64 offset) +int zpci_load(u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_load); +EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) @@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store(u64 data, u64 req, u64 offset) +int zpci_store(u64 data, u64 req, u64 offset) { u8 status; int cc; @@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store); +EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) @@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store_block(const u64 *data, u64 req, u64 offset) +int zpci_store_block(const u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store_block); +EXPORT_SYMBOL_GPL(zpci_store_block); diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c deleted file mode 100644 index b097aed05a9b..000000000000 --- a/arch/s390/pci/pci_msi.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/rculist.h> -#include <linux/hash.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <asm/hw_irq.h> - -/* mapping of irq numbers to msi_desc */ -static struct hlist_head *msi_hash; -static const unsigned int msi_hash_bits = 8; -#define MSI_HASH_BUCKETS (1U << msi_hash_bits) -#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) - -static DEFINE_SPINLOCK(msi_map_lock); - -struct msi_desc *__irq_get_msi_desc(unsigned int irq) -{ - struct msi_map *map; - - hlist_for_each_entry_rcu(map, - &msi_hash[msi_hashfn(irq)], msi_chain) - if (map->irq == irq) - return map->msi; - return NULL; -} - -int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - if (msi->msi_attrib.is_msix) { - int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else { - if (msi->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else { - return 0; - } - } - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, - unsigned int nr, int offset) -{ - struct msi_map *map; - struct msi_msg msg; - int rc; - - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (map == NULL) - return -ENOMEM; - - map->irq = nr; - map->msi = msi; - zdev->msi_map[nr & ZPCI_MSI_MASK] = map; - INIT_HLIST_NODE(&map->msi_chain); - - pr_debug("%s hashing irq: %u to bucket nr: %llu\n", - __func__, nr, msi_hashfn(nr)); - hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]); - - spin_lock(&msi_map_lock); - rc = irq_set_msi_desc(nr, msi); - if (rc) { - spin_unlock(&msi_map_lock); - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL; - return rc; - } - spin_unlock(&msi_map_lock); - - msg.data = nr - offset; - msg.address_lo = zdev->msi_addr & 0xffffffff; - msg.address_hi = zdev->msi_addr >> 32; - write_msi_msg(nr, &msg); - return 0; -} - -void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi) -{ - int irq = msi->irq & ZPCI_MSI_MASK; - struct msi_map *map; - - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - zpci_msi_set_mask_bits(msi, 1, 1); - - spin_lock(&msi_map_lock); - map = zdev->msi_map[irq]; - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[irq] = NULL; - spin_unlock(&msi_map_lock); -} - -/* - * The msi hash table has 256 entries which is good for 4..20 - * devices (a typical device allocates 10 + CPUs MSI's). Maybe make - * the hash table size adjustable later. - */ -int __init zpci_msihash_init(void) -{ - unsigned int i; - - msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); - if (!msi_hash) - return -ENOMEM; - - for (i = 0; i < MSI_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&msi_hash[i]); - return 0; -} - -void __init zpci_msihash_exit(void) -{ - kfree(msi_hash); -} diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a42cce69d0a0..cf8a12ff733b 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -15,48 +15,71 @@ static ssize_t show_fid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fid); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fid); } static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL); static ssize_t show_fh(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fh); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fh); } static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL); static ssize_t show_pchid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%04x\n", zdev->pchid); - return strlen(buf); + return sprintf(buf, "0x%04x\n", zdev->pchid); } static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL); static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%02x\n", zdev->pfgid); - return strlen(buf); + return sprintf(buf, "0x%02x\n", zdev->pfgid); } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); +static void recover_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return; + + ret = zpci_enable_device(zdev); + if (ret) + return; + + pci_rescan_bus(zdev->bus); +} + +static ssize_t store_recover(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = device_schedule_callback(dev, recover_callback); + return rc ? rc : count; +} +static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); + static struct device_attribute *zpci_dev_attrs[] = { &dev_attr_function_id, &dev_attr_function_handle, &dev_attr_pchid, &dev_attr_pfgid, + &dev_attr_recover, NULL, }; |