52 files changed, 551 insertions, 2176 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 59c216af6264..2167bce993ff 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -102,13 +102,13 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_KEEP_MEMBLOCK
-	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
@@ -451,14 +451,6 @@ config NR_CPUS
 config HOTPLUG_CPU
 	def_bool y
 
-# Some NUMA nodes have memory ranges that span
-# other nodes.	Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.	See memmap_init_zone()
-# for details. <- They meant memory holes!
-config NODES_SPAN_OTHER_NODES
-	def_bool NUMA
-
 config NUMA
 	bool "NUMA support"
 	depends on SCHED_TOPOLOGY
@@ -468,58 +460,9 @@ config NUMA
 
 	  This option adds NUMA support to the kernel.
 
-	  An operation mode can be selected by appending
-	  numa=<method> to the kernel command line.
-
-	  The default behaviour is identical to appending numa=plain to
-	  the command line. This will create just one node with all
-	  available memory and all CPUs in it.
-
 config NODES_SHIFT
-	int "Maximum NUMA nodes (as a power of 2)"
-	range 1 10
-	depends on NUMA
-	default "4"
-	help
-	  Specify the maximum number of NUMA nodes available on the target
-	  system. Increases memory reserved to accommodate various tables.
-
-menu "Select NUMA modes"
-	depends on NUMA
-
-config NUMA_EMU
-	bool "NUMA emulation"
-	default y
-	help
-	  Numa emulation mode will split the available system memory into
-	  equal chunks which then are distributed over the configured number
-	  of nodes in a round-robin manner.
-
-	  The number of fake nodes is limited by the number of available memory
-	  chunks (i.e. memory size / fake size) and the number of supported
-	  nodes in the kernel.
-
-	  The CPUs are assigned to the nodes in a way that partially respects
-	  the original machine topology (if supported by the machine).
-	  Fair distribution of the CPUs is not guaranteed.
-
-config EMU_SIZE
-	hex "NUMA emulation memory chunk size"
-	default 0x10000000
-	range 0x400000 0x100000000
-	depends on NUMA_EMU
-	help
-	  Select the default size by which the memory is chopped and then
-	  assigned to emulated NUMA nodes.
-
-	  This can be overridden by specifying
-
-	  emu_size=<n>
-
-	  on the kernel command line where also suffixes K, M, G, and T are
-	  supported.
-
-endmenu
+	int
+	default "1"
 
 config SCHED_SMT
 	def_bool n
@@ -867,15 +810,6 @@ config SECCOMP
 
 	  If unsure, say Y.
 
-menu "Power Management"
-
-config ARCH_HIBERNATION_POSSIBLE
-	def_bool y
-
-source "kernel/power/Kconfig"
-
-endmenu
-
 config CCW
 	def_bool y
 
@@ -1010,7 +944,6 @@ config S390_GUEST
 	select TTY
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_CONSOLE
 	help
 	  Enabling this option adds support for virtio based paravirtual device
 	  drivers on s390.
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 54f375627532..8bf46d705957 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -75,7 +75,7 @@ struct appldata_os_data {
 				   (waiting for I/O)               */
 
 	/* per cpu data */
-	struct appldata_os_per_cpu os_cpu[0];
+	struct appldata_os_per_cpu os_cpu[];
 } __attribute__((packed));
 
 static struct appldata_os_data *appldata_os_data;
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index bed227f267ae..515b27a996b3 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,15 +21,10 @@
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
 if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
-# Default install - same as make zlilo
+echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
+     "bootloader config required" >&2
+if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
+if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
 
-if [ -f $4/vmlinuz ]; then
-	mv $4/vmlinuz $4/vmlinuz.old
-fi
-
-if [ -f $4/System.map ]; then
-	mv $4/System.map $4/System.old
-fi
-
-cat $2 > $4/vmlinuz
-cp $3 $4/System.map
+cat $2 > $4/vmlinuz-$1
+cp $3 $4/System.map-$1
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 0c86ba19fa2b..46038bc58c9e 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -532,6 +532,7 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
 CONFIG_HANGCHECK_TIMER=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6b27d861a9a3..7cd0648c1f4e 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -528,6 +528,7 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
 CONFIG_HANGCHECK_TIMER=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 1c23d84a9097..73044634d342 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -342,6 +342,7 @@ static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 		memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
 		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
+	memzero_explicit(&param, sizeof(param));
 	return ret;
 }
 
@@ -470,6 +471,8 @@ static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 			 walk.dst.virt.addr, walk.src.virt.addr, n);
 		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
+	memzero_explicit(&pcc_param, sizeof(pcc_param));
+	memzero_explicit(&xts_param, sizeof(xts_param));
 	return ret;
 }
 
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index adae176757ae..9078b5b6b837 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -7,6 +7,5 @@
 
 void __init init_airq_interrupts(void);
 void __init init_cio_interrupts(void);
-void __init init_ext_interrupts(void);
 
 #endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 084e71b7272a..b63bd66404b8 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -119,6 +119,7 @@ enum diag308_subcode  {
 	DIAG308_LOAD_NORMAL_DUMP = 4,
 	DIAG308_SET = 5,
 	DIAG308_STORE = 6,
+	DIAG308_LOAD_NORMAL = 7,
 };
 
 enum diag308_rc {
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 237ee0c4169f..612ed3c6d581 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -141,7 +141,9 @@ struct lowcore {
 
 	/* br %r1 trampoline */
 	__u16	br_r1_trampoline;		/* 0x0400 */
-	__u8	pad_0x0402[0x0e00-0x0402];	/* 0x0402 */
+	__u32	return_lpswe;			/* 0x0402 */
+	__u32	return_mcck_lpswe;		/* 0x0406 */
+	__u8	pad_0x040a[0x0e00-0x040a];	/* 0x040a */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index e21b618ad432..e12ff0f29d1a 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -34,8 +34,6 @@ typedef struct {
 	unsigned int uses_cmm:1;
 	/* The gmaps associated with this context are allowed to use huge pages. */
 	unsigned int allow_gmap_hpage_1m:1;
-	/* The mmu context is for compat task */
-	unsigned int compat_mm:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index afa836014076..c9f3d8a52756 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -18,6 +18,8 @@
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	unsigned long asce_type, init_entry;
+
 	spin_lock_init(&mm->context.lock);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
@@ -26,7 +28,6 @@ static inline int init_new_context(struct task_struct *tsk,
 	atomic_set(&mm->context.is_protected, 0);
 	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
-	mm->context.compat_mm = test_thread_flag(TIF_31BIT);
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste ||
 		test_thread_flag(TIF_PGSTE) ||
@@ -37,33 +38,34 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.allow_gmap_hpage_1m = 0;
 #endif
 	switch (mm->context.asce_limit) {
-	case _REGION2_SIZE:
+	default:
 		/*
-		 * forked 3-level task, fall through to set new asce with new
-		 * mm->pgd
+		 * context created by exec, the value of asce_limit can
+		 * only be zero in this case
 		 */
-	case 0:
-		/* context created by exec, set asce limit to 4TB */
-		mm->context.asce_limit = STACK_TOP_MAX;
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		VM_BUG_ON(mm->context.asce_limit);
+		/* continue as 3-level task */
+		mm->context.asce_limit = _REGION2_SIZE;
+		fallthrough;
+	case _REGION2_SIZE:
+		/* forked 3-level task */
+		init_entry = _REGION3_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION3;
 		break;
-	case -PAGE_SIZE:
-		/* forked 5-level task, set new asce with new_mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+	case TASK_SIZE_MAX:
+		/* forked 5-level task */
+		init_entry = _REGION1_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION1;
 		break;
 	case _REGION1_SIZE:
-		/* forked 4-level task, set new asce with new mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		/* forked 4-level task */
+		init_entry = _REGION2_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION2;
 		break;
-	case _REGION3_SIZE:
-		/* forked 2-level compat task, set new asce with new mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
 	}
-	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			   _ASCE_USER_BITS | asce_type;
+	crst_table_init((unsigned long *) mm->pgd, init_entry);
 	return 0;
 }
 
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index 35f8cbe7e5bb..23cd5d1b734b 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -13,24 +13,13 @@
 #ifdef CONFIG_NUMA
 
 #include <linux/numa.h>
-#include <linux/cpumask.h>
 
 void numa_setup(void);
-int numa_pfn_to_nid(unsigned long pfn);
-int __node_distance(int a, int b);
-void numa_update_cpu_topology(void);
-
-extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-extern int numa_debug_enabled;
 
 #else
 
 static inline void numa_setup(void) { }
-static inline void numa_update_cpu_topology(void) { }
-static inline int numa_pfn_to_nid(unsigned long pfn)
-{
-	return 0;
-}
 
 #endif /* CONFIG_NUMA */
+
 #endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 62440a82731a..f2d4c1bd3429 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -166,20 +166,20 @@ int arch_make_page_accessible(struct page *page);
 #define __pa(x)			((unsigned long)(x))
 #define __va(x)			((void *)(unsigned long)(x))
 
-#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+#define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
+#define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
+
+#define pfn_to_virt(pfn)	__va(pfn_to_phys(pfn))
+#define virt_to_pfn(kaddr)	(phys_to_pfn(__pa(kaddr)))
 #define pfn_to_kaddr(pfn)	pfn_to_virt(pfn)
 
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
-#define phys_to_pfn(kaddr)	((kaddr) >> PAGE_SHIFT)
-#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
-
-#define phys_to_page(kaddr)	pfn_to_page(phys_to_pfn(kaddr))
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
 
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b05187ce5dbd..7485ee561fec 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/iommu.h>
+#include <linux/pci_hotplug.h>
 #include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
@@ -25,6 +26,7 @@ int pci_proc_domain(struct pci_bus *);
 
 #define ZPCI_NR_DMA_SPACES		1
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
+#define ZPCI_DOMAIN_BITMAP_SIZE		(1 << 16)
 
 /* PCI Function Controls */
 #define ZPCI_FC_FN_ENABLED		0x80
@@ -96,6 +98,7 @@ struct s390_domain;
 struct zpci_dev {
 	struct pci_bus	*bus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
+	struct hotplug_slot hotplug_slot;
 
 	enum zpci_state state;
 	u32		fid;		/* function ID, used by sclp */
@@ -186,6 +189,9 @@ int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 int clp_get_state(u32 fid, enum zpci_state *state);
 
+/* UID */
+void update_uid_checking(bool new);
+
 /* IOMMU Interface */
 int zpci_init_iommu(struct zpci_dev *zdev);
 void zpci_destroy_iommu(struct zpci_dev *zdev);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 77606c4acd58..74a352f8c0d1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -34,19 +34,21 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 	memset64((u64 *)crst, entry, _CRST_ENTRIES);
 }
 
-static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
+
+static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
+					     unsigned long len)
 {
-	if (mm_pmd_folded(mm))
-		return _SEGMENT_ENTRY_EMPTY;
-	if (mm_pud_folded(mm))
-		return _REGION3_ENTRY_EMPTY;
-	if (mm_p4d_folded(mm))
-		return _REGION2_ENTRY_EMPTY;
-	return _REGION1_ENTRY_EMPTY;
-}
+	int rc;
 
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *);
+	if (addr + len > mm->context.asce_limit &&
+	    addr + len <= TASK_SIZE) {
+		rc = crst_table_upgrade(mm, addr + len);
+		if (rc)
+			return (unsigned long) rc;
+	}
+	return addr;
+}
 
 static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
 {
@@ -116,24 +118,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	unsigned long *table = crst_table_alloc(mm);
-
-	if (!table)
-		return NULL;
-	if (mm->context.asce_limit == _REGION3_SIZE) {
-		/* Forking a compat process with 2 page table levels */
-		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
-			crst_table_free(mm, table);
-			return NULL;
-		}
-	}
-	return (pgd_t *) table;
+	return (pgd_t *) crst_table_alloc(mm);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	if (mm->context.asce_limit == _REGION3_SIZE)
-		pgtable_pmd_page_dtor(virt_to_page(pgd));
 	crst_table_free(mm, (unsigned long *) pgd);
 }
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index aadb3d0e2adc..555d148ccf32 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -92,15 +92,15 @@ extern void __bpon(void);
  */
 
 #define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
-					(1UL << 31) : -PAGE_SIZE)
+					_REGION3_SIZE : TASK_SIZE_MAX)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
-					(1UL << 30) : (1UL << 41))
+					(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
 #define TASK_SIZE		TASK_SIZE_OF(current)
 #define TASK_SIZE_MAX		(-PAGE_SIZE)
 
 #define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
-					(1UL << 31) : (1UL << 42))
-#define STACK_TOP_MAX		(1UL << 42)
+					_REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP_MAX		_REGION2_SIZE
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
@@ -161,6 +161,7 @@ typedef struct thread_struct thread_struct;
 #define INIT_THREAD {							\
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
 	.fpu.regs = (void *) init_task.thread.fpu.fprs,			\
+	.last_break = 1,						\
 }
 
 /*
@@ -177,7 +178,6 @@ typedef struct thread_struct thread_struct;
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
 	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
-	crst_table_downgrade(current->mm);				\
 	execve_tail();							\
 } while (0)
 
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b241ddb67caf..534f212753d6 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -8,6 +8,7 @@
 
 #include <linux/bits.h>
 #include <uapi/asm/setup.h>
+#include <linux/build_bug.h>
 
 #define EP_OFFSET		0x10008
 #define EP_STRING		"S390EP"
@@ -162,6 +163,12 @@ static inline unsigned long kaslr_offset(void)
 	return __kaslr_offset;
 }
 
+static inline u32 gen_lpswe(unsigned long addr)
+{
+	BUILD_BUG_ON(addr > 0xfff);
+	return 0xb2b20000 | addr;
+}
+
 #else /* __ASSEMBLY__ */
 
 #define IPL_DEVICE	(IPL_DEVICE_OFFSET)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b157a81fb977..231a51e870fe 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -34,6 +34,7 @@ extern int smp_vcpu_scheduled(int cpu);
 extern void smp_yield_cpu(int cpu);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
+extern int smp_cpu_get_cpu_address(int cpu);
 extern void smp_fill_possible_mask(void);
 extern void smp_detect_cpus(void);
 
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index cca406fdbe51..fbb507504a3b 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -16,8 +16,8 @@ struct cpu_topology_s390 {
 	unsigned short socket_id;
 	unsigned short book_id;
 	unsigned short drawer_id;
-	unsigned short node_id;
 	unsigned short dedicated : 1;
+	int booted_cores;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
@@ -25,7 +25,6 @@ struct cpu_topology_s390 {
 };
 
 extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
-extern cpumask_t cpus_with_topology;
 
 #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
 #define topology_thread_id(cpu)		  (cpu_topology[cpu].thread_id)
@@ -37,6 +36,7 @@ extern cpumask_t cpus_with_topology;
 #define topology_drawer_id(cpu)		  (cpu_topology[cpu].drawer_id)
 #define topology_drawer_cpumask(cpu)	  (&cpu_topology[cpu].drawer_mask)
 #define topology_cpu_dedicated(cpu)	  (cpu_topology[cpu].dedicated)
+#define topology_booted_cores(cpu)	  (cpu_topology[cpu].booted_cores)
 
 #define mc_capable() 1
 
@@ -45,6 +45,7 @@ int topology_cpu_init(struct cpu *);
 int topology_set_cpu_management(int fc);
 void topology_schedule_update(void);
 void store_topology(struct sysinfo_15_1_x *info);
+void update_cpu_masks(void);
 void topology_expect_change(void);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 
@@ -54,6 +55,8 @@ static inline void topology_init_early(void) { }
 static inline void topology_schedule_update(void) { }
 static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
 static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline int topology_booted_cores(int cpu_nr) { return 1; }
+static inline void update_cpu_masks(void) { }
 static inline void topology_expect_change(void) { }
 
 #endif /* CONFIG_SCHED_TOPOLOGY */
@@ -71,19 +74,23 @@ static inline void topology_expect_change(void) { }
 #define cpu_to_node cpu_to_node
 static inline int cpu_to_node(int cpu)
 {
-	return cpu_topology[cpu].node_id;
+	return 0;
 }
 
 /* Returns a pointer to the cpumask of CPUs on node 'node'. */
 #define cpumask_of_node cpumask_of_node
 static inline const struct cpumask *cpumask_of_node(int node)
 {
-	return &node_to_cpumask_map[node];
+	return cpu_possible_mask;
 }
 
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #define node_distance(a, b) __node_distance(a, b)
+static inline int __node_distance(int a, int b)
+{
+	return 0;
+}
 
 #else /* !CONFIG_NUMA */
 
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 33d4de233c5b..75f26d775027 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -54,7 +54,6 @@ CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o
-obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
 obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ce33406cfe83..e80f0e6f5972 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -124,6 +124,8 @@ int main(void)
 	OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
 	OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
 	OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
+	OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
+	OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
 	OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
 	OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
 	OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index e9dac9a24d3f..61f2b0412345 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v)
 
 static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+	return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
 }
 
 static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 9205add8481d..3ae64914bd14 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -115,26 +115,29 @@ _LPP_OFFSET	= __LC_LPP
 
 	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	1f
+	jnz	2f
 	lgr	%r14,%r9
+	cghi	%r14,__LC_RETURN_LPSWE
+	je	0f
 	slg	%r14,BASED(.Lcritical_start)
 	clg	%r14,BASED(.Lcritical_length)
-	jhe	0f
+	jhe	1f
+0:
 	lghi	%r11,\savearea		# inside critical section, do cleanup
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
-	jnz	1f
-0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
+	jnz	2f
+1:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
 	slgr	%r14,%r15
 	srag	%r14,%r14,STACK_SHIFT
-	jnz	2f
+	jnz	3f
 	CHECK_STACK \savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	3f
-1:	UPDATE_VTIME %r14,%r15,\timer
+	j	4f
+2:	UPDATE_VTIME %r14,%r15,\timer
 	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
-3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+3:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
 	.macro UPDATE_VTIME w1,w2,enter_timer
@@ -401,7 +404,7 @@ ENTRY(system_call)
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_PSW
+	b	__LC_RETURN_LPSWE(%r0)
 .Lsysc_done:
 
 #
@@ -608,43 +611,50 @@ ENTRY(pgm_check_handler)
 	BPOFF
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_CURRENT
+	srag	%r11,%r10,12
+	jnz	0f
+	/* if __LC_LAST_BREAK is < 4096, it contains one of
+	 * the lpswe addresses in lowcore. Set it to 1 (initial state)
+	 * to prevent leaking that address to userspace.
+	 */
+	lghi	%r10,1
+0:	lg	%r12,__LC_CURRENT
 	lghi	%r11,0
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
 	tmhh	%r8,0x0001		# test problem state bit
-	jnz	2f			# -> fault in user space
+	jnz	3f			# -> fault in user space
 #if IS_ENABLED(CONFIG_KVM)
 	# cleanup critical section for program checks in sie64a
 	lgr	%r14,%r9
 	slg	%r14,BASED(.Lsie_critical_start)
 	clg	%r14,BASED(.Lsie_critical_length)
-	jhe	0f
+	jhe	1f
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	lghi	%r11,_PIF_GUEST_FAULT
 #endif
-0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	1f			# -> enabled, can't be a double fault
+1:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	2f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-1:	CHECK_STACK __LC_SAVE_AREA_SYNC
+2:	CHECK_STACK __LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	# CHECK_VMAP_STACK branches to stack_overflow or 4f
-	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-2:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+	# CHECK_VMAP_STACK branches to stack_overflow or 5f
+	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f
+3:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
 	lg	%r15,__LC_KERNEL_STACK
 	lgr	%r14,%r12
 	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	3f
+	jz	4f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-3:	stg	%r10,__THREAD_last_break(%r14)
-4:	lgr	%r13,%r11
+4:	stg	%r10,__THREAD_last_break(%r14)
+5:	lgr	%r13,%r11
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	# clear user controlled registers to prevent speculative use
@@ -663,14 +673,14 @@ ENTRY(pgm_check_handler)
 	stg	%r13,__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	5f
+	jz	6f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-5:	REENABLE_IRQS
+6:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
@@ -775,7 +785,7 @@ ENTRY(io_int_handler)
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 .Lio_exit_kernel:
 	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_PSW
+	b	__LC_RETURN_LPSWE(%r0)
 .Lio_done:
 
 #
@@ -1214,7 +1224,7 @@ ENTRY(mcck_int_handler)
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 0:	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_MCCK_PSW
+	b	__LC_RETURN_MCCK_LPSWE
 
 .Lmcck_panic:
 	lg	%r15,__LC_NODAT_STACK
@@ -1271,6 +1281,8 @@ ENDPROC(stack_overflow)
 #endif
 
 ENTRY(cleanup_critical)
+	cghi	%r9,__LC_RETURN_LPSWE
+	je	.Lcleanup_lpswe
 #if IS_ENABLED(CONFIG_KVM)
 	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
 	jl	0f
@@ -1424,6 +1436,7 @@ ENDPROC(cleanup_critical)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
+.Lcleanup_lpswe:
 1:	lmg	%r8,%r9,__LC_RETURN_PSW
 	BR_EX	%r14,%r11
 .Lcleanup_sysc_restore_insn:
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6837affc19e8..4a71061974fd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -144,6 +144,9 @@ static struct ipl_parameter_block *dump_block_ccw;
 
 static struct sclp_ipl_info sclp_ipl_info;
 
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+
 static inline int __diag308(unsigned long subcode, void *addr)
 {
 	register unsigned long _addr asm("0") = (unsigned long) addr;
@@ -691,6 +694,21 @@ static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
 	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
 					    reipl_fcp_loadparm_store);
 
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_fcp_clear);
+}
+
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (strtobool(buf, &reipl_fcp_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
 static struct attribute *reipl_fcp_attrs[] = {
 	&sys_reipl_fcp_device_attr.attr,
 	&sys_reipl_fcp_wwpn_attr.attr,
@@ -706,6 +724,9 @@ static struct attribute_group reipl_fcp_attr_group = {
 	.bin_attrs = reipl_fcp_bin_attrs,
 };
 
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+	__ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
 /* CCW reipl device attributes */
 DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
 
@@ -741,16 +762,36 @@ static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
 	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
 					    reipl_ccw_loadparm_store);
 
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_ccw_clear);
+}
+
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (strtobool(buf, &reipl_ccw_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+	__ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
+
 static struct attribute *reipl_ccw_attrs_vm[] = {
 	&sys_reipl_ccw_device_attr.attr,
 	&sys_reipl_ccw_loadparm_attr.attr,
 	&sys_reipl_ccw_vmparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
 	NULL,
 };
 
 static struct attribute *reipl_ccw_attrs_lpar[] = {
 	&sys_reipl_ccw_device_attr.attr,
 	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
 	NULL,
 };
 
@@ -892,11 +933,17 @@ static void __reipl_run(void *unused)
 	switch (reipl_type) {
 	case IPL_TYPE_CCW:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		diag308(DIAG308_LOAD_CLEAR, NULL);
+		if (reipl_ccw_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 		break;
 	case IPL_TYPE_FCP:
 		diag308(DIAG308_SET, reipl_block_fcp);
-		diag308(DIAG308_LOAD_CLEAR, NULL);
+		if (reipl_fcp_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
 		break;
 	case IPL_TYPE_NSS:
 		diag308(DIAG308_SET, reipl_block_nss);
@@ -1008,11 +1055,16 @@ static int __init reipl_fcp_init(void)
 	}
 
 	rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
-	if (rc) {
-		kset_unregister(reipl_fcp_kset);
-		free_page((unsigned long) reipl_block_fcp);
-		return rc;
-	}
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+				       &sys_reipl_fcp_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else
+		reipl_fcp_clear = true;
 
 	if (ipl_info.type == IPL_TYPE_FCP) {
 		memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
@@ -1032,6 +1084,13 @@ static int __init reipl_fcp_init(void)
 	}
 	reipl_capabilities |= IPL_TYPE_FCP;
 	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+	kset_unregister(reipl_fcp_kset);
+	free_page((unsigned long) reipl_block_fcp);
+	return rc;
 }
 
 static int __init reipl_type_init(void)
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 8371855042dc..3514420f0259 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -95,14 +95,6 @@ static const struct irq_class irqclass_sub_desc[] = {
 	{.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
 };
 
-void __init init_IRQ(void)
-{
-	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
-	init_cio_interrupts();
-	init_airq_interrupts();
-	init_ext_interrupts();
-}
-
 void do_IRQ(struct pt_regs *regs, int irq)
 {
 	struct pt_regs *old_regs;
@@ -294,12 +286,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
 	return IRQ_HANDLED;
 }
 
-static struct irqaction external_interrupt = {
-	.name	 = "EXT",
-	.handler = do_ext_interrupt,
-};
-
-void __init init_ext_interrupts(void)
+static void __init init_ext_interrupts(void)
 {
 	int idx;
 
@@ -308,7 +295,16 @@ void __init init_ext_interrupts(void)
 
 	irq_set_chip_and_handler(EXT_INTERRUPT,
 				 &dummy_irq_chip, handle_percpu_irq);
-	setup_irq(EXT_INTERRUPT, &external_interrupt);
+	if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL))
+		panic("Failed to register EXT interrupt\n");
+}
+
+void __init init_IRQ(void)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+	init_cio_interrupts();
+	init_airq_interrupts();
+	init_ext_interrupts();
 }
 
 static DEFINE_SPINLOCK(irq_subclass_lock);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb8b1cc285c9..3a854cb5a4c6 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -14,7 +14,6 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/debug_locks.h>
-#include <linux/suspend.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
@@ -39,36 +38,6 @@ extern const unsigned long long relocate_kernel_len;
 #ifdef CONFIG_CRASH_DUMP
 
 /*
- * PM notifier callback for kdump
- */
-static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
-			       void *ptr)
-{
-	switch (action) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-		if (kexec_crash_image)
-			arch_kexec_unprotect_crashkres();
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-		if (kexec_crash_image)
-			arch_kexec_protect_crashkres();
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-	return NOTIFY_OK;
-}
-
-static int __init machine_kdump_pm_init(void)
-{
-	pm_notifier(machine_kdump_pm_cb, 0);
-	return 0;
-}
-arch_initcall(machine_kdump_pm_init);
-
-/*
  * Reset the system, copy boot CPU registers to absolute zero,
  * and jump to the kdump image
  */
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 8b33e03e47b8..1e3df52b2b65 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -238,6 +238,64 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
 static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
 	CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
@@ -516,6 +574,67 @@ static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
 	NULL,
 };
 
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
 static struct attribute_group cpumcf_pmu_events_group = {
@@ -624,9 +743,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
 		break;
 	case 0x3906:
 	case 0x3907:
+		model = cpumcf_z14_pmu_event_attr;
+		break;
 	case 0x8561:
 	case 0x8562:
-		model = cpumcf_z14_pmu_event_attr;
+		model = cpumcf_z15_pmu_event_attr;
 		break;
 	default:
 		model = none;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b095b1c78987..85a711d783eb 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -372,28 +372,33 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
 
 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
-	unsigned long n_sdb, freq, factor;
+	unsigned long n_sdb, freq;
 	size_t sample_size;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
-	 *    1. Determine the sample data size which depends on the used
-	 *	 sampling functions, for example, basic-sampling or
-	 *	 basic-sampling with diagnostic-sampling.
+	 *    1. The sampling size is 32 bytes for basic sampling. This size
+	 *	 is the same for all machine types. Diagnostic
+	 *	 sampling uses auxlilary data buffer setup which provides the
+	 *	 memory for SDBs using linux common code auxiliary trace
+	 *	 setup.
 	 *
-	 *    2. Use the sampling frequency as input.  The sampling buffer is
-	 *	 designed for almost one second.  This can be adjusted through
-	 *	 the "factor" variable.
-	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *    2. Function alloc_sampling_buffer() sets the Alert Request
 	 *	 Control indicator to trigger a measurement-alert to harvest
-	 *	 sample-data-blocks (sdb).
+	 *	 sample-data-blocks (SDB). This is done per SDB. This
+	 *	 measurement alert interrupt fires quick enough to handle
+	 *	 one SDB, on very high frequency and work loads there might
+	 *	 be 2 to 3 SBDs available for sample processing.
+	 *	 Currently there is no need for setup alert request on every
+	 *	 n-th page. This is counterproductive as one IRQ triggers
+	 *	 a very high number of samples to be processed at one IRQ.
 	 *
-	 *    3. Compute the number of sample-data-blocks and ensure a minimum
-	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
-	 *	 exceed a "calculated" maximum.  The symbolic maximum is
-	 *	 designed for basic-sampling only and needs to be increased if
-	 *	 diagnostic-sampling is active.
-	 *	 See also the remarks for these symbolic constants.
+	 *    3. Use the sampling frequency as input.
+	 *	 Compute the number of SDBs and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Depending on frequency add some more
+	 *	 SDBs to handle a higher sampling rate.
+	 *	 Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+	 *	 (one SDB) for every 10000 HZ frequency increment.
 	 *
 	 *    4. Compute the number of sample-data-block-tables (SDBT) and
 	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
@@ -401,10 +406,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 	 */
 	sample_size = sizeof(struct hws_basic_entry);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
-	factor = 1;
-	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
-	if (n_sdb < CPUM_SF_MIN_SDB)
-		n_sdb = CPUM_SF_MIN_SDB;
+	n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
 
 	/* If there is already a sampling buffer allocated, it is very likely
 	 * that the sampling facility is enabled too.  If the event to be
@@ -1576,6 +1578,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 	unsigned long range = 0, size;
 	unsigned long long overflow = 0;
 	struct perf_output_handle *handle = &cpuhw->handle;
+	unsigned long num_sdb;
 
 	aux = perf_get_aux(handle);
 	if (WARN_ON_ONCE(!aux))
@@ -1587,13 +1590,14 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			    size >> PAGE_SHIFT);
 	perf_aux_output_end(handle, size);
 
+	num_sdb = aux->sfb.num_sdb;
 	while (!done) {
 		/* Get an output handle */
 		aux = perf_aux_output_begin(handle, cpuhw->event);
 		if (handle->size == 0) {
 			pr_err("The AUX buffer with %lu pages for the "
 			       "diagnostic-sampling mode is full\n",
-				aux->sfb.num_sdb);
+				num_sdb);
 			debug_sprintf_event(sfdbg, 1,
 					    "%s: AUX buffer used up\n",
 					    __func__);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6ccef5f29761..eb6e23ad15a2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -106,6 +106,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.system_timer = 0;
 	p->thread.hardirq_timer = 0;
 	p->thread.softirq_timer = 0;
+	p->thread.last_break = 1;
 
 	frame->sf.back_chain = 0;
 	/* new return point is ret_from_fork */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 6ebc2117c66c..c92d04f876cb 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -151,10 +151,35 @@ static void show_cpu_summary(struct seq_file *m, void *v)
 	}
 }
 
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+	seq_printf(m, "physical id     : %d\n", topology_physical_package_id(n));
+	seq_printf(m, "core id         : %d\n", topology_core_id(n));
+	seq_printf(m, "book id         : %d\n", topology_book_id(n));
+	seq_printf(m, "drawer id       : %d\n", topology_drawer_id(n));
+	seq_printf(m, "dedicated       : %d\n", topology_cpu_dedicated(n));
+	seq_printf(m, "address         : %d\n", smp_cpu_get_cpu_address(n));
+	seq_printf(m, "siblings        : %d\n", cpumask_weight(topology_core_cpumask(n)));
+	seq_printf(m, "cpu cores       : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+	struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+	seq_printf(m, "version         : %02X\n", id->version);
+	seq_printf(m, "identification  : %06X\n", id->ident);
+	seq_printf(m, "machine         : %04X\n", id->machine);
+}
+
 static void show_cpu_mhz(struct seq_file *m, unsigned long n)
 {
 	struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
 
+	if (!machine_has_cpu_mhz)
+		return;
 	seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
 	seq_printf(m, "cpu MHz static  : %d\n", c->cpu_mhz_static);
 }
@@ -165,12 +190,13 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n)
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	unsigned long n = (unsigned long) v - 1;
+	unsigned long first = cpumask_first(cpu_online_mask);
 
-	if (!n)
+	if (n == first)
 		show_cpu_summary(m, v);
-	if (!machine_has_cpu_mhz)
-		return 0;
 	seq_printf(m, "\ncpu number      : %ld\n", n);
+	show_cpu_topology(m, n);
+	show_cpu_ids(m, n);
 	show_cpu_mhz(m, n);
 	return 0;
 }
@@ -179,6 +205,8 @@ static inline void *c_update(loff_t *pos)
 {
 	if (*pos)
 		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	else
+		*pos = cpumask_first(cpu_online_mask);
 	return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
 }
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1423090a2259..36445dd40fdb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -73,6 +73,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/mem_detect.h>
 #include <asm/uv.h>
+#include <asm/asm-offsets.h>
 #include "entry.h"
 
 /*
@@ -446,6 +447,8 @@ static void __init setup_lowcore_dat_off(void)
 	lc->spinlock_index = 0;
 	arch_spin_lock_setup(0);
 	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 
 	set_prefix((u32)(unsigned long) lc);
 	lowcore_ptr[0] = lc;
@@ -789,6 +792,7 @@ static void __init memblock_add_mem_detect_info(void)
 		memblock_physmem_add(start, end - start);
 	}
 	memblock_set_bottom_up(false);
+	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 	memblock_dump_all();
 }
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index e6fca5498e1f..b295090e2ce6 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -487,7 +487,7 @@ void do_signal(struct pt_regs *regs)
 					regs->gprs[2] = -EINTR;
 					break;
 				}
-			/* fallthrough */
+				fallthrough;
 			case -ERESTARTNOINTR:
 				regs->gprs[2] = regs->orig_gpr2;
 				regs->psw.addr =
@@ -514,7 +514,7 @@ void do_signal(struct pt_regs *regs)
 		case -ERESTART_RESTARTBLOCK:
 			/* Restart with sys_restart_syscall */
 			regs->int_code = __NR_restart_syscall;
-		/* fallthrough */
+			fallthrough;
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a08bd2522dd9..7eaabbab2213 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->spinlock_index = 0;
 	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 	if (nmi_alloc_per_cpu(lc))
 		goto out_async;
 	if (vdso_alloc_per_cpu(lc))
@@ -701,6 +703,11 @@ int smp_cpu_get_polarization(int cpu)
 	return pcpu_devices[cpu].polarization;
 }
 
+int smp_cpu_get_cpu_address(int cpu)
+{
+	return pcpu_devices[cpu].address;
+}
+
 static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
 {
 	static int use_sigp_detection;
@@ -851,12 +858,13 @@ static void smp_init_secondary(void)
 	init_cpu_timer();
 	vtime_init();
 	pfault_init();
-	notify_cpu_starting(smp_processor_id());
+	notify_cpu_starting(cpu);
 	if (topology_cpu_dedicated(cpu))
 		set_cpu_flag(CIF_DEDICATED_CPU);
 	else
 		clear_cpu_flag(CIF_DEDICATED_CPU);
-	set_cpu_online(smp_processor_id(), true);
+	set_cpu_online(cpu, true);
+	update_cpu_masks();
 	inc_irq_stat(CPU_RST);
 	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -928,6 +936,7 @@ int __cpu_disable(void)
 	/* Handle possible pending IPIs */
 	smp_handle_ext_call();
 	set_cpu_online(smp_processor_id(), false);
+	update_cpu_masks();
 	/* Disable pseudo page faults on this cpu. */
 	pfault_fini();
 	/* Disable interrupt sources via control register. */
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
deleted file mode 100644
index 75b7b307946e..000000000000
--- a/arch/s390/kernel/suspend.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/pfn.h>
-#include <linux/suspend.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <asm/ctl_reg.h>
-#include <asm/ipl.h>
-#include <asm/cio.h>
-#include <asm/sections.h>
-#include "entry.h"
-
-/*
- * The restore of the saved pages in an hibernation image will set
- * the change and referenced bits in the storage key for each page.
- * Overindication of the referenced bits after an hibernation cycle
- * does not cause any harm but the overindication of the change bits
- * would cause trouble.
- * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
- * page to the most significant byte of the associated page frame
- * number in the hibernation image.
- */
-
-/*
- * Key storage is allocated as a linked list of pages.
- * The size of the keys array is (PAGE_SIZE - sizeof(long))
- */
-struct page_key_data {
-	struct page_key_data *next;
-	unsigned char data[];
-};
-
-#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))
-
-static struct page_key_data *page_key_data;
-static struct page_key_data *page_key_rp, *page_key_wp;
-static unsigned long page_key_rx, page_key_wx;
-unsigned long suspend_zero_pages;
-
-/*
- * For each page in the hibernation image one additional byte is
- * stored in the most significant byte of the page frame number.
- * On suspend no additional memory is required but on resume the
- * keys need to be memorized until the page data has been restored.
- * Only then can the storage keys be set to their old state.
- */
-unsigned long page_key_additional_pages(unsigned long pages)
-{
-	return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-}
-
-/*
- * Free page_key_data list of arrays.
- */
-void page_key_free(void)
-{
-	struct page_key_data *pkd;
-
-	while (page_key_data) {
-		pkd = page_key_data;
-		page_key_data = pkd->next;
-		free_page((unsigned long) pkd);
-	}
-}
-
-/*
- * Allocate page_key_data list of arrays with enough room to store
- * one byte for each page in the hibernation image.
- */
-int page_key_alloc(unsigned long pages)
-{
-	struct page_key_data *pk;
-	unsigned long size;
-
-	size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-	while (size--) {
-		pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
-		if (!pk) {
-			page_key_free();
-			return -ENOMEM;
-		}
-		pk->next = page_key_data;
-		page_key_data = pk;
-	}
-	page_key_rp = page_key_wp = page_key_data;
-	page_key_rx = page_key_wx = 0;
-	return 0;
-}
-
-/*
- * Save the storage key into the upper 8 bits of the page frame number.
- */
-void page_key_read(unsigned long *pfn)
-{
-	struct page *page;
-	unsigned long addr;
-	unsigned char key;
-
-	page = pfn_to_page(*pfn);
-	addr = (unsigned long) page_address(page);
-	key = (unsigned char) page_get_storage_key(addr) & 0x7f;
-	if (arch_test_page_nodat(page))
-		key |= 0x80;
-	*(unsigned char *) pfn = key;
-}
-
-/*
- * Extract the storage key from the upper 8 bits of the page frame number
- * and store it in the page_key_data list of arrays.
- */
-void page_key_memorize(unsigned long *pfn)
-{
-	page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
-	*(unsigned char *) pfn = 0;
-	if (++page_key_wx < PAGE_KEY_DATA_SIZE)
-		return;
-	page_key_wp = page_key_wp->next;
-	page_key_wx = 0;
-}
-
-/*
- * Get the next key from the page_key_data list of arrays and set the
- * storage key of the page referred by @address. If @address refers to
- * a "safe" page the swsusp_arch_resume code will transfer the storage
- * key from the buffer page to the original page.
- */
-void page_key_write(void *address)
-{
-	struct page *page;
-	unsigned char key;
-
-	key = page_key_rp->data[page_key_rx];
-	page_set_storage_key((unsigned long) address, key & 0x7f, 0);
-	page = virt_to_page(address);
-	if (key & 0x80)
-		arch_set_page_nodat(page, 0);
-	else
-		arch_set_page_dat(page, 0);
-	if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
-		return;
-	page_key_rp = page_key_rp->next;
-	page_key_rx = 0;
-}
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
-	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
-	unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
-	unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
-
-	/* Always save lowcore pages (LC protection might be enabled). */
-	if (pfn <= LC_PAGES)
-		return 0;
-	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
-		return 1;
-	/* Skip memory holes and read-only pages (DCSS, ...). */
-	if (pfn >= stext_pfn && pfn <= end_rodata_pfn)
-		return 0;
-	if (tprot(PFN_PHYS(pfn)))
-		return 1;
-	return 0;
-}
-
-/*
- * PM notifier callback for suspend
- */
-static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
-			 void *ptr)
-{
-	switch (action) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-		suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
-		if (!suspend_zero_pages)
-			return NOTIFY_BAD;
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-		free_pages(suspend_zero_pages, LC_ORDER);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-	return NOTIFY_OK;
-}
-
-static int __init suspend_pm_init(void)
-{
-	pm_notifier(suspend_pm_cb, 0);
-	return 0;
-}
-arch_initcall(suspend_pm_init);
-
-void save_processor_state(void)
-{
-	/* swsusp_arch_suspend() actually saves all cpu register contents.
-	 * Machine checks must be disabled since swsusp_arch_suspend() stores
-	 * register contents to their lowcore save areas. That's the same
-	 * place where register contents on machine checks would be saved.
-	 * To avoid register corruption disable machine checks.
-	 * We must also disable machine checks in the new psw mask for
-	 * program checks, since swsusp_arch_suspend() may generate program
-	 * checks. Disabling machine checks for all other new psw masks is
-	 * just paranoia.
-	 */
-	local_mcck_disable();
-	/* Disable lowcore protection */
-	__ctl_clear_bit(0,28);
-	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
-	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
-	/* Enable lowcore protection */
-	__ctl_set_bit(0,28);
-	local_mcck_enable();
-}
-
-/* Called at the end of swsusp_arch_resume */
-void s390_early_resume(void)
-{
-	lgr_info_log();
-	channel_subsystem_reinit();
-	zpci_rescan();
-}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
deleted file mode 100644
index a7baf0b5f818..000000000000
--- a/arch/s390/kernel/swsusp.S
+++ /dev/null
@@ -1,276 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 64-bit swsusp implementation
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/nospec-insn.h>
-#include <asm/sigp.h>
-
-/*
- * Save register context in absolute 0 lowcore and call swsusp_save() to
- * create in-memory kernel image. The context is saved in the designated
- * "store status" memory locations (see POP).
- * We return from this function twice. The first time during the suspend to
- * disk process. The second time via the swsusp_arch_resume() function
- * (see below) in the resume process.
- * This function runs with disabled interrupts.
- */
-	GEN_BR_THUNK %r14
-
-	.section .text
-ENTRY(swsusp_arch_suspend)
-	lg	%r1,__LC_NODAT_STACK
-	stmg	%r6,%r15,__SF_GPRS(%r1)
-	aghi	%r1,-STACK_FRAME_OVERHEAD
-	stg	%r15,__SF_BACKCHAIN(%r1)
-	lgr	%r15,%r1
-
-	/* Store FPU registers */
-	brasl	%r14,save_fpu_regs
-
-	/* Deactivate DAT */
-	stnsm	__SF_EMPTY(%r15),0xfb
-
-	/* Store prefix register on stack */
-	stpx	__SF_EMPTY(%r15)
-
-	/* Save prefix register contents for lowcore copy */
-	llgf	%r10,__SF_EMPTY(%r15)
-
-	/* Get pointer to save area */
-	lghi	%r1,0x1000
-
-	/* Save CPU address */
-	stap	__LC_EXT_CPU_ADDR(%r0)
-
-	/* Store registers */
-	mvc	0x318(4,%r1),__SF_EMPTY(%r15)	/* move prefix to lowcore */
-	stam	%a0,%a15,0x340(%r1)		/* store access registers */
-	stctg	%c0,%c15,0x380(%r1)		/* store control registers */
-	stmg	%r0,%r15,0x280(%r1)		/* store general registers */
-
-	stpt	0x328(%r1)			/* store timer */
-	stck	__SF_EMPTY(%r15)		/* store clock */
-	stckc	0x330(%r1)			/* store clock comparator */
-
-	/* Update cputime accounting before going to sleep */
-	lg	%r0,__LC_LAST_UPDATE_TIMER
-	slg	%r0,0x328(%r1)
-	alg	%r0,__LC_SYSTEM_TIMER
-	stg	%r0,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),0x328(%r1)
-	lg	%r0,__LC_LAST_UPDATE_CLOCK
-	slg	%r0,__SF_EMPTY(%r15)
-	alg	%r0,__LC_STEAL_TIMER
-	stg	%r0,__LC_STEAL_TIMER
-	mvc	__LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
-
-	/* Activate DAT */
-	stosm	__SF_EMPTY(%r15),0x04
-
-	/* Set prefix page to zero */
-	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
-	spx	__SF_EMPTY(%r15)
-
-	/* Save absolute zero pages */
-	larl	%r2,suspend_zero_pages
-	lg	%r2,0(%r2)
-	lghi	%r4,0
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Copy lowcore to absolute zero lowcore */
-	lghi	%r2,0
-	lgr	%r4,%r10
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Save image */
-	brasl	%r14,swsusp_save
-
-	/* Restore prefix register and return */
-	lghi	%r1,0x1000
-	spx	0x318(%r1)
-	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
-	lghi	%r2,0
-	BR_EX	%r14
-ENDPROC(swsusp_arch_suspend)
-
-/*
- * Restore saved memory image to correct place and restore register context.
- * Then we return to the function that called swsusp_arch_suspend().
- * swsusp_arch_resume() runs with disabled interrupts.
- */
-ENTRY(swsusp_arch_resume)
-	stmg	%r6,%r15,__SF_GPRS(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-STACK_FRAME_OVERHEAD
-	stg	%r1,__SF_BACKCHAIN(%r15)
-
-	/* Make all free pages stable */
-	lghi	%r2,1
-	brasl	%r14,arch_set_page_states
-
-	/* Set prefix page to zero */
-	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
-	spx	__SF_EMPTY(%r15)
-
-	/* Deactivate DAT */
-	stnsm	__SF_EMPTY(%r15),0xfb
-
-	/* Restore saved image */
-	larl	%r1,restore_pblist
-	lg	%r1,0(%r1)
-	ltgr	%r1,%r1
-	jz	2f
-0:
-	lg	%r2,8(%r1)
-	lg	%r4,0(%r1)
-	iske	%r0,%r4
-	lghi	%r3,PAGE_SIZE
-	lghi	%r5,PAGE_SIZE
-1:
-	mvcle	%r2,%r4,0
-	jo	1b
-	lg	%r2,8(%r1)
-	sske	%r0,%r2
-	lg	%r1,16(%r1)
-	ltgr	%r1,%r1
-	jnz	0b
-2:
-	ptlb				/* flush tlb */
-
-	/* Reset System */
-	larl	%r1,.Lnew_pgm_check_psw
-	epsw	%r2,%r3
-	stm	%r2,%r3,0(%r1)
-	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
-	larl	%r1,__swsusp_reset_dma
-	lg	%r1,0(%r1)
-	BASR_EX	%r14,%r1
-	larl	%r1,smp_cpu_mt_shift
-	icm	%r1,15,0(%r1)
-	jz	smt_done
-	llgfr	%r1,%r1
-smt_loop:
-	sigp	%r1,%r0,SIGP_SET_MULTI_THREADING
-	brc	8,smt_done			/* accepted */
-	brc	2,smt_loop			/* busy, try again */
-smt_done:
-	larl	%r1,.Lnew_pgm_check_psw
-	lpswe	0(%r1)
-pgm_check_entry:
-
-	/* Switch to original suspend CPU */
-	larl	%r1,.Lresume_cpu		/* Resume CPU address: r2 */
-	stap	0(%r1)
-	llgh	%r2,0(%r1)
-	llgh	%r1,__LC_EXT_CPU_ADDR(%r0)	/* Suspend CPU address: r1 */
-	cgr	%r1,%r2
-	je	restore_registers		/* r1 = r2 -> nothing to do */
-	larl	%r4,.Lrestart_suspend_psw	/* Set new restart PSW */
-	mvc	__LC_RST_NEW_PSW(16,%r0),0(%r4)
-3:
-	sigp	%r9,%r1,SIGP_INITIAL_CPU_RESET	/* sigp initial cpu reset */
-	brc	8,4f				/* accepted */
-	brc	2,3b				/* busy, try again */
-
-	/* Suspend CPU not available -> panic */
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
-	larl	%r2,.Lpanic_string
-	brasl	%r14,sclp_early_printk_force
-	larl	%r3,.Ldisabled_wait_31
-	lpsw	0(%r3)
-4:
-	/* Switch to suspend CPU */
-	sigp	%r9,%r1,SIGP_RESTART	/* sigp restart to suspend CPU */
-	brc	2,4b			/* busy, try again */
-5:
-	sigp	%r9,%r2,SIGP_STOP	/* sigp stop to current resume CPU */
-	brc	2,5b			/* busy, try again */
-6:	j	6b
-
-restart_suspend:
-	larl	%r1,.Lresume_cpu
-	llgh	%r2,0(%r1)
-7:
-	sigp	%r9,%r2,SIGP_SENSE	/* sigp sense, wait for resume CPU */
-	brc	8,7b			/* accepted, status 0, still running */
-	brc	2,7b			/* busy, try again */
-	tmll	%r9,0x40		/* Test if resume CPU is stopped */
-	jz	7b
-
-restore_registers:
-	/* Restore registers */
-	lghi	%r13,0x1000		/* %r1 = pointer to save area */
-
-	/* Ignore time spent in suspended state. */
-	llgf	%r1,0x318(%r13)
-	stck	__LC_LAST_UPDATE_CLOCK(%r1)
-	spt	0x328(%r13)		/* reprogram timer */
-	//sckc	0x330(%r13)		/* set clock comparator */
-
-	lctlg	%c0,%c15,0x380(%r13)	/* load control registers */
-	lam	%a0,%a15,0x340(%r13)	/* load access registers */
-
-	/* Load old stack */
-	lg	%r15,0x2f8(%r13)
-
-	/* Save prefix register */
-	mvc __SF_EMPTY(4,%r15),0x318(%r13)
-
-	/* Restore absolute zero pages */
-	lghi	%r2,0
-	larl	%r4,suspend_zero_pages
-	lg	%r4,0(%r4)
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Restore prefix register */
-	spx	__SF_EMPTY(%r15)
-
-	/* Activate DAT */
-	stosm	__SF_EMPTY(%r15),0x04
-
-	/* Make all free pages unstable */
-	lghi	%r2,0
-	brasl	%r14,arch_set_page_states
-
-	/* Call arch specific early resume code */
-	brasl	%r14,s390_early_resume
-
-	/* Return 0 */
-	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
-	lghi	%r2,0
-	BR_EX	%r14
-ENDPROC(swsusp_arch_resume)
-
-	.section .data..nosave,"aw",@progbits
-	.align	8
-.Ldisabled_wait_31:
-	.long  0x000a0000,0x00000000
-.Lpanic_string:
-	.asciz	"Resume not possible because suspend CPU is no longer available\n"
-	.align	8
-.Lrestart_suspend_psw:
-	.quad	0x0000000180000000,restart_suspend
-.Lnew_pgm_check_psw:
-	.quad	0,pgm_check_entry
-.Lresume_cpu:
-	.byte	0,0
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3627953007ed..5f70cefc13e4 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -26,7 +26,6 @@
 #include <linux/nodemask.h>
 #include <linux/node.h>
 #include <asm/sysinfo.h>
-#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -63,8 +62,6 @@ static struct mask_info drawer_info;
 struct cpu_topology_s390 cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
-cpumask_t cpus_with_topology;
-
 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 {
 	cpumask_t mask;
@@ -86,11 +83,12 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 		cpumask_copy(&mask, cpu_present_mask);
 		break;
 	default:
-		/* fallthrough */
+		fallthrough;
 	case TOPOLOGY_MODE_SINGLE:
 		cpumask_copy(&mask, cpumask_of(cpu));
 		break;
 	}
+	cpumask_and(&mask, &mask, cpu_online_mask);
 	return mask;
 }
 
@@ -106,6 +104,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	for (i = 0; i <= smp_cpu_mtid; i++)
 		if (cpu_present(cpu + i))
 			cpumask_set_cpu(cpu + i, &mask);
+	cpumask_and(&mask, &mask, cpu_online_mask);
 	return mask;
 }
 
@@ -138,7 +137,6 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
 			cpumask_set_cpu(lcpu + i, &drawer->mask);
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
-			cpumask_set_cpu(lcpu + i, &cpus_with_topology);
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 	}
@@ -245,10 +243,10 @@ int topology_set_cpu_management(int fc)
 	return rc;
 }
 
-static void update_cpu_masks(void)
+void update_cpu_masks(void)
 {
-	struct cpu_topology_s390 *topo;
-	int cpu, id;
+	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+	int cpu, sibling, pkg_first, smt_first, id;
 
 	for_each_possible_cpu(cpu) {
 		topo = &cpu_topology[cpu];
@@ -256,6 +254,7 @@ static void update_cpu_masks(void)
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
 		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+		topo->booted_cores = 0;
 		if (topology_mode != TOPOLOGY_MODE_HW) {
 			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
 			topo->thread_id = cpu;
@@ -263,11 +262,23 @@ static void update_cpu_masks(void)
 			topo->socket_id = id;
 			topo->book_id = id;
 			topo->drawer_id = id;
-			if (cpu_present(cpu))
-				cpumask_set_cpu(cpu, &cpus_with_topology);
 		}
 	}
-	numa_update_cpu_topology();
+	for_each_online_cpu(cpu) {
+		topo = &cpu_topology[cpu];
+		pkg_first = cpumask_first(&topo->core_mask);
+		topo_package = &cpu_topology[pkg_first];
+		if (cpu == pkg_first) {
+			for_each_cpu(sibling, &topo->core_mask) {
+				topo_sibling = &cpu_topology[sibling];
+				smt_first = cpumask_first(&topo_sibling->thread_mask);
+				if (sibling == smt_first)
+					topo_package->booted_cores++;
+			}
+		} else {
+			topo->booted_cores = topo_package->booted_cores;
+		}
+	}
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -289,7 +300,6 @@ static int __arch_update_cpu_topology(void)
 	int rc = 0;
 
 	mutex_lock(&smp_cpu_state_mutex);
-	cpumask_clear(&cpus_with_topology);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
 		store_topology(info);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index dc75588d7894..ff9cc4c3290e 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -271,7 +271,7 @@ void kernel_stack_overflow(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(kernel_stack_overflow);
 
-static void test_monitor_call(void)
+static void __init test_monitor_call(void)
 {
 	int val = 1;
 
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a51c892f14f3..ae989b740376 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -19,7 +19,6 @@
 #include <linux/swap.h>
 #include <linux/kthread.h>
 #include <linux/oom.h>
-#include <linux/suspend.h>
 #include <linux/uaccess.h>
 
 #include <asm/pgalloc.h>
@@ -49,7 +48,6 @@ static volatile long cmm_pages_target;
 static volatile long cmm_timed_pages_target;
 static long cmm_timeout_pages;
 static long cmm_timeout_seconds;
-static int cmm_suspended;
 
 static struct cmm_page_array *cmm_page_list;
 static struct cmm_page_array *cmm_timed_page_list;
@@ -151,9 +149,9 @@ static int cmm_thread(void *dummy)
 
 	while (1) {
 		rc = wait_event_interruptible(cmm_thread_wait,
-			(!cmm_suspended && (cmm_pages != cmm_pages_target ||
-			 cmm_timed_pages != cmm_timed_pages_target)) ||
-			 kthread_should_stop());
+			cmm_pages != cmm_pages_target ||
+			cmm_timed_pages != cmm_timed_pages_target ||
+			kthread_should_stop());
 		if (kthread_should_stop() || rc == -ERESTARTSYS) {
 			cmm_pages_target = cmm_pages;
 			cmm_timed_pages_target = cmm_timed_pages;
@@ -390,38 +388,6 @@ static void cmm_smsg_target(const char *from, char *msg)
 
 static struct ctl_table_header *cmm_sysctl_header;
 
-static int cmm_suspend(void)
-{
-	cmm_suspended = 1;
-	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
-	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
-	return 0;
-}
-
-static int cmm_resume(void)
-{
-	cmm_suspended = 0;
-	cmm_kick_thread();
-	return 0;
-}
-
-static int cmm_power_event(struct notifier_block *this,
-			   unsigned long event, void *ptr)
-{
-	switch (event) {
-	case PM_POST_HIBERNATION:
-		return cmm_resume();
-	case PM_HIBERNATION_PREPARE:
-		return cmm_suspend();
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static struct notifier_block cmm_power_notifier = {
-	.notifier_call = cmm_power_event,
-};
-
 static int __init cmm_init(void)
 {
 	int rc = -ENOMEM;
@@ -446,16 +412,11 @@ static int __init cmm_init(void)
 	rc = register_oom_notifier(&cmm_oom_nb);
 	if (rc < 0)
 		goto out_oom_notify;
-	rc = register_pm_notifier(&cmm_power_notifier);
-	if (rc)
-		goto out_pm;
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (!IS_ERR(cmm_thread_ptr))
 		return 0;
 
 	rc = PTR_ERR(cmm_thread_ptr);
-	unregister_pm_notifier(&cmm_power_notifier);
-out_pm:
 	unregister_oom_notifier(&cmm_oom_nb);
 out_oom_notify:
 #ifdef CONFIG_CMM_IUCV
@@ -475,7 +436,6 @@ static void __exit cmm_exit(void)
 #ifdef CONFIG_CMM_IUCV
 	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
 #endif
-	unregister_pm_notifier(&cmm_power_notifier);
 	unregister_oom_notifier(&cmm_oom_nb);
 	kthread_stop(cmm_thread_ptr);
 	del_timer_sync(&cmm_timer);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 09bf7f2121ac..d56f67745e3e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -45,11 +45,11 @@
 #define __SUBCODE_MASK 0x0600
 #define __PF_RES_FIELD 0x8000000000000000ULL
 
-#define VM_FAULT_BADCONTEXT	0x010000
-#define VM_FAULT_BADMAP		0x020000
-#define VM_FAULT_BADACCESS	0x040000
-#define VM_FAULT_SIGNAL		0x080000
-#define VM_FAULT_PFAULT		0x100000
+#define VM_FAULT_BADCONTEXT	((__force vm_fault_t) 0x010000)
+#define VM_FAULT_BADMAP		((__force vm_fault_t) 0x020000)
+#define VM_FAULT_BADACCESS	((__force vm_fault_t) 0x040000)
+#define VM_FAULT_SIGNAL		((__force vm_fault_t) 0x080000)
+#define VM_FAULT_PFAULT		((__force vm_fault_t) 0x100000)
 
 enum fault_type {
 	KERNEL_FAULT,
@@ -123,7 +123,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if (bad_address(table))
@@ -132,7 +132,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if (bad_address(table))
@@ -141,7 +141,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 		if (bad_address(table))
@@ -328,7 +328,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
 	case VM_FAULT_BADACCESS:
 		if (access == VM_EXEC && signal_return(regs) == 0)
 			break;
-		/* fallthrough */
+		fallthrough;
 	case VM_FAULT_BADMAP:
 		/* Bad memory access. Check if it is kernel or user space. */
 		if (user_mode(regs)) {
@@ -338,9 +338,8 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
 			do_sigsegv(regs, si_code);
 			break;
 		}
-		/* fallthrough */
+		fallthrough;
 	case VM_FAULT_BADCONTEXT:
-		/* fallthrough */
 	case VM_FAULT_PFAULT:
 		do_no_context(regs);
 		break;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 5674710a4841..f01daddcbc5e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -326,7 +326,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	int rc;
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
@@ -353,15 +352,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	else
 		addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
 				pgoff, flags);
-	if (addr & ~PAGE_MASK)
+	if (offset_in_page(addr))
 		return addr;
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index cbc718ba6d78..1b78f630a9ca 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -72,14 +72,13 @@ static inline unsigned long mmap_base(unsigned long rnd,
 	return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }
 
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-		unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+				     unsigned long len, unsigned long pgoff,
+				     unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
-	int rc;
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
@@ -105,30 +104,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
-	if (addr & ~PAGE_MASK)
+	if (offset_in_page(addr))
 		return addr;
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
 
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
-			  const unsigned long len, const unsigned long pgoff,
-			  const unsigned long flags)
+unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+					     unsigned long len, unsigned long pgoff,
+					     unsigned long flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
-	int rc;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -163,25 +152,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	if (addr & ~PAGE_MASK) {
+	if (offset_in_page(addr)) {
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
 		info.high_limit = TASK_SIZE;
 		addr = vm_unmapped_area(&info);
-		if (addr & ~PAGE_MASK)
+		if (offset_in_page(addr))
 			return addr;
 	}
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
 
 /*
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index f8c6faab41f4..e22c06d5f206 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -367,20 +367,4 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 	}
 }
 
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
-	unsigned long addr;
-	int cc;
-
-	addr = page_to_phys(page);
-	asm volatile(
-		"	lra	%1,0(%1)\n"
-		"	ipm	%0\n"
-		"	srl	%0,28"
-		: "=d" (cc), "+a" (addr) : : "cc");
-	return cc == 0;
-}
-#endif /* CONFIG_HIBERNATION */
-
 #endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 3dd253f81a77..498c98a312f4 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -77,67 +77,65 @@ static void __crst_table_upgrade(void *arg)
 
 int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
-	unsigned long *table, *pgd;
-	int rc, notify;
+	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+	unsigned long asce_limit = mm->context.asce_limit;
 
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-	rc = 0;
-	notify = 0;
-	while (mm->context.asce_limit < end) {
-		table = crst_table_alloc(mm);
-		if (!table) {
-			rc = -ENOMEM;
-			break;
-		}
-		spin_lock_bh(&mm->page_table_lock);
-		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit == _REGION2_SIZE) {
-			crst_table_init(table, _REGION2_ENTRY_EMPTY);
-			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = _REGION1_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-			mm_inc_nr_puds(mm);
-		} else {
-			crst_table_init(table, _REGION1_ENTRY_EMPTY);
-			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = -PAGE_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
-		}
-		notify = 1;
-		spin_unlock_bh(&mm->page_table_lock);
-	}
-	if (notify)
-		on_each_cpu(__crst_table_upgrade, mm, 0);
-	return rc;
-}
+	VM_BUG_ON(asce_limit < _REGION2_SIZE);
 
-void crst_table_downgrade(struct mm_struct *mm)
-{
-	pgd_t *pgd;
+	if (end <= asce_limit)
+		return 0;
 
-	/* downgrade should only happen from 3 to 2 levels (compat only) */
-	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
+	if (asce_limit == _REGION2_SIZE) {
+		p4d = crst_table_alloc(mm);
+		if (unlikely(!p4d))
+			goto err_p4d;
+		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+	}
+	if (end > _REGION1_SIZE) {
+		pgd = crst_table_alloc(mm);
+		if (unlikely(!pgd))
+			goto err_pgd;
+		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+	}
 
-	if (current->active_mm == mm) {
-		clear_user_asce();
-		__tlb_flush_mm(mm);
+	spin_lock_bh(&mm->page_table_lock);
+
+	/*
+	 * This routine gets called with mmap_sem lock held and there is
+	 * no reason to optimize for the case of otherwise. However, if
+	 * that would ever change, the below check will let us know.
+	 */
+	VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+	if (p4d) {
+		__pgd = (unsigned long *) mm->pgd;
+		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+		mm->pgd = (pgd_t *) p4d;
+		mm->context.asce_limit = _REGION1_SIZE;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		mm_inc_nr_puds(mm);
+	}
+	if (pgd) {
+		__pgd = (unsigned long *) mm->pgd;
+		pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+		mm->pgd = (pgd_t *) pgd;
+		mm->context.asce_limit = TASK_SIZE_MAX;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
 	}
 
-	pgd = mm->pgd;
-	mm_dec_nr_pmds(mm);
-	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-	mm->context.asce_limit = _REGION3_SIZE;
-	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-	crst_table_free(mm, (unsigned long *) pgd);
+	spin_unlock_bh(&mm->page_table_lock);
 
-	if (current->active_mm == mm)
-		set_user_asce(mm);
+	on_each_cpu(__crst_table_upgrade, mm, 0);
+
+	return 0;
+
+err_pgd:
+	crst_table_free(mm, p4d);
+err_p4d:
+	return -ENOMEM;
 }
 
 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
@@ -304,7 +302,7 @@ void __tlb_remove_table(void *_table)
 		mask >>= 24;
 		if (mask != 0)
 			break;
-		/* fallthrough */
+		fallthrough;
 	case 3:		/* 4K page table with pgstes */
 		if (mask & 3)
 			atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -529,7 +527,7 @@ void base_asce_free(unsigned long asce)
 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
 		break;
 	case _ASCE_TYPE_REGION1:
-		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+		base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
 		break;
 	}
 	base_crst_free(table);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b403fa14847d..f810930aff42 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -415,6 +415,10 @@ void __init vmem_map_init(void)
 		     SET_MEMORY_RO | SET_MEMORY_X);
 	__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
+
+	/* we need lowcore executable for our LPSWE instructions */
+	set_memory_x(0, 1);
+
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index 66c2dff74895..c89d26f4f77d 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,4 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y			+= numa.o
-obj-y			+= toptree.o
-obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
deleted file mode 100644
index 72d742bb2d17..000000000000
--- a/arch/s390/numa/mode_emu.c
+++ /dev/null
@@ -1,577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
- * without using real topology information about the physical memory of the
- * machine.
- *
- * It distributes the available CPUs to nodes while respecting the original
- * machine topology information. This is done by trying to avoid to separate
- * CPUs which reside on the same book or even on the same MC.
- *
- * Because the current Linux scheduler code requires a stable cpu to node
- * mapping, cores are pinned to nodes when the first CPU thread is set online.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa_emu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/node.h>
-#include <linux/memory.h>
-#include <linux/slab.h>
-#include <asm/smp.h>
-#include <asm/topology.h>
-#include "numa_mode.h"
-#include "toptree.h"
-
-/* Distances between the different system components */
-#define DIST_EMPTY	0
-#define DIST_CORE	1
-#define DIST_MC		2
-#define DIST_BOOK	3
-#define DIST_DRAWER	4
-#define DIST_MAX	5
-
-/* Node distance reported to common code */
-#define EMU_NODE_DIST	10
-
-/* Node ID for free (not yet pinned) cores */
-#define NODE_ID_FREE	-1
-
-/* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
-
-/* The two toptree IDs */
-enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
-
-/* Number of NUMA nodes */
-static int emu_nodes = 1;
-/* NUMA stripe size */
-static unsigned long emu_size;
-
-/*
- * Node to core pinning information updates are protected by
- * "sched_domains_mutex".
- */
-static struct {
-	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
-	int total;			/* Total number of pinned cores */
-	int per_node_target;		/* Cores per node without extra cores */
-	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
-} *emu_cores;
-
-/*
- * Pin a core to a node
- */
-static void pin_core_to_node(int core_id, int node_id)
-{
-	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
-		emu_cores->per_node[node_id]++;
-		emu_cores->to_node_id[core_id] = node_id;
-		emu_cores->total++;
-	} else {
-		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
-	}
-}
-
-/*
- * Number of pinned cores of a node
- */
-static int cores_pinned(struct toptree *node)
-{
-	return emu_cores->per_node[node->id];
-}
-
-/*
- * ID of the node where the core is pinned (or NODE_ID_FREE)
- */
-static int core_pinned_to_node_id(struct toptree *core)
-{
-	return emu_cores->to_node_id[core->id];
-}
-
-/*
- * Number of cores in the tree that are not yet pinned
- */
-static int cores_free(struct toptree *tree)
-{
-	struct toptree *core;
-	int count = 0;
-
-	toptree_for_each(core, tree, CORE) {
-		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
-			count++;
-	}
-	return count;
-}
-
-/*
- * Return node of core
- */
-static struct toptree *core_node(struct toptree *core)
-{
-	return core->parent->parent->parent->parent;
-}
-
-/*
- * Return drawer of core
- */
-static struct toptree *core_drawer(struct toptree *core)
-{
-	return core->parent->parent->parent;
-}
-
-/*
- * Return book of core
- */
-static struct toptree *core_book(struct toptree *core)
-{
-	return core->parent->parent;
-}
-
-/*
- * Return mc of core
- */
-static struct toptree *core_mc(struct toptree *core)
-{
-	return core->parent;
-}
-
-/*
- * Distance between two cores
- */
-static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
-{
-	if (core_drawer(core1)->id != core_drawer(core2)->id)
-		return DIST_DRAWER;
-	if (core_book(core1)->id != core_book(core2)->id)
-		return DIST_BOOK;
-	if (core_mc(core1)->id != core_mc(core2)->id)
-		return DIST_MC;
-	/* Same core or sibling on same MC */
-	return DIST_CORE;
-}
-
-/*
- * Distance of a node to a core
- */
-static int dist_node_to_core(struct toptree *node, struct toptree *core)
-{
-	struct toptree *core_node;
-	int dist_min = DIST_MAX;
-
-	toptree_for_each(core_node, node, CORE)
-		dist_min = min(dist_min, dist_core_to_core(core_node, core));
-	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
-}
-
-/*
- * Unify will delete empty nodes, therefore recreate nodes.
- */
-static void toptree_unify_tree(struct toptree *tree)
-{
-	int nid;
-
-	toptree_unify(tree);
-	for (nid = 0; nid < emu_nodes; nid++)
-		toptree_get_child(tree, nid);
-}
-
-/*
- * Find the best/nearest node for a given core and ensure that no node
- * gets more than "emu_cores->per_node_target + extra" cores.
- */
-static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
-				     int extra)
-{
-	struct toptree *node, *node_best = NULL;
-	int dist_cur, dist_best, cores_target;
-
-	cores_target = emu_cores->per_node_target + extra;
-	dist_best = DIST_MAX;
-	node_best = NULL;
-	toptree_for_each(node, numa, NODE) {
-		/* Already pinned cores must use their nodes */
-		if (core_pinned_to_node_id(core) == node->id) {
-			node_best = node;
-			break;
-		}
-		/* Skip nodes that already have enough cores */
-		if (cores_pinned(node) >= cores_target)
-			continue;
-		dist_cur = dist_node_to_core(node, core);
-		if (dist_cur < dist_best) {
-			dist_best = dist_cur;
-			node_best = node;
-		}
-	}
-	return node_best;
-}
-
-/*
- * Find the best node for each core with respect to "extra" core count
- */
-static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
-				   int extra)
-{
-	struct toptree *node, *core, *tmp;
-
-	toptree_for_each_safe(core, tmp, phys, CORE) {
-		node = node_for_core(numa, core, extra);
-		if (!node)
-			return;
-		toptree_move(core, node);
-		pin_core_to_node(core->id, node->id);
-	}
-}
-
-/*
- * Move structures of given level to specified NUMA node
- */
-static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
-				    enum toptree_level level, bool perfect)
-{
-	int cores_free, cores_target = emu_cores->per_node_target;
-	struct toptree *cur, *tmp;
-
-	toptree_for_each_safe(cur, tmp, phys, level) {
-		cores_free = cores_target - toptree_count(node, CORE);
-		if (perfect) {
-			if (cores_free == toptree_count(cur, CORE))
-				toptree_move(cur, node);
-		} else {
-			if (cores_free >= toptree_count(cur, CORE))
-				toptree_move(cur, node);
-		}
-	}
-}
-
-/*
- * Move structures of a given level to NUMA nodes. If "perfect" is specified
- * move only perfectly fitting structures. Otherwise move also smaller
- * than needed structures.
- */
-static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
-			       enum toptree_level level, bool perfect)
-{
-	struct toptree *node;
-
-	toptree_for_each(node, numa, NODE)
-		move_level_to_numa_node(node, phys, level, perfect);
-}
-
-/*
- * For the first run try to move the big structures
- */
-static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
-{
-	struct toptree *core;
-
-	/* Always try to move perfectly fitting structures first */
-	move_level_to_numa(numa, phys, DRAWER, true);
-	move_level_to_numa(numa, phys, DRAWER, false);
-	move_level_to_numa(numa, phys, BOOK, true);
-	move_level_to_numa(numa, phys, BOOK, false);
-	move_level_to_numa(numa, phys, MC, true);
-	move_level_to_numa(numa, phys, MC, false);
-	/* Now pin all the moved cores */
-	toptree_for_each(core, numa, CORE)
-		pin_core_to_node(core->id, core_node(core)->id);
-}
-
-/*
- * Allocate new topology and create required nodes
- */
-static struct toptree *toptree_new(int id, int nodes)
-{
-	struct toptree *tree;
-	int nid;
-
-	tree = toptree_alloc(TOPOLOGY, id);
-	if (!tree)
-		goto fail;
-	for (nid = 0; nid < nodes; nid++) {
-		if (!toptree_get_child(tree, nid))
-			goto fail;
-	}
-	return tree;
-fail:
-	panic("NUMA emulation could not allocate topology");
-}
-
-/*
- * Allocate and initialize core to node mapping
- */
-static void __ref create_core_to_node_map(void)
-{
-	int i;
-
-	emu_cores = memblock_alloc(sizeof(*emu_cores), 8);
-	if (!emu_cores)
-		panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-		      __func__, sizeof(*emu_cores), 8);
-	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
-		emu_cores->to_node_id[i] = NODE_ID_FREE;
-}
-
-/*
- * Move cores from physical topology into NUMA target topology
- * and try to keep as much of the physical topology as possible.
- */
-static struct toptree *toptree_to_numa(struct toptree *phys)
-{
-	static int first = 1;
-	struct toptree *numa;
-	int cores_total;
-
-	cores_total = emu_cores->total + cores_free(phys);
-	emu_cores->per_node_target = cores_total / emu_nodes;
-	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
-	if (first) {
-		toptree_to_numa_first(numa, phys);
-		first = 0;
-	}
-	toptree_to_numa_single(numa, phys, 0);
-	toptree_to_numa_single(numa, phys, 1);
-	toptree_unify_tree(numa);
-
-	WARN_ON(cpumask_weight(&phys->mask));
-	return numa;
-}
-
-/*
- * Create a toptree out of the physical topology that we got from the hypervisor
- */
-static struct toptree *toptree_from_topology(void)
-{
-	struct toptree *phys, *node, *drawer, *book, *mc, *core;
-	struct cpu_topology_s390 *top;
-	int cpu;
-
-	phys = toptree_new(TOPTREE_ID_PHYS, 1);
-
-	for_each_cpu(cpu, &cpus_with_topology) {
-		top = &cpu_topology[cpu];
-		node = toptree_get_child(phys, 0);
-		drawer = toptree_get_child(node, top->drawer_id);
-		book = toptree_get_child(drawer, top->book_id);
-		mc = toptree_get_child(book, top->socket_id);
-		core = toptree_get_child(mc, smp_get_base_cpu(cpu));
-		if (!drawer || !book || !mc || !core)
-			panic("NUMA emulation could not allocate memory");
-		cpumask_set_cpu(cpu, &core->mask);
-		toptree_update_mask(mc);
-	}
-	return phys;
-}
-
-/*
- * Add toptree core to topology and create correct CPU masks
- */
-static void topology_add_core(struct toptree *core)
-{
-	struct cpu_topology_s390 *top;
-	int cpu;
-
-	for_each_cpu(cpu, &core->mask) {
-		top = &cpu_topology[cpu];
-		cpumask_copy(&top->thread_mask, &core->mask);
-		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
-		cpumask_copy(&top->book_mask, &core_book(core)->mask);
-		cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
-		cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
-		top->node_id = core_node(core)->id;
-	}
-}
-
-/*
- * Apply toptree to topology and create CPU masks
- */
-static void toptree_to_topology(struct toptree *numa)
-{
-	struct toptree *core;
-	int i;
-
-	/* Clear all node masks */
-	for (i = 0; i < MAX_NUMNODES; i++)
-		cpumask_clear(&node_to_cpumask_map[i]);
-
-	/* Rebuild all masks */
-	toptree_for_each(core, numa, CORE)
-		topology_add_core(core);
-}
-
-/*
- * Show the node to core mapping
- */
-static void print_node_to_core_map(void)
-{
-	int nid, cid;
-
-	if (!numa_debug_enabled)
-		return;
-	printk(KERN_DEBUG "NUMA node to core mapping\n");
-	for (nid = 0; nid < emu_nodes; nid++) {
-		printk(KERN_DEBUG "  node %3d: ", nid);
-		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
-			if (emu_cores->to_node_id[cid] == nid)
-				printk(KERN_CONT "%d ", cid);
-		}
-		printk(KERN_CONT "\n");
-	}
-}
-
-static void pin_all_possible_cpus(void)
-{
-	int core_id, node_id, cpu;
-	static int initialized;
-
-	if (initialized)
-		return;
-	print_node_to_core_map();
-	node_id = 0;
-	for_each_possible_cpu(cpu) {
-		core_id = smp_get_base_cpu(cpu);
-		if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
-			continue;
-		pin_core_to_node(core_id, node_id);
-		cpu_topology[cpu].node_id = node_id;
-		node_id = (node_id + 1) % emu_nodes;
-	}
-	print_node_to_core_map();
-	initialized = 1;
-}
-
-/*
- * Transfer physical topology into a NUMA topology and modify CPU masks
- * according to the NUMA topology.
- *
- * Must be called with "sched_domains_mutex" lock held.
- */
-static void emu_update_cpu_topology(void)
-{
-	struct toptree *phys, *numa;
-
-	if (emu_cores == NULL)
-		create_core_to_node_map();
-	phys = toptree_from_topology();
-	numa = toptree_to_numa(phys);
-	toptree_free(phys);
-	toptree_to_topology(numa);
-	toptree_free(numa);
-	pin_all_possible_cpus();
-}
-
-/*
- * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
- * alignment (needed for memory hotplug).
- */
-static unsigned long emu_setup_size_adjust(unsigned long size)
-{
-	unsigned long size_new;
-
-	size = size ? : CONFIG_EMU_SIZE;
-	size_new = roundup(size, memory_block_size_bytes());
-	if (size_new == size)
-		return size;
-	pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
-		size >> 20, size_new >> 20);
-	return size_new;
-}
-
-/*
- * If we have not enough memory for the specified nodes, reduce the node count.
- */
-static int emu_setup_nodes_adjust(int nodes)
-{
-	int nodes_max;
-
-	nodes_max = memblock.memory.total_size / emu_size;
-	nodes_max = max(nodes_max, 1);
-	if (nodes_max >= nodes)
-		return nodes;
-	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
-	return nodes_max;
-}
-
-/*
- * Early emu setup
- */
-static void emu_setup(void)
-{
-	int nid;
-
-	emu_size = emu_setup_size_adjust(emu_size);
-	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
-	for (nid = 0; nid < emu_nodes; nid++)
-		node_set(nid, node_possible_map);
-	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
-		emu_nodes, emu_size >> 20);
-}
-
-/*
- * Return node id for given page number
- */
-static int emu_pfn_to_nid(unsigned long pfn)
-{
-	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
-}
-
-/*
- * Return stripe size
- */
-static unsigned long emu_align(void)
-{
-	return emu_size;
-}
-
-/*
- * Return distance between two nodes
- */
-static int emu_distance(int node1, int node2)
-{
-	return (node1 != node2) * EMU_NODE_DIST;
-}
-
-/*
- * Define callbacks for generic s390 NUMA infrastructure
- */
-const struct numa_mode numa_mode_emu = {
-	.name = "emu",
-	.setup = emu_setup,
-	.update_cpu_topology = emu_update_cpu_topology,
-	.__pfn_to_nid = emu_pfn_to_nid,
-	.align = emu_align,
-	.distance = emu_distance,
-};
-
-/*
- * Kernel parameter: emu_nodes=<n>
- */
-static int __init early_parse_emu_nodes(char *p)
-{
-	int count;
-
-	if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0)
-		return 0;
-	emu_nodes = min(count, MAX_NUMNODES);
-	return 0;
-}
-early_param("emu_nodes", early_parse_emu_nodes);
-
-/*
- * Kernel parameter: emu_size=[<n>[k|M|G|T]]
- */
-static int __init early_parse_emu_size(char *p)
-{
-	if (p)
-		emu_size = memparse(p, NULL);
-	return 0;
-}
-early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index d2910fa834c8..51c5a9f6e525 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -7,165 +7,36 @@
  * Copyright IBM Corp. 2015
  */
 
-#define KMSG_COMPONENT "numa"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
 #include <linux/kernel.h>
 #include <linux/mmzone.h>
 #include <linux/cpumask.h>
 #include <linux/memblock.h>
-#include <linux/slab.h>
 #include <linux/node.h>
-
 #include <asm/numa.h>
-#include "numa_mode.h"
 
-pg_data_t *node_data[MAX_NUMNODES];
+struct pglist_data *node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
 
-cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-static void plain_setup(void)
-{
-	node_set(0, node_possible_map);
-}
-
-const struct numa_mode numa_mode_plain = {
-	.name = "plain",
-	.setup = plain_setup,
-};
-
-static const struct numa_mode *mode = &numa_mode_plain;
-
-int numa_pfn_to_nid(unsigned long pfn)
-{
-	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
-}
-
-void numa_update_cpu_topology(void)
-{
-	if (mode->update_cpu_topology)
-		mode->update_cpu_topology();
-}
-
-int __node_distance(int a, int b)
-{
-	return mode->distance ? mode->distance(a, b) : 0;
-}
-EXPORT_SYMBOL(__node_distance);
-
-int numa_debug_enabled;
-
-/*
- * numa_setup_memory() - Assign bootmem to nodes
- *
- * The memory is first added to memblock without any respect to nodes.
- * This is fixed before remaining memblock memory is handed over to the
- * buddy allocator.
- * An important side effect is that large bootmem allocations might easily
- * cross node boundaries, which can be needed for large allocations with
- * smaller memory stripes in each node (i.e. when using NUMA emulation).
- *
- * Memory defines nodes:
- * Therefore this routine also sets the nodes online with memory.
- */
-static void __init numa_setup_memory(void)
+void __init numa_setup(void)
 {
-	unsigned long cur_base, align, end_of_dram;
-	int nid = 0;
-
-	end_of_dram = memblock_end_of_DRAM();
-	align = mode->align ? mode->align() : ULONG_MAX;
-
-	/*
-	 * Step through all available memory and assign it to the nodes
-	 * indicated by the mode implementation.
-	 * All nodes which are seen here will be set online.
-	 */
-	cur_base = 0;
-	do {
-		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
-		node_set_online(nid);
-		memblock_set_node(cur_base, align, &memblock.memory, nid);
-		cur_base += align;
-	} while (cur_base < end_of_dram);
+	int nid;
 
-	/* Allocate and fill out node_data */
+	nodes_clear(node_possible_map);
+	node_set(0, node_possible_map);
+	node_set_online(0);
 	for (nid = 0; nid < MAX_NUMNODES; nid++) {
 		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
 		if (!NODE_DATA(nid))
 			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
 			      __func__, sizeof(pg_data_t), 8);
 	}
-
-	for_each_online_node(nid) {
-		unsigned long start_pfn, end_pfn;
-		unsigned long t_start, t_end;
-		int i;
-
-		start_pfn = ULONG_MAX;
-		end_pfn = 0;
-		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
-			if (t_start < start_pfn)
-				start_pfn = t_start;
-			if (t_end > end_pfn)
-				end_pfn = t_end;
-		}
-		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-		NODE_DATA(nid)->node_id = nid;
-	}
-}
-
-/*
- * numa_setup() - Earliest initialization
- *
- * Assign the mode and call the mode's setup routine.
- */
-void __init numa_setup(void)
-{
-	pr_info("NUMA mode: %s\n", mode->name);
-	nodes_clear(node_possible_map);
-	/* Initially attach all possible CPUs to node 0. */
-	cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
-	if (mode->setup)
-		mode->setup();
-	numa_setup_memory();
-	memblock_dump_all();
+	NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	NODE_DATA(0)->node_id = 0;
 }
 
-/*
- * numa_init_late() - Initialization initcall
- *
- * Register NUMA nodes.
- */
 static int __init numa_init_late(void)
 {
-	int nid;
-
-	for_each_online_node(nid)
-		register_one_node(nid);
+	register_one_node(0);
 	return 0;
 }
 arch_initcall(numa_init_late);
-
-static int __init parse_debug(char *parm)
-{
-	numa_debug_enabled = 1;
-	return 0;
-}
-early_param("numa_debug", parse_debug);
-
-static int __init parse_numa(char *parm)
-{
-	if (!parm)
-		return 1;
-	if (strcmp(parm, numa_mode_plain.name) == 0)
-		mode = &numa_mode_plain;
-#ifdef CONFIG_NUMA_EMU
-	if (strcmp(parm, numa_mode_emu.name) == 0)
-		mode = &numa_mode_emu;
-#endif
-	return 0;
-}
-early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
deleted file mode 100644
index dfd3e2784081..000000000000
--- a/arch/s390/numa/numa_mode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Define declarations used for communication between NUMA mode
- * implementations and NUMA core functionality.
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef __S390_NUMA_MODE_H
-#define __S390_NUMA_MODE_H
-
-struct numa_mode {
-	char *name;				/* Name of mode */
-	void (*setup)(void);			/* Initizalize mode */
-	void (*update_cpu_topology)(void);	/* Called by topology code */
-	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
-	unsigned long (*align)(void);		/* Minimum node alignment */
-	int (*distance)(int a, int b);		/* Distance between two nodes */
-};
-
-extern const struct numa_mode numa_mode_plain;
-extern const struct numa_mode numa_mode_emu;
-
-#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
deleted file mode 100644
index 71a608cd4f61..000000000000
--- a/arch/s390/numa/toptree.c
+++ /dev/null
@@ -1,351 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/cpumask.h>
-#include <linux/list.h>
-#include <linux/list_sort.h>
-#include <linux/slab.h>
-#include <asm/numa.h>
-
-#include "toptree.h"
-
-/**
- * toptree_alloc - Allocate and initialize a new tree node.
- * @level: The node's vertical level; level 0 contains the leaves.
- * @id: ID number, explicitly not unique beyond scope of node's siblings
- *
- * Allocate a new tree node and initialize it.
- *
- * RETURNS:
- * Pointer to the new tree node or NULL on error
- */
-struct toptree __ref *toptree_alloc(int level, int id)
-{
-	struct toptree *res;
-
-	if (slab_is_available())
-		res = kzalloc(sizeof(*res), GFP_KERNEL);
-	else
-		res = memblock_alloc(sizeof(*res), 8);
-	if (!res)
-		return res;
-
-	INIT_LIST_HEAD(&res->children);
-	INIT_LIST_HEAD(&res->sibling);
-	cpumask_clear(&res->mask);
-	res->level = level;
-	res->id = id;
-	return res;
-}
-
-/**
- * toptree_remove - Remove a tree node from a tree
- * @cand: Pointer to the node to remove
- *
- * The node is detached from its parent node. The parent node's
- * masks will be updated to reflect the loss of the child.
- */
-static void toptree_remove(struct toptree *cand)
-{
-	struct toptree *oldparent;
-
-	list_del_init(&cand->sibling);
-	oldparent = cand->parent;
-	cand->parent = NULL;
-	toptree_update_mask(oldparent);
-}
-
-/**
- * toptree_free - discard a tree node
- * @cand: Pointer to the tree node to discard
- *
- * Checks if @cand is attached to a parent node. Detaches it
- * cleanly using toptree_remove. Possible children are freed
- * recursively. In the end @cand itself is freed.
- */
-void __ref toptree_free(struct toptree *cand)
-{
-	struct toptree *child, *tmp;
-
-	if (cand->parent)
-		toptree_remove(cand);
-	toptree_for_each_child_safe(child, tmp, cand)
-		toptree_free(child);
-	if (slab_is_available())
-		kfree(cand);
-	else
-		memblock_free_early((unsigned long)cand, sizeof(*cand));
-}
-
-/**
- * toptree_update_mask - Update node bitmasks
- * @cand: Pointer to a tree node
- *
- * The node's cpumask will be updated by combining all children's
- * masks. Then toptree_update_mask is called recursively for the
- * parent if applicable.
- *
- * NOTE:
- * This must not be called on leaves. If called on a leaf, its
- * CPU mask is cleared and lost.
- */
-void toptree_update_mask(struct toptree *cand)
-{
-	struct toptree *child;
-
-	cpumask_clear(&cand->mask);
-	list_for_each_entry(child, &cand->children, sibling)
-		cpumask_or(&cand->mask, &cand->mask, &child->mask);
-	if (cand->parent)
-		toptree_update_mask(cand->parent);
-}
-
-/**
- * toptree_insert - Insert a tree node into tree
- * @cand: Pointer to the node to insert
- * @target: Pointer to the node to which @cand will added as a child
- *
- * Insert a tree node into a tree. Masks will be updated automatically.
- *
- * RETURNS:
- * 0 on success, -1 if NULL is passed as argument or the node levels
- * don't fit.
- */
-static int toptree_insert(struct toptree *cand, struct toptree *target)
-{
-	if (!cand || !target)
-		return -1;
-	if (target->level != (cand->level + 1))
-		return -1;
-	list_add_tail(&cand->sibling, &target->children);
-	cand->parent = target;
-	toptree_update_mask(target);
-	return 0;
-}
-
-/**
- * toptree_move_children - Move all child nodes of a node to a new place
- * @cand: Pointer to the node whose children are to be moved
- * @target: Pointer to the node to which @cand's children will be attached
- *
- * Take all child nodes of @cand and move them using toptree_move.
- */
-static void toptree_move_children(struct toptree *cand, struct toptree *target)
-{
-	struct toptree *child, *tmp;
-
-	toptree_for_each_child_safe(child, tmp, cand)
-		toptree_move(child, target);
-}
-
-/**
- * toptree_unify - Merge children with same ID
- * @cand: Pointer to node whose direct children should be made unique
- *
- * When mangling the tree it is possible that a node has two or more children
- * which have the same ID. This routine merges these children into one and
- * moves all children of the merged nodes into the unified node.
- */
-void toptree_unify(struct toptree *cand)
-{
-	struct toptree *child, *tmp, *cand_copy;
-
-	/* Threads cannot be split, cores are not split */
-	if (cand->level < 2)
-		return;
-
-	cand_copy = toptree_alloc(cand->level, 0);
-	toptree_for_each_child_safe(child, tmp, cand) {
-		struct toptree *tmpchild;
-
-		if (!cpumask_empty(&child->mask)) {
-			tmpchild = toptree_get_child(cand_copy, child->id);
-			toptree_move_children(child, tmpchild);
-		}
-		toptree_free(child);
-	}
-	toptree_move_children(cand_copy, cand);
-	toptree_free(cand_copy);
-
-	toptree_for_each_child(child, cand)
-		toptree_unify(child);
-}
-
-/**
- * toptree_move - Move a node to another context
- * @cand: Pointer to the node to move
- * @target: Pointer to the node where @cand should go
- *
- * In the easiest case @cand is exactly on the level below @target
- * and will be immediately moved to the target.
- *
- * If @target's level is not the direct parent level of @cand,
- * nodes for the missing levels are created and put between
- * @cand and @target. The "stacking" nodes' IDs are taken from
- * @cand's parents.
- *
- * After this it is likely to have redundant nodes in the tree
- * which are addressed by means of toptree_unify.
- */
-void toptree_move(struct toptree *cand, struct toptree *target)
-{
-	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
-
-	if (cand->level + 1 == target->level) {
-		toptree_remove(cand);
-		toptree_insert(cand, target);
-		return;
-	}
-
-	real_insert_point = NULL;
-	ptr = cand;
-	stack_target = NULL;
-
-	do {
-		tmp = stack_target;
-		stack_target = toptree_alloc(ptr->level + 1,
-					     ptr->parent->id);
-		toptree_insert(tmp, stack_target);
-		if (!real_insert_point)
-			real_insert_point = stack_target;
-		ptr = ptr->parent;
-	} while (stack_target->level < (target->level - 1));
-
-	toptree_remove(cand);
-	toptree_insert(cand, real_insert_point);
-	toptree_insert(stack_target, target);
-}
-
-/**
- * toptree_get_child - Access a tree node's child by its ID
- * @cand: Pointer to tree node whose child is to access
- * @id: The desired child's ID
- *
- * @cand's children are searched for a child with matching ID.
- * If no match can be found, a new child with the desired ID
- * is created and returned.
- */
-struct toptree *toptree_get_child(struct toptree *cand, int id)
-{
-	struct toptree *child;
-
-	toptree_for_each_child(child, cand)
-		if (child->id == id)
-			return child;
-	child = toptree_alloc(cand->level-1, id);
-	toptree_insert(child, cand);
-	return child;
-}
-
-/**
- * toptree_first - Find the first descendant on specified level
- * @context: Pointer to tree node whose descendants are to be used
- * @level: The level of interest
- *
- * RETURNS:
- * @context's first descendant on the specified level, or NULL
- * if there is no matching descendant
- */
-struct toptree *toptree_first(struct toptree *context, int level)
-{
-	struct toptree *child, *tmp;
-
-	if (context->level == level)
-		return context;
-
-	if (!list_empty(&context->children)) {
-		list_for_each_entry(child, &context->children, sibling) {
-			tmp = toptree_first(child, level);
-			if (tmp)
-				return tmp;
-		}
-	}
-	return NULL;
-}
-
-/**
- * toptree_next_sibling - Return next sibling
- * @cur: Pointer to a tree node
- *
- * RETURNS:
- * If @cur has a parent and is not the last in the parent's children list,
- * the next sibling is returned. Or NULL when there are no siblings left.
- */
-static struct toptree *toptree_next_sibling(struct toptree *cur)
-{
-	if (cur->parent == NULL)
-		return NULL;
-
-	if (cur == list_last_entry(&cur->parent->children,
-				   struct toptree, sibling))
-		return NULL;
-	return (struct toptree *) list_next_entry(cur, sibling);
-}
-
-/**
- * toptree_next - Tree traversal function
- * @cur: Pointer to current element
- * @context: Pointer to the root node of the tree or subtree to
- * be traversed.
- * @level: The level of interest.
- *
- * RETURNS:
- * Pointer to the next node on level @level
- * or NULL when there is no next node.
- */
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
-			     int level)
-{
-	struct toptree *cur_context, *tmp;
-
-	if (!cur)
-		return NULL;
-
-	if (context->level == level)
-		return NULL;
-
-	tmp = toptree_next_sibling(cur);
-	if (tmp != NULL)
-		return tmp;
-
-	cur_context = cur;
-	while (cur_context->level < context->level - 1) {
-		/* Step up */
-		cur_context = cur_context->parent;
-		/* Step aside */
-		tmp = toptree_next_sibling(cur_context);
-		if (tmp != NULL) {
-			/* Step down */
-			tmp = toptree_first(tmp, level);
-			if (tmp != NULL)
-				return tmp;
-		}
-	}
-	return NULL;
-}
-
-/**
- * toptree_count - Count descendants on specified level
- * @context: Pointer to node whose descendants are to be considered
- * @level: Only descendants on the specified level will be counted
- *
- * RETURNS:
- * Number of descendants on the specified level
- */
-int toptree_count(struct toptree *context, int level)
-{
-	struct toptree *cur;
-	int cnt = 0;
-
-	toptree_for_each(cur, context, level)
-		cnt++;
-	return cnt;
-}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
deleted file mode 100644
index 5246371ec713..000000000000
--- a/arch/s390/numa/toptree.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef S390_TOPTREE_H
-#define S390_TOPTREE_H
-
-#include <linux/cpumask.h>
-#include <linux/list.h>
-
-struct toptree {
-	int level;
-	int id;
-	cpumask_t mask;
-	struct toptree *parent;
-	struct list_head sibling;
-	struct list_head children;
-};
-
-struct toptree *toptree_alloc(int level, int id);
-void toptree_free(struct toptree *cand);
-void toptree_update_mask(struct toptree *cand);
-void toptree_unify(struct toptree *cand);
-struct toptree *toptree_get_child(struct toptree *cand, int id);
-void toptree_move(struct toptree *cand, struct toptree *target);
-int toptree_count(struct toptree *context, int level);
-
-struct toptree *toptree_first(struct toptree *context, int level);
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
-			     int level);
-
-#define toptree_for_each_child(child, ptree)				\
-	list_for_each_entry(child,  &ptree->children, sibling)
-
-#define toptree_for_each_child_safe(child, ptmp, ptree)			\
-	list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
-
-#define toptree_is_last(ptree)					\
-	((ptree->parent == NULL) ||				\
-	 (ptree->parent->children.prev == &ptree->sibling))
-
-#define toptree_for_each(ptree, cont, ttype)		\
-	for (ptree = toptree_first(cont, ttype);	\
-	     ptree != NULL;				\
-	     ptree = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
-	for (ptree = toptree_first(cont, ttype),		\
-		     tmp = toptree_next(ptree, cont, ttype);	\
-	     ptree != NULL;					\
-	     ptree = tmp,					\
-		     tmp = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_sibling(ptree, start)			\
-	toptree_for_each(ptree, start->parent, start->level)
-
-#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 60716d18ce5a..94ca121933de 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -40,8 +40,9 @@
 static LIST_HEAD(zpci_list);
 static DEFINE_SPINLOCK(zpci_list_lock);
 
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
 static DEFINE_SPINLOCK(zpci_domain_lock);
+static unsigned int zpci_num_domains_allocated;
 
 #define ZPCI_IOMAP_ENTRIES						\
 	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
@@ -607,57 +608,25 @@ void pcibios_disable_device(struct pci_dev *pdev)
 	zpci_debug_exit_device(zdev);
 }
 
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-static int zpci_restore(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = to_zpci(pdev);
-	int ret = 0;
-
-	if (zdev->state != ZPCI_FN_STATE_ONLINE)
-		goto out;
-
-	ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
-	if (ret)
-		goto out;
-
-	zpci_map_resources(pdev);
-	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			   (u64) zdev->dma_table);
-
-out:
-	return ret;
-}
-
-static int zpci_freeze(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = to_zpci(pdev);
-
-	if (zdev->state != ZPCI_FN_STATE_ONLINE)
-		return 0;
-
-	zpci_unregister_ioat(zdev, 0);
-	zpci_unmap_resources(pdev);
-	return clp_disable_fh(zdev);
-}
-
-struct dev_pm_ops pcibios_pm_ops = {
-	.thaw_noirq = zpci_restore,
-	.freeze_noirq = zpci_freeze,
-	.restore_noirq = zpci_restore,
-	.poweroff_noirq = zpci_freeze,
-};
-#endif /* CONFIG_HIBERNATE_CALLBACKS */
-
 static int zpci_alloc_domain(struct zpci_dev *zdev)
 {
+	spin_lock(&zpci_domain_lock);
+	if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) {
+		spin_unlock(&zpci_domain_lock);
+		pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+			zdev->fid, ZPCI_NR_DEVICES);
+		return -ENOSPC;
+	}
+
 	if (zpci_unique_uid) {
 		zdev->domain = (u16) zdev->uid;
-		if (zdev->domain >= ZPCI_NR_DEVICES)
-			return 0;
+		if (zdev->domain == 0) {
+			pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n",
+				zdev->fid);
+			update_uid_checking(false);
+			goto auto_allocate;
+		}
 
-		spin_lock(&zpci_domain_lock);
 		if (test_bit(zdev->domain, zpci_domain)) {
 			spin_unlock(&zpci_domain_lock);
 			pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
@@ -665,30 +634,28 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
 			return -EEXIST;
 		}
 		set_bit(zdev->domain, zpci_domain);
+		zpci_num_domains_allocated++;
 		spin_unlock(&zpci_domain_lock);
 		return 0;
 	}
-
-	spin_lock(&zpci_domain_lock);
+auto_allocate:
+	/*
+	 * We can always auto allocate domains below ZPCI_NR_DEVICES.
+	 * There is either a free domain or we have reached the maximum in
+	 * which case we would have bailed earlier.
+	 */
 	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
-	if (zdev->domain == ZPCI_NR_DEVICES) {
-		spin_unlock(&zpci_domain_lock);
-		pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
-			zdev->fid, ZPCI_NR_DEVICES);
-		return -ENOSPC;
-	}
 	set_bit(zdev->domain, zpci_domain);
+	zpci_num_domains_allocated++;
 	spin_unlock(&zpci_domain_lock);
 	return 0;
 }
 
 static void zpci_free_domain(struct zpci_dev *zdev)
 {
-	if (zdev->domain >= ZPCI_NR_DEVICES)
-		return;
-
 	spin_lock(&zpci_domain_lock);
 	clear_bit(zdev->domain, zpci_domain);
+	zpci_num_domains_allocated--;
 	spin_unlock(&zpci_domain_lock);
 }
 
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 0d3d8f170ea4..ea794ae755ae 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,7 +24,7 @@
 
 bool zpci_unique_uid;
 
-static void update_uid_checking(bool new)
+void update_uid_checking(bool new)
 {
 	if (zpci_unique_uid != new)
 		zpci_dbg(1, "uid checking:%d\n", new);