106 files changed, 894 insertions, 1000 deletions
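Most of the churn below comes from one interface change: parse_crashkernel() grows two extra out-parameters (low_size and high) so that crashkernel=X[@offset], crashkernel=X,high and crashkernel=Y,low are all handled by a single parser, and the near-identical per-arch reservation loops collapse into reserve_crashkernel_generic(). Architectures that opt in select ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION and supply an asm/crash_core.h defining CRASH_ALIGN, CRASH_ADDR_LOW_MAX and CRASH_ADDR_HIGH_MAX; everyone else simply passes NULL, NULL for the new arguments. For reference, the resulting per-arch body, shown here as it lands for arm64 (the riscv and x86 versions reduce to the same shape, modulo the Xen PV check on x86):

static void __init arch_reserve_crashkernel(void)
{
	unsigned long long low_size = 0;
	unsigned long long crash_base, crash_size;
	char *cmdline = boot_command_line;
	bool high = false;
	int ret;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/* One call now covers crashkernel=X[@offset], =X,high and =Y,low. */
	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
				&crash_size, &crash_base,
				&low_size, &high);
	if (ret)
		return;

	reserve_crashkernel_generic(cmdline, crash_size, crash_base,
				    low_size, high);
}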
@@ -95,6 +95,7 @@ Ben M Cahill <ben.m.cahill@intel.com> Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net> Ben Widawsky <bwidawsk@kernel.org> <ben.widawsky@intel.com> Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com> +Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de> Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se> Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org> Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com> @@ -128,6 +129,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com> Christian Marangi <ansuelsmth@gmail.com> Christophe Ricard <christophe.ricard@gmail.com> Christoph Hellwig <hch@lst.de> +Claudiu Beznea <claudiu.beznea@tuxon.dev> <claudiu.beznea@microchip.com> Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com> Corey Minyard <minyard@acm.org> Damian Hobson-Garcia <dhobsong@igel.co.jp> @@ -568,6 +570,7 @@ Takashi YOSHII <takashi.yoshii.zj@renesas.com> Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org> Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org> Tejun Heo <htejun@gmail.com> +Tomeu Vizoso <tomeu@tomeuvizoso.net> <tomeu.vizoso@collabora.com> Thomas Graf <tgraf@suug.ch> Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu> Thomas Pedersen <twp@codeaurora.org> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 15eca804239e..b01cac05bd4c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1010,7 +1010,8 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); + &crash_size, &crash_base, + NULL, NULL); /* invalid value specified or crashkernel=0 */ if (ret || !crash_size) return; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6062a52a084f..7b071a00425d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1498,6 +1498,9 @@ config ARCH_DEFAULT_KEXEC_IMAGE_VERIFY_SIG config ARCH_SUPPORTS_CRASH_DUMP def_bool y +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_CORE + config TRANS_TABLE def_bool y depends on HIBERNATION || KEXEC_CORE diff --git a/arch/arm64/include/asm/crash_core.h b/arch/arm64/include/asm/crash_core.h new file mode 100644 index 000000000000..9f5c8d339f44 --- /dev/null +++ b/arch/arm64/include/asm/crash_core.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ARM64_CRASH_CORE_H +#define _ARM64_CRASH_CORE_H + +/* Current arm64 boot protocol requires 2MB alignment */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8deec68028ac..74c1db8ce271 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -65,15 +65,6 @@ EXPORT_SYMBOL(memstart_addr); */ phys_addr_t __ro_after_init arm64_dma_phys_limit; -/* Current arm64 boot protocol requires 2MB alignment */ -#define CRASH_ALIGN SZ_2M - -#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit -#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) -#define CRASH_HIGH_SEARCH_BASE SZ_4G - -#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) - /* * To make optimal use of block mappings when laying out the linear * mapping, round down the base of physical memory to a size that can @@ -101,140 +92,25 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) #endif -static int __init 
reserve_crashkernel_low(unsigned long long low_size) -{ - unsigned long long low_base; - - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); - if (!low_base) { - pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); - return -ENOMEM; - } - - pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", - low_base, low_base + low_size, low_size >> 20); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - insert_resource(&iomem_resource, &crashk_low_res); - - return 0; -} - -/* - * reserve_crashkernel() - reserves memory for crash kernel - * - * This function reserves memory area given in "crashkernel=" kernel command - * line parameter. The memory reserved is used by dump capture kernel when - * primary kernel is crashing. - */ -static void __init reserve_crashkernel(void) +static void __init arch_reserve_crashkernel(void) { - unsigned long long crash_low_size = 0, search_base = 0; - unsigned long long crash_max = CRASH_ADDR_LOW_MAX; + unsigned long long low_size = 0; unsigned long long crash_base, crash_size; char *cmdline = boot_command_line; - bool fixed_base = false; bool high = false; int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; - /* crashkernel=X[@offset] */ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), - &crash_size, &crash_base); - if (ret == -ENOENT) { - ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); - if (ret || !crash_size) - return; - - /* - * crashkernel=Y,low can be specified or not, but invalid value - * is not allowed. - */ - ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); - if (ret == -ENOENT) - crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - else if (ret) - return; - - search_base = CRASH_HIGH_SEARCH_BASE; - crash_max = CRASH_ADDR_HIGH_MAX; - high = true; - } else if (ret || !crash_size) { - /* The specified value is invalid */ + &crash_size, &crash_base, + &low_size, &high); + if (ret) return; - } - - crash_size = PAGE_ALIGN(crash_size); - - /* User specifies base address explicitly. */ - if (crash_base) { - fixed_base = true; - search_base = crash_base; - crash_max = crash_base + crash_size; - } - -retry: - crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, - search_base, crash_max); - if (!crash_base) { - /* - * For crashkernel=size[KMG]@offset[KMG], print out failure - * message if can't reserve the specified region. - */ - if (fixed_base) { - pr_warn("crashkernel reservation failed - memory is in use.\n"); - return; - } - - /* - * For crashkernel=size[KMG], if the first attempt was for - * low memory, fall back to high memory, the minimum required - * low memory will be reserved later. - */ - if (!high && crash_max == CRASH_ADDR_LOW_MAX) { - crash_max = CRASH_ADDR_HIGH_MAX; - search_base = CRASH_ADDR_LOW_MAX; - crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - goto retry; - } - - /* - * For crashkernel=size[KMG],high, if the first attempt was - * for high memory, fall back to low memory. 
- */ - if (high && crash_max == CRASH_ADDR_HIGH_MAX) { - crash_max = CRASH_ADDR_LOW_MAX; - search_base = 0; - goto retry; - } - pr_warn("cannot allocate crashkernel (size:0x%llx)\n", - crash_size); - return; - } - - if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && - reserve_crashkernel_low(crash_low_size)) { - memblock_phys_free(crash_base, crash_size); - return; - } - - pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", - crash_base, crash_base + crash_size, crash_size >> 20); - - /* - * The crashkernel memory will be removed from the kernel linear - * map. Inform kmemleak so that it won't try to access it. - */ - kmemleak_ignore_phys(crash_base); - if (crashk_low_res.end) - kmemleak_ignore_phys(crashk_low_res.start); - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); + reserve_crashkernel_generic(cmdline, crash_size, crash_base, + low_size, high); } /* @@ -480,7 +356,7 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + arch_reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index aed65915e932..b35186f7b254 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -267,7 +267,9 @@ static void __init arch_parse_crashkernel(void) unsigned long long crash_base, crash_size; total_mem = memblock_phys_mem_size(); - ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base, + NULL, NULL); if (ret < 0 || crash_size <= 0) return; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index cb871eb784a7..08321c945ac4 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -460,7 +460,8 @@ static void __init mips_parse_crashkernel(void) total_mem = memblock_phys_mem_size(); ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); + &crash_size, &crash_base, + NULL, NULL); if (ret != 0 || crash_size <= 0) return; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3ff2da7b120b..d14eda1e8589 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -313,7 +313,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base); + &size, &base, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index de64c7962991..9346c960b296 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -109,7 +109,7 @@ void __init reserve_crashkernel(void) total_mem_sz = memory_limit ? 
memory_limit : memblock_phys_mem_size(); /* use common parsing */ ret = parse_crashkernel(boot_command_line, total_mem_sz, - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 2fb3edafe9ab..b4f2786a7d2b 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -178,7 +178,7 @@ static void __init get_crash_kernel(void *fdt, unsigned long size) int ret; ret = parse_crashkernel(boot_command_line, size, &crash_size, - &crash_base); + &crash_base, NULL, NULL); if (ret != 0 || crash_size == 0) return; if (crash_base == 0) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 9c48fecc6719..eaa15a20e6ae 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -693,6 +693,9 @@ config ARCH_SUPPORTS_KEXEC_PURGATORY config ARCH_SUPPORTS_CRASH_DUMP def_bool y +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_CORE + config COMPAT bool "Kernel support for 32-bit U-mode" default 64BIT diff --git a/arch/riscv/include/asm/crash_core.h b/arch/riscv/include/asm/crash_core.h new file mode 100644 index 000000000000..e1874b23feaf --- /dev/null +++ b/arch/riscv/include/asm/crash_core.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _RISCV_CRASH_CORE_H +#define _RISCV_CRASH_CORE_H + +#define CRASH_ALIGN PMD_SIZE + +#define CRASH_ADDR_LOW_MAX dma32_phys_limit +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() + +extern phys_addr_t memblock_end_of_DRAM(void); +#endif diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3e23e1786d05..441da1839c94 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -116,6 +116,8 @@ static inline void wait_for_interrupt(void) __asm__ __volatile__ ("wfi"); } +extern phys_addr_t dma32_phys_limit; + struct device_node; int riscv_of_processor_hartid(struct device_node *node, unsigned long *hartid); int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hartid); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0798bd861dcb..d9a4e8702864 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -65,7 +65,7 @@ extern char _start[]; void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; -static phys_addr_t dma32_phys_limit __initdata; +phys_addr_t dma32_phys_limit __initdata; static void __init zone_sizes_init(void) { @@ -1333,28 +1333,6 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ -/* Reserve 128M low memory by default for swiotlb buffer */ -#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) - -static int __init reserve_crashkernel_low(unsigned long long low_size) -{ - unsigned long long low_base; - - low_base = memblock_phys_alloc_range(low_size, PMD_SIZE, 0, dma32_phys_limit); - if (!low_base) { - pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); - return -ENOMEM; - } - - pr_info("crashkernel low memory reserved: 0x%016llx - 0x%016llx (%lld MB)\n", - low_base, low_base + low_size, low_size >> 20); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - - return 0; -} - /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -1362,122 +1340,25 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) * line parameter. 
The memory reserved is used by dump capture kernel when * primary kernel is crashing. */ -static void __init reserve_crashkernel(void) +static void __init arch_reserve_crashkernel(void) { - unsigned long long crash_base = 0; - unsigned long long crash_size = 0; - unsigned long long crash_low_size = 0; - unsigned long search_start = memblock_start_of_DRAM(); - unsigned long search_end = (unsigned long)dma32_phys_limit; + unsigned long long low_size = 0; + unsigned long long crash_base, crash_size; char *cmdline = boot_command_line; - bool fixed_base = false; bool high = false; - - int ret = 0; + int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; - /* - * Don't reserve a region for a crash kernel on a crash kernel - * since it doesn't make much sense and we have limited memory - * resources. - */ - if (is_kdump_kernel()) { - pr_info("crashkernel: ignoring reservation request\n"); - return; - } ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), - &crash_size, &crash_base); - if (ret == -ENOENT) { - /* Fallback to crashkernel=X,[high,low] */ - ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); - if (ret || !crash_size) - return; - - /* - * crashkernel=Y,low is valid only when crashkernel=X,high - * is passed. - */ - ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); - if (ret == -ENOENT) - crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - else if (ret) - return; - - search_start = (unsigned long)dma32_phys_limit; - search_end = memblock_end_of_DRAM(); - high = true; - } else if (ret || !crash_size) { - /* Invalid argument value specified */ + &crash_size, &crash_base, + &low_size, &high); + if (ret) return; - } - - crash_size = PAGE_ALIGN(crash_size); - - if (crash_base) { - fixed_base = true; - search_start = crash_base; - search_end = crash_base + crash_size; - } - - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 (hugepage size) - * - * Try to alloc from 32bit addressible physical memory so that - * swiotlb can work on the crash kernel. - */ - crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, - search_start, search_end); - if (crash_base == 0) { - /* - * For crashkernel=size[KMG]@offset[KMG], print out failure - * message if can't reserve the specified region. - */ - if (fixed_base) { - pr_warn("crashkernel: allocating failed with given size@offset\n"); - return; - } - - if (high) { - /* - * For crashkernel=size[KMG],high, if the first attempt was - * for high memory, fall back to low memory. - */ - search_start = memblock_start_of_DRAM(); - search_end = (unsigned long)dma32_phys_limit; - } else { - /* - * For crashkernel=size[KMG], if the first attempt was for - * low memory, fall back to high memory, the minimum required - * low memory will be reserved later. 
- */ - search_start = (unsigned long)dma32_phys_limit; - search_end = memblock_end_of_DRAM(); - crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - } - - crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, - search_start, search_end); - if (crash_base == 0) { - pr_warn("crashkernel: couldn't allocate %lldKB\n", - crash_size >> 10); - return; - } - } - - if ((crash_base >= dma32_phys_limit) && crash_low_size && - reserve_crashkernel_low(crash_low_size)) { - memblock_phys_free(crash_base, crash_size); - return; - } - - pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", - crash_base, crash_base + crash_size, crash_size >> 20); - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; + reserve_crashkernel_generic(cmdline, crash_size, crash_base, + low_size, high); } void __init paging_init(void) @@ -1495,7 +1376,7 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); - reserve_crashkernel(); + arch_reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index de6ad0fb2328..e555b576d3c8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -625,8 +625,8 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, - &crash_base); + rc = parse_crashkernel(boot_command_line, ident_map_size, + &crash_size, &crash_base, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 223c14f44af7..fa3a7b36190a 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -154,7 +154,7 @@ void __init reserve_crashkernel(void) int ret; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 433f5e1906d1..6a917f62eff2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2075,6 +2075,9 @@ config ARCH_SUPPORTS_CRASH_DUMP config ARCH_SUPPORTS_CRASH_HOTPLUG def_bool y +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_CORE + config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) default "0x1000000" diff --git a/arch/x86/include/asm/crash_core.h b/arch/x86/include/asm/crash_core.h new file mode 100644 index 000000000000..76af98f4e801 --- /dev/null +++ b/arch/x86/include/asm/crash_core.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_CRASH_CORE_H +#define _X86_CRASH_CORE_H + +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_16M + +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. 
+ */ +extern unsigned long swiotlb_size_or_default(void); + +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAX SZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAX SZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + +# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default() + +static inline unsigned long crash_low_size_default(void) +{ +#ifdef CONFIG_X86_64 + return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); +#else + return 0; +#endif +} + +#endif /* _X86_CRASH_CORE_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ccd3ad29a1dc..163c35db3d04 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -466,154 +466,29 @@ static void __init memblock_x86_reserve_range_setup_data(void) } } -/* - * --------- Crashkernel reservation ------------------------------ - */ - -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGN SZ_16M - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. - */ -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAX SZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAX SZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -static int __init reserve_crashkernel_low(void) +static void __init arch_reserve_crashkernel(void) { -#ifdef CONFIG_X86_64 - unsigned long long base, low_base = 0, low_size = 0; - unsigned long low_mem_limit; - int ret; - - low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX); - - /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base); - if (ret) { - /* - * two parts from kernel/dma/swiotlb.c: - * -swiotlb size: user-specified with swiotlb= or default. - * - * -swiotlb overflow buffer: now hardcoded to 32k. We round it - * to 8M for other buffers that may need to stay low too. Also - * make sure we allocate enough extra low memory so that we - * don't run out of DMA buffers for 32-bit devices. - */ - low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); - } else { - /* passed with crashkernel=0,low ? 
*/ - if (!low_size) - return 0; - } - - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); - if (!low_base) { - pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", - (unsigned long)(low_size >> 20)); - return -ENOMEM; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(low_mem_limit >> 20)); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; - insert_resource(&iomem_resource, &crashk_low_res); -#endif - return 0; -} - -static void __init reserve_crashkernel(void) -{ - unsigned long long crash_size, crash_base, total_mem; + unsigned long long crash_base, crash_size, low_size = 0; + char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; - total_mem = memblock_phys_mem_size(); - - /* crashkernel=XM */ - ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) { - /* crashkernel=X,high */ - ret = parse_crashkernel_high(boot_command_line, total_mem, - &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; - high = true; - } + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + &crash_size, &crash_base, + &low_size, &high); + if (ret) + return; if (xen_pv_domain()) { pr_info("Ignoring crashkernel for a Xen PV domain\n"); return; } - /* 0 means: find the address automatically */ - if (!crash_base) { - /* - * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, - * crashkernel=x,high reserves memory over 4G, also allocates - * 256M extra low memory for DMA buffers and swiotlb. - * But the extra memory is not required for all machines. - * So try low memory first and fall back to high memory - * unless "crashkernel=size[KMG],high" is specified. - */ - if (!high) - crash_base = memblock_phys_alloc_range(crash_size, - CRASH_ALIGN, CRASH_ALIGN, - CRASH_ADDR_LOW_MAX); - if (!crash_base) - crash_base = memblock_phys_alloc_range(crash_size, - CRASH_ALIGN, CRASH_ALIGN, - CRASH_ADDR_HIGH_MAX); - if (!crash_base) { - pr_info("crashkernel reservation failed - No suitable area found.\n"); - return; - } - } else { - unsigned long long start; - - start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base, - crash_base + crash_size); - if (start != crash_base) { - pr_info("crashkernel reservation failed - memory is in use.\n"); - return; - } - } - - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_phys_free(crash_base, crash_size); - return; - } - - pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); + reserve_crashkernel_generic(cmdline, crash_size, crash_base, + low_size, high); } static struct resource standard_io_resources[] = { @@ -1229,7 +1104,7 @@ void __init setup_arch(char **cmdline_p) * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. 
*/ - reserve_crashkernel(); + arch_reserve_crashkernel(); memblock_find_dma_reserve(); diff --git a/block/bdev.c b/block/bdev.c index 2018d250e131..e4cfb7adb645 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL(thaw_bdev); */ static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock); -static struct kmem_cache * bdev_cachep __read_mostly; +static struct kmem_cache *bdev_cachep __ro_after_init; static struct inode *bdev_alloc_inode(struct super_block *sb) { @@ -361,13 +361,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -struct super_block *blockdev_superblock __read_mostly; +struct super_block *blockdev_superblock __ro_after_init; EXPORT_SYMBOL_GPL(blockdev_superblock); void __init bdev_cache_init(void) { int err; - static struct vfsmount *bd_mnt; + static struct vfsmount *bd_mnt __ro_after_init; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 689dc0d13b8f..8983e3a4fdf9 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -618,6 +618,5 @@ int ivpu_job_done_thread_init(struct ivpu_device *vdev) void ivpu_job_done_thread_fini(struct ivpu_device *vdev) { - kthread_stop(vdev->job_done_thread); - put_task_struct(vdev->job_done_thread); + kthread_stop_put(vdev->job_done_thread); } diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index c0979c8049b5..9c2a0c082a76 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -476,10 +476,9 @@ static int find_race(void *arg) for (i = 0; i < ncpus; i++) { int ret; - ret = kthread_stop(threads[i]); + ret = kthread_stop_put(threads[i]); if (ret && !err) err = ret; - put_task_struct(threads[i]); } kfree(threads); @@ -591,8 +590,7 @@ static int wait_forward(void *arg) for (i = 0; i < fc.chain_length; i++) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); @@ -621,8 +619,7 @@ static int wait_backward(void *arg) for (i = fc.chain_length; i--; ) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); @@ -669,8 +666,7 @@ static int wait_random(void *arg) for (i = 0; i < fc.chain_length; i++) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index fb6e0a6ae2c9..b7c6f7ea9e0c 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -548,11 +548,9 @@ static int race_signal_callback(void *arg) for (i = 0; i < ARRAY_SIZE(t); i++) { int err; - err = kthread_stop(t[i].task); + err = kthread_stop_put(t[i].task); if (err && !ret) ret = err; - - put_task_struct(t[i].task); } } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 1a34cbe04fb6..3eff364ccf3a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -719,11 +719,9 @@ static int threaded_migrate(struct intel_migrate *migrate, if (IS_ERR_OR_NULL(tsk)) continue; - status = kthread_stop(tsk); + status = kthread_stop_put(tsk); if (status && !err) err = status; - - put_task_struct(tsk); } kfree(thread); diff --git 
a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index db304f178136..7cff90aa8d24 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -671,8 +671,7 @@ err: static void xenvif_disconnect_queue(struct xenvif_queue *queue) { if (queue->task) { - kthread_stop(queue->task); - put_task_struct(queue->task); + kthread_stop_put(queue->task); queue->task = NULL; } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index ccc5acb39f5a..d8437a98037b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3959,22 +3959,7 @@ static ssize_t debugfs_bist_linkrate_v3_hw_write(struct file *filp, return count; } - -static int debugfs_bist_linkrate_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_linkrate_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_bist_linkrate_v3_hw_fops = { - .open = debugfs_bist_linkrate_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_linkrate_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_linkrate_v3_hw); static const struct { int value; @@ -4049,22 +4034,7 @@ static ssize_t debugfs_bist_code_mode_v3_hw_write(struct file *filp, return count; } - -static int debugfs_bist_code_mode_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_code_mode_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_bist_code_mode_v3_hw_fops = { - .open = debugfs_bist_code_mode_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_code_mode_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_code_mode_v3_hw); static ssize_t debugfs_bist_phy_v3_hw_write(struct file *filp, const char __user *buf, @@ -4098,22 +4068,7 @@ static int debugfs_bist_phy_v3_hw_show(struct seq_file *s, void *p) return 0; } - -static int debugfs_bist_phy_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_phy_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_bist_phy_v3_hw_fops = { - .open = debugfs_bist_phy_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_phy_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_phy_v3_hw); static ssize_t debugfs_bist_cnt_v3_hw_write(struct file *filp, const char __user *buf, @@ -4146,22 +4101,7 @@ static int debugfs_bist_cnt_v3_hw_show(struct seq_file *s, void *p) return 0; } - -static int debugfs_bist_cnt_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_cnt_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_bist_cnt_v3_hw_ops = { - .open = debugfs_bist_cnt_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_cnt_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_cnt_v3_hw); static const struct { int value; @@ -4225,22 +4165,7 @@ static ssize_t debugfs_bist_mode_v3_hw_write(struct file *filp, return count; } - -static int debugfs_bist_mode_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_mode_v3_hw_show, - inode->i_private); -} - -static const 
struct file_operations debugfs_bist_mode_v3_hw_fops = { - .open = debugfs_bist_mode_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_mode_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_mode_v3_hw); static ssize_t debugfs_bist_enable_v3_hw_write(struct file *filp, const char __user *buf, @@ -4278,22 +4203,7 @@ static int debugfs_bist_enable_v3_hw_show(struct seq_file *s, void *p) return 0; } - -static int debugfs_bist_enable_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_bist_enable_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_bist_enable_v3_hw_fops = { - .open = debugfs_bist_enable_v3_hw_open, - .read = seq_read, - .write = debugfs_bist_enable_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_bist_enable_v3_hw); static const struct { char *name; @@ -4331,21 +4241,7 @@ static int debugfs_v3_hw_show(struct seq_file *s, void *p) return 0; } - -static int debugfs_v3_hw_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, debugfs_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_v3_hw_fops = { - .open = debugfs_v3_hw_open, - .read = seq_read, - .write = debugfs_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_v3_hw); static ssize_t debugfs_phy_down_cnt_v3_hw_write(struct file *filp, const char __user *buf, @@ -4376,22 +4272,7 @@ static int debugfs_phy_down_cnt_v3_hw_show(struct seq_file *s, void *p) return 0; } - -static int debugfs_phy_down_cnt_v3_hw_open(struct inode *inode, - struct file *filp) -{ - return single_open(filp, debugfs_phy_down_cnt_v3_hw_show, - inode->i_private); -} - -static const struct file_operations debugfs_phy_down_cnt_v3_hw_fops = { - .open = debugfs_phy_down_cnt_v3_hw_open, - .read = seq_read, - .write = debugfs_phy_down_cnt_v3_hw_write, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_phy_down_cnt_v3_hw); enum fifo_dump_mode_v3_hw { FIFO_DUMP_FORVER = (1U << 0), @@ -4830,7 +4711,7 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba, &debugfs_bist_phy_v3_hw_fops); debugfs_create_file("cnt", 0600, hisi_hba->debugfs_bist_dentry, - hisi_hba, &debugfs_bist_cnt_v3_hw_ops); + hisi_hba, &debugfs_bist_cnt_v3_hw_fops); debugfs_create_file("loopback_mode", 0600, hisi_hba->debugfs_bist_dentry, diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index a7a364760b80..55ff3d7482b3 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -528,51 +528,22 @@ qla_dfs_naqp_show(struct seq_file *s, void *unused) * * Example for creating "TEST" sysfs file: * 1. struct qla_hw_data { ... struct dentry *dfs_TEST; } - * 2. QLA_DFS_SETUP_RD(TEST, scsi_qla_host_t); + * 2. QLA_DFS_SETUP_RD(TEST); * 3. In qla2x00_dfs_setup(): * QLA_DFS_CREATE_FILE(ha, TEST, 0600, ha->dfs_dir, vha); * 4. 
In qla2x00_dfs_remove(): * QLA_DFS_REMOVE_FILE(ha, TEST); */ -#define QLA_DFS_SETUP_RD(_name, _ctx_struct) \ -static int \ -qla_dfs_##_name##_open(struct inode *inode, struct file *file) \ -{ \ - _ctx_struct *__ctx = inode->i_private; \ - \ - return single_open(file, qla_dfs_##_name##_show, __ctx); \ -} \ - \ -static const struct file_operations qla_dfs_##_name##_ops = { \ - .open = qla_dfs_##_name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; +#define QLA_DFS_SETUP_RD(_name) DEFINE_SHOW_ATTRIBUTE(qla_dfs_##_name) -#define QLA_DFS_SETUP_RW(_name, _ctx_struct) \ -static int \ -qla_dfs_##_name##_open(struct inode *inode, struct file *file) \ -{ \ - _ctx_struct *__ctx = inode->i_private; \ - \ - return single_open(file, qla_dfs_##_name##_show, __ctx); \ -} \ - \ -static const struct file_operations qla_dfs_##_name##_ops = { \ - .open = qla_dfs_##_name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - .write = qla_dfs_##_name##_write, \ -}; +#define QLA_DFS_SETUP_RW(_name) DEFINE_SHOW_STORE_ATTRIBUTE(qla_dfs_##_name) #define QLA_DFS_ROOT_CREATE_FILE(_name, _perm, _ctx) \ do { \ if (!qla_dfs_##_name) \ qla_dfs_##_name = debugfs_create_file(#_name, \ _perm, qla2x00_dfs_root, _ctx, \ - &qla_dfs_##_name##_ops); \ + &qla_dfs_##_name##_fops); \ } while (0) #define QLA_DFS_ROOT_REMOVE_FILE(_name) \ @@ -587,7 +558,7 @@ static const struct file_operations qla_dfs_##_name##_ops = { \ do { \ (_struct)->dfs_##_name = debugfs_create_file(#_name, \ _perm, _parent, _ctx, \ - &qla_dfs_##_name##_ops) \ + &qla_dfs_##_name##_fops) \ } while (0) #define QLA_DFS_REMOVE_FILE(_struct, _name) \ @@ -598,14 +569,6 @@ static const struct file_operations qla_dfs_##_name##_ops = { \ } \ } while (0) -static int -qla_dfs_naqp_open(struct inode *inode, struct file *file) -{ - struct scsi_qla_host *vha = inode->i_private; - - return single_open(file, qla_dfs_naqp_show, vha); -} - static ssize_t qla_dfs_naqp_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) @@ -653,15 +616,7 @@ out_free: kfree(buf); return rc; } - -static const struct file_operations dfs_naqp_ops = { - .open = qla_dfs_naqp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = qla_dfs_naqp_write, -}; - +QLA_DFS_SETUP_RW(naqp); int qla2x00_dfs_setup(scsi_qla_host_t *vha) @@ -707,7 +662,7 @@ create_nodes: if (IS_QLA27XX(ha) || IS_QLA83XX(ha) || IS_QLA28XX(ha)) { ha->tgt.dfs_naqp = debugfs_create_file("naqp", - 0400, ha->dfs_dir, vha, &dfs_naqp_ops); + 0400, ha->dfs_dir, vha, &qla_dfs_naqp_fops); if (IS_ERR(ha->tgt.dfs_naqp)) { ql_log(ql_log_warn, vha, 0xd011, "Unable to create debugFS naqp node.\n"); diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index d8cbd2dfc2c2..282efca64a01 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -298,12 +298,6 @@ struct usbip_device { __k; \ }) -#define kthread_stop_put(k) \ - do { \ - kthread_stop(k); \ - put_task_struct(k); \ - } while (0) - /* usbip_common.c */ void usbip_dump_urb(struct urb *purb); void usbip_dump_header(struct usbip_header *pdu); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 24192a7667ed..d26222b7eefe 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,8 +24,8 @@ #include <linux/uaccess.h> -static struct vfsmount *anon_inode_mnt __read_mostly; -static struct inode *anon_inode_inode; +static struct vfsmount *anon_inode_mnt __ro_after_init; +static struct inode 
*anon_inode_inode __ro_after_init; /* * anon_inodefs_dname() is called from d_path(). diff --git a/fs/buffer.c b/fs/buffer.c index 657a62bab73d..967f34b70aa8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2983,13 +2983,13 @@ EXPORT_SYMBOL(try_to_free_buffers); /* * Buffer-head allocation */ -static struct kmem_cache *bh_cachep __read_mostly; +static struct kmem_cache *bh_cachep __ro_after_init; /* * Once the number of bh's in the machine exceeds this level, we start * stripping them in writeback. */ -static unsigned long max_buffer_heads; +static unsigned long max_buffer_heads __ro_after_init; int buffer_heads_over_limit; diff --git a/fs/char_dev.c b/fs/char_dev.c index 6ba032442b39..57cc096c498a 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -25,7 +25,7 @@ #include "internal.h" -static struct kobj_map *cdev_map; +static struct kobj_map *cdev_map __ro_after_init; static DEFINE_MUTEX(chrdevs_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 796e23761ba0..c82ae731df9a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -78,7 +78,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); -static struct kmem_cache *dentry_cache __read_mostly; +static struct kmem_cache *dentry_cache __ro_after_init; const struct qstr empty_name = QSTR_INIT("", 0); EXPORT_SYMBOL(empty_name); @@ -96,9 +96,9 @@ EXPORT_SYMBOL(dotdot_name); * information, yet avoid using a prime hash-size or similar. */ -static unsigned int d_hash_shift __read_mostly; +static unsigned int d_hash_shift __ro_after_init; -static struct hlist_bl_head *dentry_hashtable __read_mostly; +static struct hlist_bl_head *dentry_hashtable __ro_after_init; static inline struct hlist_bl_head *d_hash(unsigned int hash) { @@ -3332,7 +3332,7 @@ static void __init dcache_init(void) } /* SLAB cache for __getname() consumers */ -struct kmem_cache *names_cachep __read_mostly; +struct kmem_cache *names_cachep __ro_after_init; EXPORT_SYMBOL(names_cachep); void __init vfs_caches_init_early(void) diff --git a/fs/direct-io.c b/fs/direct-io.c index 7bc494ee56b9..20533266ade6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -151,7 +151,7 @@ struct dio { }; } ____cacheline_aligned_in_smp; -static struct kmem_cache *dio_cache __read_mostly; +static struct kmem_cache *dio_cache __ro_after_init; /* * How many pages are in the queue? 
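The fs/ and block/ hunks in this series repeatedly swap __read_mostly for __ro_after_init on slab caches, hash tables and vfsmounts that are assigned exactly once during boot. A minimal sketch of that pattern, for reference (example_item, example_cachep and example_cache_init are hypothetical names, not part of this series):

#include <linux/cache.h>
#include <linux/init.h>
#include <linux/slab.h>

struct example_item {
	int id;
};

/* Written once at boot, read-only afterwards. */
static struct kmem_cache *example_cachep __ro_after_init;

static int __init example_cache_init(void)
{
	/*
	 * The only store to example_cachep. Once __init code has run,
	 * the .data..ro_after_init section is remapped read-only, so a
	 * later stray write faults instead of silently corrupting the
	 * pointer, which __read_mostly cannot guarantee.
	 */
	example_cachep = KMEM_CACHE(example_item, SLAB_HWCACHE_ALIGN);
	return example_cachep ? 0 : -ENOMEM;
}
core_initcall(example_cache_init);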
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1d9a71a0c4c1..2877cc01cff1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -256,10 +256,10 @@ static u64 loop_check_gen = 0; static struct eventpoll *inserting_into; /* Slab cache used to allocate "struct epitem" */ -static struct kmem_cache *epi_cache __read_mostly; +static struct kmem_cache *epi_cache __ro_after_init; /* Slab cache used to allocate "struct eppoll_entry" */ -static struct kmem_cache *pwq_cache __read_mostly; +static struct kmem_cache *pwq_cache __ro_after_init; /* * List of files with newly added links, where we may need to limit the number @@ -271,7 +271,7 @@ struct epitems_head { }; static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR; -static struct kmem_cache *ephead_cache __read_mostly; +static struct kmem_cache *ephead_cache __ro_after_init; static inline void free_ephead(struct epitems_head *head) { diff --git a/fs/fcntl.c b/fs/fcntl.c index e871009f6c88..c80a6acad742 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -844,7 +844,7 @@ int send_sigurg(struct fown_struct *fown) } static DEFINE_SPINLOCK(fasync_lock); -static struct kmem_cache *fasync_cache __read_mostly; +static struct kmem_cache *fasync_cache __ro_after_init; static void fasync_free_rcu(struct rcu_head *head) { diff --git a/fs/file_table.c b/fs/file_table.c index fa92743ba6a9..de4a2915bfd4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -40,7 +40,7 @@ static struct files_stat_struct files_stat = { }; /* SLAB cache for file structures */ -static struct kmem_cache *filp_cachep __read_mostly; +static struct kmem_cache *filp_cachep __ro_after_init; static struct percpu_counter nr_files __cacheline_aligned_in_smp; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 33ca04733e93..ecf789b7168c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1126,8 +1126,7 @@ static int init_threads(struct gfs2_sbd *sdp) return 0; fail: - kthread_stop(sdp->sd_logd_process); - put_task_struct(sdp->sd_logd_process); + kthread_stop_put(sdp->sd_logd_process); sdp->sd_logd_process = NULL; return error; } @@ -1135,13 +1134,11 @@ fail: void gfs2_destroy_threads(struct gfs2_sbd *sdp) { if (sdp->sd_logd_process) { - kthread_stop(sdp->sd_logd_process); - put_task_struct(sdp->sd_logd_process); + kthread_stop_put(sdp->sd_logd_process); sdp->sd_logd_process = NULL; } if (sdp->sd_quotad_process) { - kthread_stop(sdp->sd_quotad_process); - put_task_struct(sdp->sd_quotad_process); + kthread_stop_put(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; } } diff --git a/fs/inode.c b/fs/inode.c index 4f8984b97df0..edcd8a61975f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -54,9 +54,9 @@ * inode_hash_lock */ -static unsigned int i_hash_mask __read_mostly; -static unsigned int i_hash_shift __read_mostly; -static struct hlist_head *inode_hashtable __read_mostly; +static unsigned int i_hash_mask __ro_after_init; +static unsigned int i_hash_shift __ro_after_init; +static struct hlist_head *inode_hashtable __ro_after_init; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); /* @@ -70,7 +70,7 @@ EXPORT_SYMBOL(empty_aops); static DEFINE_PER_CPU(unsigned long, nr_inodes); static DEFINE_PER_CPU(unsigned long, nr_unused); -static struct kmem_cache *inode_cachep __read_mostly; +static struct kmem_cache *inode_cachep __ro_after_init; static long get_nr_inodes(void) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 79b96e74a8a0..4628edde2e7e 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -21,8 +21,9 @@ #include "kernfs-internal.h" 
-struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; -struct kernfs_global_locks *kernfs_locks; +struct kmem_cache *kernfs_node_cache __ro_after_init; +struct kmem_cache *kernfs_iattrs_cache __ro_after_init; +struct kernfs_global_locks *kernfs_locks __ro_after_init; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { diff --git a/fs/locks.c b/fs/locks.c index d4e49a990a8d..46d88b9e222c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -167,8 +167,8 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); */ static DEFINE_SPINLOCK(blocked_lock_lock); -static struct kmem_cache *flctx_cache __read_mostly; -static struct kmem_cache *filelock_cache __read_mostly; +static struct kmem_cache *flctx_cache __ro_after_init; +static struct kmem_cache *filelock_cache __ro_after_init; static struct file_lock_context * locks_get_lock_context(struct inode *inode, int type) diff --git a/fs/namespace.c b/fs/namespace.c index 6bde71735efa..fbf0e596fcd3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -39,10 +39,10 @@ /* Maximum number of mounts in a mount namespace */ static unsigned int sysctl_mount_max __read_mostly = 100000; -static unsigned int m_hash_mask __read_mostly; -static unsigned int m_hash_shift __read_mostly; -static unsigned int mp_hash_mask __read_mostly; -static unsigned int mp_hash_shift __read_mostly; +static unsigned int m_hash_mask __ro_after_init; +static unsigned int m_hash_shift __ro_after_init; +static unsigned int mp_hash_mask __ro_after_init; +static unsigned int mp_hash_shift __ro_after_init; static __initdata unsigned long mhash_entries; static int __init set_mhash_entries(char *str) @@ -68,9 +68,9 @@ static u64 event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); -static struct hlist_head *mount_hashtable __read_mostly; -static struct hlist_head *mountpoint_hashtable __read_mostly; -static struct kmem_cache *mnt_cache __read_mostly; +static struct hlist_head *mount_hashtable __ro_after_init; +static struct hlist_head *mountpoint_hashtable __ro_after_init; +static struct kmem_cache *mnt_cache __ro_after_init; static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ @@ -86,7 +86,7 @@ struct mount_kattr { }; /* /sys/fs */ -struct kobject *fs_kobj; +struct kobject *fs_kobj __ro_after_init; EXPORT_SYMBOL_GPL(fs_kobj); /* diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 869b016014d2..1cb9ad7e884e 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -39,9 +39,9 @@ static void __init dnotify_sysctl_init(void) #define dnotify_sysctl_init() do { } while (0) #endif -static struct kmem_cache *dnotify_struct_cache __read_mostly; -static struct kmem_cache *dnotify_mark_cache __read_mostly; -static struct fsnotify_group *dnotify_group __read_mostly; +static struct kmem_cache *dnotify_struct_cache __ro_after_init; +static struct kmem_cache *dnotify_mark_cache __ro_after_init; +static struct fsnotify_group *dnotify_group __ro_after_init; /* * dnotify will attach one of these to each inode (i_fsnotify_marks) which diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 62fe0b679e58..45aecdc302f4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -112,10 +112,10 @@ static void __init fanotify_sysctls_init(void) extern const struct fsnotify_ops fanotify_fsnotify_ops; -struct kmem_cache *fanotify_mark_cache 
__read_mostly; -struct kmem_cache *fanotify_fid_event_cachep __read_mostly; -struct kmem_cache *fanotify_path_event_cachep __read_mostly; -struct kmem_cache *fanotify_perm_event_cachep __read_mostly; +struct kmem_cache *fanotify_mark_cache __ro_after_init; +struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; +struct kmem_cache *fanotify_path_event_cachep __ro_after_init; +struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1c4bfdab008d..a3809ae92170 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -49,7 +49,7 @@ /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; #ifdef CONFIG_SYSCTL diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f0937902f7b4..91b32b2377ac 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -967,7 +967,14 @@ int ocfs2_num_free_extents(struct ocfs2_extent_tree *et) el = &eb->h_list; } - BUG_ON(el->l_tree_depth != 0); + if (el->l_tree_depth != 0) { + retval = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has leaf extent block %llu with an invalid l_tree_depth of %u\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), + (unsigned long long)last_eb_blk, + le16_to_cpu(el->l_tree_depth)); + goto bail; + } retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); bail: @@ -7642,7 +7649,7 @@ out_mutex: goto next_group; } out: - range->len = trimmed * sb->s_blocksize; + range->len = trimmed * osb->s_clustersize; return ret; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 196638a22b48..cdb9b9bdea1f 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -158,7 +158,7 @@ read_failure: if (new_bh && bh) { /* If middle bh fails, let previous bh * finish its read and then put it to - * aovoid bh leak + * avoid bh leak */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -345,7 +345,7 @@ read_failure: if (new_bh && bh) { /* If middle bh fails, let previous bh * finish its read and then put it to - * aovoid bh leak + * avoid bh leak */ if (!buffer_jbd(bh)) wait_on_buffer(bh); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 9b57d012fd5c..85215162c9dd 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -80,8 +80,7 @@ static int param_set_dlmfs_capabilities(const char *val, static int param_get_dlmfs_capabilities(char *buffer, const struct kernel_param *kp) { - return strlcpy(buffer, DLMFS_CAPABILITIES, - strlen(DLMFS_CAPABILITIES) + 1); + return sysfs_emit(buffer, DLMFS_CAPABILITIES); } module_param_call(capabilities, param_set_dlmfs_capabilities, param_get_dlmfs_capabilities, NULL, 0444); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ce215565d061..604fea3a26ff 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -90,7 +90,7 @@ enum ocfs2_replay_state { struct ocfs2_replay_map { unsigned int rm_slots; enum ocfs2_replay_state rm_state; - unsigned char rm_replay_slots[]; + unsigned char rm_replay_slots[] __counted_by(rm_slots); }; static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 681e9501cdd3..814733ba2f4b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1597,6 +1597,10 @@ static 
int ocfs2_rename(struct mnt_idmap *idmap, if (update_dot_dot) { status = ocfs2_update_entry(old_inode, handle, &old_inode_dot_dot_res, new_dir); + if (status < 0) { + mlog_errno(status); + goto bail; + } drop_nlink(old_dir); if (new_inode) { drop_nlink(new_inode); @@ -1636,6 +1640,10 @@ static int ocfs2_rename(struct mnt_idmap *idmap, INODE_CACHE(old_dir), old_dir_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } fe = (struct ocfs2_dinode *) old_dir_bh->b_data; ocfs2_set_links_count(fe, old_dir->i_nlink); ocfs2_journal_dirty(handle, old_dir_bh); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index dfaae1e52412..e09842fc9d4d 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1240,6 +1240,10 @@ int ocfs2_create_local_dquot(struct dquot *dquot) &od->dq_local_phys_blk, &pcount, NULL); + if (status < 0) { + mlog_errno(status); + goto out; + } /* Initialize dquot structure on disk */ status = ocfs2_local_write_dquot(dquot); diff --git a/fs/pipe.c b/fs/pipe.c index 8916c455a469..804a7d789452 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -864,7 +864,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) kfree(pipe); } -static struct vfsmount *pipe_mnt __read_mostly; +static struct vfsmount *pipe_mnt __ro_after_init; /* * pipefs_dname() is called from d_path(). diff --git a/fs/proc/array.c b/fs/proc/array.c index 2c2efbe685d8..ff08a8957552 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -536,12 +536,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t = task; - do { + struct task_struct *t; + + __for_each_thread(sig, t) { min_flt += t->min_flt; maj_flt += t->maj_flt; gtime += task_gtime(t); - } while_each_thread(task, t); + } min_flt += sig->min_flt; maj_flt += sig->maj_flt; diff --git a/fs/proc/base.c b/fs/proc/base.c index 83396ab14998..dd31e3b6bf77 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1153,11 +1153,10 @@ err_unlock: static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char buffer[PROC_NUMBUF]; + char buffer[PROC_NUMBUF] = {}; int oom_adj; int err; - memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { @@ -1213,11 +1212,10 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char buffer[PROC_NUMBUF]; + char buffer[PROC_NUMBUF] = {}; int oom_score_adj; int err; - memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { @@ -1358,13 +1356,13 @@ static ssize_t proc_fault_inject_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF]; + char buffer[PROC_NUMBUF] = {}; int make_it_fail; int rv; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) @@ -1509,11 +1507,10 @@ sched_autogroup_write(struct file *file, const char __user *buf, { struct inode *inode = file_inode(file); struct task_struct *p; - char buffer[PROC_NUMBUF]; + char buffer[PROC_NUMBUF] = {}; int nice; int err; - memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = 
sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) @@ -1666,10 +1663,9 @@ static ssize_t comm_write(struct file *file, const char __user *buf, { struct inode *inode = file_inode(file); struct task_struct *p; - char buffer[TASK_COMM_LEN]; + char buffer[TASK_COMM_LEN] = {}; const size_t maxlen = sizeof(buffer) - 1; - memset(buffer, 0, sizeof(buffer)); if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; @@ -2976,8 +2972,7 @@ static const struct file_operations proc_coredump_filter_operations = { #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) { - struct task_io_accounting acct = task->ioac; - unsigned long flags; + struct task_io_accounting acct; int result; result = down_read_killable(&task->signal->exec_update_lock); @@ -2989,15 +2984,28 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh goto out_unlock; } - if (whole && lock_task_sighand(task, &flags)) { - struct task_struct *t = task; + if (whole) { + struct signal_struct *sig = task->signal; + struct task_struct *t; + unsigned int seq = 1; + unsigned long flags; + + rcu_read_lock(); + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); - task_io_accounting_add(&acct, &task->signal->ioac); - while_each_thread(task, t) - task_io_accounting_add(&acct, &t->ioac); + acct = sig->ioac; + __for_each_thread(sig, t) + task_io_accounting_add(&acct, &t->ioac); - unlock_task_sighand(task, &flags); + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + rcu_read_unlock(); + } else { + acct = task->ioac; } + seq_printf(m, "rchar: %llu\n" "wchar: %llu\n" @@ -3818,7 +3826,7 @@ static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, for_each_thread(task, pos) { if (!nr--) goto found; - }; + } fail: pos = NULL; goto out; @@ -3840,10 +3848,8 @@ static struct task_struct *next_tid(struct task_struct *start) struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { - pos = next_thread(start); - if (thread_group_leader(pos)) - pos = NULL; - else + pos = __next_thread(start); + if (pos) get_task_struct(pos); } rcu_read_unlock(); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 592ed2516f47..b33e490e3fd9 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -110,18 +110,15 @@ void __init proc_init_kmemcache(void) void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock) { - struct inode *inode; - struct proc_inode *ei; struct hlist_node *node; struct super_block *old_sb = NULL; rcu_read_lock(); - for (;;) { + while ((node = hlist_first_rcu(inodes))) { + struct proc_inode *ei = hlist_entry(node, struct proc_inode, sibling_inodes); struct super_block *sb; - node = hlist_first_rcu(inodes); - if (!node) - break; - ei = hlist_entry(node, struct proc_inode, sibling_inodes); + struct inode *inode; + spin_lock(lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(lock); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4abd51053f76..ef2eb12906da 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -851,9 +851,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; - struct mem_size_stats mss; - - memset(&mss, 0, sizeof(mss)); + struct mem_size_stats mss = {}; smap_gather_stats(vma, &mss, 0); @@ -879,7 +877,7 @@ 
static int show_smap(struct seq_file *m, void *v) static int show_smaps_rollup(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; - struct mem_size_stats mss; + struct mem_size_stats mss = {}; struct mm_struct *mm = priv->mm; struct vm_area_struct *vma; unsigned long vma_start = 0, last_vma_end = 0; @@ -895,8 +893,6 @@ static int show_smaps_rollup(struct seq_file *m, void *v) goto out_put_task; } - memset(&mss, 0, sizeof(mss)); - ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; @@ -1248,14 +1244,13 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF]; + char buffer[PROC_NUMBUF] = {}; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; int itype; int rv; - memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ac616cfbacf5..e8af40b05549 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -49,7 +49,7 @@ static struct ctl_table vm_userfaultfd_table[] = { }; #endif -static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; +static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init; /* * Start with fault_pending_wqh and fault_wqh so they're more likely diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..ddab1ef22bee 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -14,11 +14,6 @@ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..2ceba3fe4ec1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -39,8 +39,6 @@ #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..bb1339c7057b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -177,10 +177,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. */ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races @@ -231,6 +228,14 @@ static inline void *offset_to_ptr(const int *off) #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) /* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? 
((void *)((long)(x) * 0l)) : (int *)8))) + +/* * Whether 'type' is a signed type or an unsigned type. Supports scalar types, * bool and also pointer types. */ diff --git a/include/linux/const.h b/include/linux/const.h index 435ddd72d2c4..81b8aae5a855 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,12 +3,4 @@ #include <vdso/const.h> -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #endif /* _LINUX_CONST_H */ diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 08704c29fdb4..5126a4fecb44 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,6 +5,14 @@ #include <linux/linkage.h> #include <linux/elfcore.h> #include <linux/elf.h> +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include <asm/crash_core.h> +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; #define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) @@ -79,12 +87,43 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 1e7711185ec6..79ef6ac4c021 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -639,7 +639,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, __q_size_field, #op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ - "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ __underlying_##op(p, q, 
__fortify_size); \ }) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 32c78078552c..8227455192b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -22,10 +22,6 @@ #include <uapi/linux/kexec.h> #include <linux/verification.h> -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 529974e22ea7..7fcf29a4e0de 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -147,9 +147,4 @@ extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -static inline int strtobool(const char *s, bool *res) -{ - return kstrtobool(s, res); -} - #endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2c30ade43bc8..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,6 +86,7 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_should_stop_or_park(void); diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 83aebc244cba..2ec559284a9f 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,60 +2,77 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/const.h> #include <linux/types.h> /* * min()/max()/clamp() macros must accomplish three things: * - * - avoid multiple evaluations of the arguments (so side-effects like + * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only + * - Retain result as a constant expression when called with only * constant expressions (to avoid tripping VLA warnings in stack * allocation usage). + * - Perform signed v unsigned type-checking (to generate compile + * errors instead of nasty runtime surprises). + * - Unsigned char/short are always promoted to signed int and can be + * compared against signed or unsigned arguments. + * - Unsigned arguments can be compared against non-negative signed constants. + * - Comparison of a signed argument against an unsigned constant fails + * even if the constant is below __INT_MAX__ and could be cast to int.
*/ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) +/* is_signed_type() isn't a constexpr for pointer types */ +#define __is_signed(x) \ + __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ + is_signed_type(typeof(x)), 0) -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) +/* True for a non-negative signed int constant */ +#define __is_noneg_int(x) \ + (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) +#define __types_ok(x, y) \ + (__is_signed(x) == __is_signed(y) || \ + __is_signed((x) + 0) == __is_signed((y) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) +#define __cmp_op_min < +#define __cmp_op_max > -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) + +#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + static_assert(__types_ok(x, y), \ + #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + __cmp(op, unique_x, unique_y); }) + +#define __careful_cmp(op, x, y) \ + __builtin_choose_expr(__is_constexpr((x) - (y)), \ + __cmp(op, x, y), \ + __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __clamp_input_check(lo, hi) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((lo) > (hi)), (lo) > (hi), false))) +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ + (lo) <= (hi), true), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ + __clamp(unique_val, unique_lo, unique_hi); }) #define __careful_clamp(val, lo, hi) ({ \ - __clamp_input_check(lo, hi) + \ - __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ - __typecheck(hi, lo) && __is_constexpr(val) && \ - __is_constexpr(lo) && __is_constexpr(hi), \ + __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ __clamp(val, lo, hi), \ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) @@ -65,14 +82,31 @@ * @x: first value * @y: second value */ -#define min(x, y) __careful_cmp(x, y, <) +#define min(x, y) __careful_cmp(min, x, y) /** * max - return maximum of two values of the same or compatible types * @x: first value * @y: second value */ -#define max(x, y) __careful_cmp(x, y, >) +#define max(x, y) __careful_cmp(max, x, y) + +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. 
+ * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) /** * min3 - return minimum of three values @@ -124,7 +158,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) +#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) /** * max_t - return maximum of two values, using the specified type @@ -132,28 +166,7 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) - -/* - * Remove a const qualifier from integer types - * _Generic(foo, type-name: association, ..., default: association) performs a - * comparison against the foo type (not the qualified type). - * Do not use the const keyword in the type-name as it will not match the - * unqualified type of foo. - */ -#define __unconst_integer_type_cases(type) \ - unsigned type: (unsigned type)0, \ - signed type: (signed type)0 - -#define __unconst_integer_typeof(x) typeof( \ - _Generic((x), \ - char: (char)0, \ - __unconst_integer_type_cases(char), \ - __unconst_integer_type_cases(short), \ - __unconst_integer_type_cases(int), \ - __unconst_integer_type_cases(long), \ - __unconst_integer_type_cases(long long), \ - default: (x))) +#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) /* * Do not check the array parameter using __must_be_array(). @@ -169,13 +182,13 @@ * 'int *buff' and 'int buff[N]' types. * * The array can be an array of const items. - * typeof() keeps the const qualifier. Use __unconst_integer_typeof() in order + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order * to discard the const qualifier for the __element variable. */ #define __minmax_array(op, array, len) ({ \ typeof(&(array)[0]) __array = (array); \ typeof(len) __len = (len); \ - __unconst_integer_typeof(__array[0]) __element = __array[--__len]; \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ while (__len--) \ __element = op(__element, __array[__len]); \ __element; }) diff --git a/include/linux/sched.h b/include/linux/sched.h index b49ca40f6335..292c31697248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1007,7 +1007,6 @@ struct task_struct { /* PID/PID hash table linkage. 
*/ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 3b28cff24cc1..3499c1a8b929 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -707,15 +707,26 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } -static inline struct task_struct *next_thread(const struct task_struct *p) +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) +{ + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + +static inline struct task_struct *next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 386ab580b839..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ diff --git a/include/linux/stringify.h b/include/linux/stringify.h index 841cec8ed525..0e84cbe65270 100644 --- a/include/linux/stringify.h +++ b/include/linux/stringify.h @@ -9,4 +9,6 @@ #define __stringify_1(x...) #x #define __stringify(x...) 
__stringify_1(x) +#define FILE_LINE __FILE__ ":" __stringify(__LINE__) + #endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 9162f275819a..26a545bb0153 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -77,8 +77,7 @@ struct timer_list { .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ + __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \ } #define DEFINE_TIMER(_name, _function) \ diff --git a/init/init_task.c b/init/init_task.c index f703116e0523..5727d42149c3 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -133,7 +133,6 @@ struct task_struct init_task .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), .timer_slack_ns = 50000, /* 50 usec default slack */ .thread_pid = &init_struct_pid, - .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), #ifdef CONFIG_AUDIT .loginuid = INVALID_UID, diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 9bfe68fe9676..7aff28ded2f4 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -110,7 +110,7 @@ config CRASH_DUMP For more details see Documentation/admin-guide/kdump/kdump.rst For s390, this option also enables zfcpdump. - See also <file:Documentation/s390/zfcpdump.rst> + See also <file:Documentation/arch/s390/zfcpdump.rst> config CRASH_HOTPLUG bool "Update the crash elfcorehdr on system configuration changes" diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 85a5b306733b..1b07e6f12a07 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -87,8 +87,8 @@ static struct task_struct *prune_thread; * that makes a difference. Some. */ -static struct fsnotify_group *audit_tree_group; -static struct kmem_cache *audit_tree_mark_cachep __read_mostly; +static struct fsnotify_group *audit_tree_group __ro_after_init; +static struct kmem_cache *audit_tree_mark_cachep __ro_after_init; static struct audit_tree *alloc_tree(const char *s) { diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 2f675ef045d4..efe87d501c8c 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -5,7 +5,6 @@ */ #include <linux/buildid.h> -#include <linux/crash_core.h> #include <linux/init.h> #include <linux/utsname.h> #include <linux/vmalloc.h> @@ -13,6 +12,9 @@ #include <linux/kexec.h> #include <linux/memory.h> #include <linux/cpuhotplug.h> +#include <linux/memblock.h> +#include <linux/kexec.h> +#include <linux/kmemleak.h> #include <asm/page.h> #include <asm/sections.h> @@ -33,6 +35,22 @@ u32 *vmcoreinfo_note; /* trusted vmcoreinfo, e.g. 
we can make a copy in the crash memory */ static unsigned char *vmcoreinfo_data_safecopy; +/* Location of the reserved area for the crash kernel */ +struct resource crashk_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; +struct resource crashk_low_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; + /* * parsing the "crashkernel" commandline * @@ -248,11 +266,11 @@ static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name, const char *suffix) { char *first_colon, *first_space; char *ck_cmdline; + char *name = "crashkernel="; BUG_ON(!crash_size || !crash_base); *crash_size = 0; @@ -283,32 +301,53 @@ static int __init __parse_crashkernel(char *cmdline, /* * That function is the entry point for command line parsing and should be * called from the arch-specific code. + * + * If crashkernel=,high|low is supported on the architecture, non-NULL values + * should be passed to parameters 'low_size' and 'high'. */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, - unsigned long long *crash_base) + unsigned long long *crash_base, + unsigned long long *low_size, + bool *high) { - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", NULL); -} + int ret; -int __init parse_crashkernel_high(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", suffix_tbl[SUFFIX_HIGH]); -} + /* crashkernel=X[@offset] */ + ret = __parse_crashkernel(cmdline, system_ram, crash_size, + crash_base, NULL); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + /* + * If a non-NULL 'high' is passed in and no normal crashkernel + * setting is detected, try parsing crashkernel=,high|low. + */ + if (high && ret == -ENOENT) { + ret = __parse_crashkernel(cmdline, 0, crash_size, + crash_base, suffix_tbl[SUFFIX_HIGH]); + if (ret || !*crash_size) + return -EINVAL; -int __init parse_crashkernel_low(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", suffix_tbl[SUFFIX_LOW]); + /* + * crashkernel=Y,low can be specified or not, but an invalid value + * is not allowed.
+ */ + ret = __parse_crashkernel(cmdline, 0, low_size, + crash_base, suffix_tbl[SUFFIX_LOW]); + if (ret == -ENOENT) { + *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + ret = 0; + } else if (ret) { + return ret; + } + + *high = true; + } +#endif + if (!*crash_size) + ret = -EINVAL; + + return ret; } /* @@ -321,6 +360,109 @@ static int __init parse_crashkernel_dummy(char *arg) } early_param("crashkernel", parse_crashkernel_dummy); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +static int __init reserve_crashkernel_low(unsigned long long low_size) +{ +#ifdef CONFIG_64BIT + unsigned long long low_base; + + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + if (!low_base) { + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); + return -ENOMEM; + } + + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", + low_base, low_base + low_size, low_size >> 20); + + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); +#endif + return 0; +} + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{ + unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; + bool fixed_base = false; + + /* User specifies base address explicitly. */ + if (crash_base) { + fixed_base = true; + search_base = crash_base; + search_end = crash_base + crash_size; + } else if (high) { + search_base = CRASH_ADDR_LOW_MAX; + search_end = CRASH_ADDR_HIGH_MAX; + } + +retry: + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + search_base, search_end); + if (!crash_base) { + /* + * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. + */ + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && search_end == CRASH_ADDR_LOW_MAX) { + search_end = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + goto retry; + } + + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. + */ + if (high && search_end == CRASH_ADDR_HIGH_MAX) { + search_end = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; + } + + if ((crash_base > CRASH_ADDR_LOW_MAX) && + crash_low_size && reserve_crashkernel_low(crash_low_size)) { + memblock_phys_free(crash_base, crash_size); + return; + } + + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + /* + * The crashkernel memory will be removed from the kernel linear + * map. Inform kmemleak so that it won't try to access it. 
+ */ + kmemleak_ignore_phys(crash_base); + if (crashk_low_res.end) + kmemleak_ignore_phys(crashk_low_res.start); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); +} +#endif + int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { diff --git a/kernel/exit.c b/kernel/exit.c index 61ebba96909b..ee9f43bed49a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -135,7 +135,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead) list_del_init(&p->sibling); __this_cpu_dec(process_counts); } - list_del_rcu(&p->thread_group); list_del_rcu(&p->thread_node); } diff --git a/kernel/fork.c b/kernel/fork.c index 373fa2f739bc..10917c3e1f03 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2577,7 +2577,6 @@ __latent_entropy struct task_struct *copy_process( p->dirty_paused_when = 0; p->pdeath_signal = 0; - INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; clear_posix_cputimers_work(p); @@ -2705,8 +2704,6 @@ __latent_entropy struct task_struct *copy_process( atomic_inc(¤t->signal->live); refcount_inc(¤t->signal->sigcnt); task_join_group_stop(p); - list_add_tail_rcu(&p->thread_group, - &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 5c3086cad8f9..01520689b57c 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -99,7 +99,7 @@ struct gcov_iterator { struct gcov_info *info; size_t size; loff_t pos; - char buffer[]; + char buffer[] __counted_by(size); }; /** diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d309ba84e08a..1782f90cd8c6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1852,15 +1852,13 @@ out_thread: struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } if (new->secondary && new->secondary->thread) { struct task_struct *t = new->secondary->thread; new->secondary->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } out_mput: module_put(desc->owner); @@ -1971,12 +1969,9 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * the same bit to a newly requested action. 
*/ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); - if (action->secondary && action->secondary->thread) { - kthread_stop(action->secondary->thread); - put_task_struct(action->secondary->thread); - } + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); } /* Last action releases resources */ diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9dc728982d79..be5642a4ec49 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -52,23 +52,6 @@ atomic_t __kexec_lock = ATOMIC_INIT(0); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; - -/* Location of the reserved area for the crash kernel */ -struct resource crashk_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; -struct resource crashk_low_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; - int kexec_should_crash(struct task_struct *p) { /* diff --git a/kernel/kthread.c b/kernel/kthread.c index c46128ec0c0a..c5e40830c1f2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -715,6 +715,24 @@ int kthread_stop(struct task_struct *k) } EXPORT_SYMBOL(kthread_stop); +/** + * kthread_stop_put - stop a thread and put its task struct + * @k: thread created by kthread_create(). + * + * Stops a thread created by kthread_create() and puts its task_struct. + * Only use when holding an extra task struct reference obtained by + * calling get_task_struct(). + */ +int kthread_stop_put(struct task_struct *k) +{ + int ret; + + ret = kthread_stop(k); + put_task_struct(k); + return ret; +} +EXPORT_SYMBOL(kthread_stop_put); + int kthreadd(void *unused) { struct task_struct *tsk = current; diff --git a/kernel/panic.c b/kernel/panic.c index ffa037fa777d..2807639aab51 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -192,14 +192,15 @@ atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); */ void nmi_panic(struct pt_regs *regs, const char *msg) { - int old_cpu, cpu; + int old_cpu, this_cpu; - cpu = raw_smp_processor_id(); - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); + old_cpu = PANIC_CPU_INVALID; + this_cpu = raw_smp_processor_id(); - if (old_cpu == PANIC_CPU_INVALID) + /* atomic_try_cmpxchg updates old_cpu on failure */ + if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) panic("%s", msg); - else if (old_cpu != cpu) + else if (old_cpu != this_cpu) nmi_panic_self_stop(regs); } EXPORT_SYMBOL(nmi_panic); @@ -311,15 +312,18 @@ void panic(const char *fmt, ...) * stop themselves or will wait until they are stopped by the 1st CPU * with smp_send_stop(). * - * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which - * comes here, so go ahead. + * cmpxchg success means this is the 1st CPU which comes here, + * so go ahead. * `old_cpu == this_cpu' means we came from nmi_panic() which sets * panic_cpu to this CPU. In this case, this is also the 1st CPU.
*/ + old_cpu = PANIC_CPU_INVALID; this_cpu = raw_smp_processor_id(); - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); - if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + /* atomic_try_cmpxchg updates old_cpu on failure */ + if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) { + /* go ahead */ + } else if (old_cpu != this_cpu) panic_smp_self_stop(); console_verbose(); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 619972c78774..3028b2218aa4 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -286,12 +286,6 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns)) return -EPERM; - /* - * Writing directly to ns' last_pid field is OK, since this field - * is volatile in a living namespace anyway and a code writing to - * it should synchronize its usage with external means. - */ - next = idr_get_cursor(&pid_ns->idr) - 1; tmp.data = &next; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3d7e2d702699..a708d225c28e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9869,7 +9869,7 @@ struct task_group root_task_group; LIST_HEAD(task_groups); /* Cacheline aligned slab cache for task_group */ -static struct kmem_cache *task_group_cache __read_mostly; +static struct kmem_cache *task_group_cache __ro_after_init; #endif void __init sched_init(void) diff --git a/kernel/signal.c b/kernel/signal.c index 83fcbaf0e82d..47a7602dfe8d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -415,7 +415,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, int override_rlimit, const unsigned int sigqueue_flags) { struct sigqueue *q = NULL; - struct ucounts *ucounts = NULL; + struct ucounts *ucounts; long sigpending; /* @@ -1058,12 +1058,11 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type) signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = sig; signal->group_stop_count = 0; - t = p; - do { + __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); - } while_each_thread(p, t); + } return; } } @@ -1471,16 +1470,21 @@ int group_send_sig_info(int sig, struct kernel_siginfo *info, int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; - int retval, success; + int ret = -ESRCH; - success = 0; - retval = -ESRCH; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); - success |= !err; - retval = err; + /* + * If group_send_sig_info() succeeds at least once ret + * becomes 0 and after that the code below has no effect. + * Otherwise we return the last err or -ESRCH if this + * process group is empty. + */ + if (ret) + ret = err; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); - return success ? 
0 : retval; + + return ret; } int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f47d8f375946..1992b62e980b 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -272,8 +272,7 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); if (tsk) { - kthread_stop(tsk); - put_task_struct(tsk); + kthread_stop_put(tsk); *per_cpu_ptr(ht->store, cpu) = NULL; } } diff --git a/kernel/sys.c b/kernel/sys.c index 4a8073c1b255..420d9cb9cc8e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1786,6 +1786,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long flags; u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; + struct signal_struct *sig = p->signal; memset((char *)r, 0, sizeof (*r)); utime = stime = 0; @@ -1793,7 +1794,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; + maxrss = sig->maxrss; goto out; } @@ -1803,15 +1804,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; @@ -1821,18 +1822,16 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) thread_group_cputime_adjusted(p, &tgutime, &tgstime); utime += tgutime; stime += tgstime; - r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; - t = p; - do { + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); break; default: diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 8ce3fa0c19e2..4354ea231fab 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -233,9 +233,8 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) else memset(stats, 0, sizeof(*stats)); - tsk = first; start_time = ktime_get_ns(); - do { + for_each_thread(first, tsk) { if (tsk->exit_state) continue; /* @@ -258,7 +257,7 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; - } while_each_thread(first, tsk); + } unlock_task_sighand(first, &flags); rc = 0; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index d52a894ecf57..eabe8bcc7042 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,7 +22,7 @@ #include 
<linux/bsearch.h> #include <linux/sort.h> -static struct kmem_cache *user_ns_cachep __read_mostly; +static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0f682da96e1c..6e578f576a6f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -418,21 +418,21 @@ static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; * process context while holding a pool lock. Bounce to a dedicated kthread * worker to avoid A-A deadlocks. */ -static struct kthread_worker *pwq_release_worker; +static struct kthread_worker *pwq_release_worker __ro_after_init; -struct workqueue_struct *system_wq __read_mostly; +struct workqueue_struct *system_wq __ro_after_init; EXPORT_SYMBOL(system_wq); -struct workqueue_struct *system_highpri_wq __read_mostly; +struct workqueue_struct *system_highpri_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_highpri_wq); -struct workqueue_struct *system_long_wq __read_mostly; +struct workqueue_struct *system_long_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_long_wq); -struct workqueue_struct *system_unbound_wq __read_mostly; +struct workqueue_struct *system_unbound_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_unbound_wq); -struct workqueue_struct *system_freezable_wq __read_mostly; +struct workqueue_struct *system_freezable_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_freezable_wq); -struct workqueue_struct *system_power_efficient_wq __read_mostly; +struct workqueue_struct *system_power_efficient_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_power_efficient_wq); -struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; +struct workqueue_struct *system_freezable_power_efficient_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); static int worker_thread(void *__worker); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a517256a270b..2a8e9d63fbe3 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -89,7 +89,7 @@ static int debug_objects_pool_size __read_mostly static int debug_objects_pool_min_level __read_mostly = ODEBUG_POOL_MIN_LEVEL; static const struct debug_obj_descr *descr_test __read_mostly; -static struct kmem_cache *obj_cache __read_mostly; +static struct kmem_cache *obj_cache __ro_after_init; /* * Track numbers of kmem_cache_alloc()/free() calls done. 
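The __read_mostly to __ro_after_init conversions in the hunks above (user_ns_cachep, the system workqueue pointers, obj_cache, and several kmem_cache pointers earlier in this series) all follow one pattern: a variable written exactly once during boot is moved into a section that the kernel remaps read-only once init completes, so a later stray write faults instead of silently corrupting state; __read_mostly, by contrast, only groups rarely-written data for cache locality and protects nothing. A minimal sketch of the pattern, assuming a boot-time initcall; example_cachep and example_cache_init are hypothetical names, not taken from this diff:

#include <linux/cache.h>
#include <linux/init.h>
#include <linux/slab.h>

/* Assigned once below; made read-only after init completes
 * (on architectures that enforce __ro_after_init).
 */
static struct kmem_cache *example_cachep __ro_after_init;

static int __init example_cache_init(void)
{
	/* The single store, performed while the section is still writable. */
	example_cachep = kmem_cache_create("example_cache", 64, 0,
					   SLAB_HWCACHE_ALIGN, NULL);
	return example_cachep ? 0 : -ENOMEM;
}
early_initcall(example_cache_init);

The caches and workqueues converted above qualify because they are created once during boot and then live unchanged for the lifetime of the system.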
diff --git a/mm/damon/core.c b/mm/damon/core.c index aa2dc7087cd9..630077d95dc6 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -735,8 +735,7 @@ static int __damon_stop(struct damon_ctx *ctx) if (tsk) { get_task_struct(tsk); mutex_unlock(&ctx->kdamond_lock); - kthread_stop(tsk); - put_task_struct(tsk); + kthread_stop_put(tsk); return 0; } mutex_unlock(&ctx->kdamond_lock); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index bc2d8ff269c7..064654717843 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -91,7 +91,7 @@ static unsigned int khugepaged_max_ptes_shared __read_mostly; #define MM_SLOTS_HASH_BITS 10 static DEFINE_READ_MOSTLY_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); -static struct kmem_cache *mm_slot_cache __read_mostly; +static struct kmem_cache *mm_slot_cache __ro_after_init; struct collapse_control { bool is_khugepaged; diff --git a/mm/shmem.c b/mm/shmem.c index 71b8d957b63b..91e2620148b2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -42,7 +42,7 @@ #include <linux/iversion.h> #include "swap.h" -static struct vfsmount *shm_mnt; +static struct vfsmount *shm_mnt __ro_after_init; #ifdef CONFIG_SHMEM /* @@ -4400,7 +4400,7 @@ static const struct fs_context_operations shmem_fs_context_ops = { #endif }; -static struct kmem_cache *shmem_inode_cachep; +static struct kmem_cache *shmem_inode_cachep __ro_after_init; static struct inode *shmem_alloc_inode(struct super_block *sb) { @@ -4432,14 +4432,14 @@ static void shmem_init_inode(void *foo) inode_init_once(&info->vfs_inode); } -static void shmem_init_inodecache(void) +static void __init shmem_init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); } -static void shmem_destroy_inodecache(void) +static void __init shmem_destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8afcfadf8d5a..57cea67b7562 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -4032,8 +4032,7 @@ static void __net_exit pg_net_exit(struct net *net) list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); - kthread_stop(t->tsk); - put_task_struct(t->tsk); + kthread_stop_put(t->tsk); kfree(t); } diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index e3517d4ab8ec..e810e0c27ff1 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -66,10 +66,11 @@ LX_GDBPARSED(IRQD_LEVEL) LX_GDBPARSED(IRQ_HIDDEN) /* linux/module.h */ -LX_GDBPARSED(MOD_TEXT) -LX_GDBPARSED(MOD_DATA) -LX_GDBPARSED(MOD_RODATA) -LX_GDBPARSED(MOD_RO_AFTER_INIT) +if IS_BUILTIN(CONFIG_MODULES): + LX_GDBPARSED(MOD_TEXT) + LX_GDBPARSED(MOD_DATA) + LX_GDBPARSED(MOD_RODATA) + LX_GDBPARSED(MOD_RO_AFTER_INIT) /* linux/mount.h */ LX_VALUE(MNT_NOSUID) @@ -157,3 +158,4 @@ LX_CONFIG(CONFIG_STACKDEPOT) LX_CONFIG(CONFIG_PAGE_OWNER) LX_CONFIG(CONFIG_SLUB_DEBUG) LX_CONFIG(CONFIG_SLAB_FREELIST_HARDENED) +LX_CONFIG(CONFIG_MMU) diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 255dc18cb9da..cba589e5b57d 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -179,6 +179,21 @@ def get_current_task(cpu): else: raise gdb.GdbError("Sorry, obtaining the current task is not allowed " "while running in userspace(EL0)") + elif utils.is_target_arch("riscv"): + current_tp = gdb.parse_and_eval("$tp") + scratch_reg = gdb.parse_and_eval("$sscratch") + + # by default tp points to current task + 
current_task = current_tp.cast(task_ptr_type) + + # The scratch register is set to 0 in the trap handler after entering the kernel. + # When the hart is in user mode, the scratch register points to the task_struct, + # and tp is used by user mode. So when the scratch register holds a larger value + # (a negative address as ulong is a larger value) than tp, use the scratch register. + if (scratch_reg.cast(utils.get_ulong_type()) > current_tp.cast(utils.get_ulong_type())): + current_task = scratch_reg.cast(task_ptr_type) + + return current_task.dereference() else: raise gdb.GdbError("Sorry, obtaining the current task is not yet " "supported with this arch") diff --git a/scripts/gdb/linux/vmalloc.py b/scripts/gdb/linux/vmalloc.py index 48e4a4fae7bb..d3c8a0274d1e 100644 --- a/scripts/gdb/linux/vmalloc.py +++ b/scripts/gdb/linux/vmalloc.py @@ -10,8 +10,9 @@ import gdb import re from linux import lists, utils, stackdepot, constants, mm -vmap_area_type = utils.CachedType('struct vmap_area') -vmap_area_ptr_type = vmap_area_type.get_type().pointer() +if constants.LX_CONFIG_MMU: + vmap_area_type = utils.CachedType('struct vmap_area') + vmap_area_ptr_type = vmap_area_type.get_type().pointer() def is_vmalloc_addr(x): pg_ops = mm.page_ops().ops @@ -25,6 +26,9 @@ class LxVmallocInfo(gdb.Command): super(LxVmallocInfo, self).__init__("lx-vmallocinfo", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): + if not constants.LX_CONFIG_MMU: + raise gdb.GdbError("Requires MMU support") + vmap_area_list = gdb.parse_and_eval('vmap_area_list') for vmap_area in lists.list_for_each_entry(vmap_area_list, vmap_area_ptr_type, "list"): if not vmap_area['vm']: diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index ab123b498fd9..16d8ac6005b6 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -57,6 +57,7 @@ my $subsystem = 0; my $status = 0; my $letters = ""; my $keywords = 1; +my $keywords_in_file = 0; my $sections = 0; my $email_file_emails = 0; my $from_filename = 0; @@ -272,6 +273,7 @@ if (!GetOptions( 'letters=s' => \$letters, 'pattern-depth=i' => \$pattern_depth, 'k|keywords!' => \$keywords, + 'kf|keywords-in-file!' => \$keywords_in_file, 'sections!' => \$sections, 'fe|file-emails!'
=> \$email_file_emails, 'f|file' => \$from_filename, @@ -318,6 +320,7 @@ if ($sections || $letters ne "") { $subsystem = 0; $web = 0; $keywords = 0; + $keywords_in_file = 0; $interactive = 0; } else { my $selections = $email + $scm + $status + $subsystem + $web; @@ -548,16 +551,14 @@ foreach my $file (@ARGV) { $file =~ s/^\Q${cur_path}\E//; #strip any absolute path $file =~ s/^\Q${lk_path}\E//; #or the path to the lk tree push(@files, $file); - if ($file ne "MAINTAINERS" && -f $file && $keywords) { + if ($file ne "MAINTAINERS" && -f $file && $keywords && $keywords_in_file) { open(my $f, '<', $file) or die "$P: Can't open $file: $!\n"; my $text = do { local($/) ; <$f> }; close($f); - if ($keywords) { - foreach my $line (keys %keyword_hash) { - if ($text =~ m/$keyword_hash{$line}/x) { - push(@keyword_tvi, $line); - } + foreach my $line (keys %keyword_hash) { + if ($text =~ m/$keyword_hash{$line}/x) { + push(@keyword_tvi, $line); } } } @@ -919,7 +920,7 @@ sub get_maintainers { } foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { - add_categories($line); + add_categories($line, ""); if ($sections) { my $i; my $start = find_starting_index($line); @@ -947,7 +948,7 @@ sub get_maintainers { if ($keywords) { @keyword_tvi = sort_and_uniq(@keyword_tvi); foreach my $line (@keyword_tvi) { - add_categories($line); + add_categories($line, ":Keyword:$keyword_hash{$line}"); } } @@ -1076,6 +1077,7 @@ Output type options: Other options: --pattern-depth => Number of pattern directory traversals (default: 0 (all)) --keywords => scan patch for keywords (default: $keywords) + --keywords-in-file => scan file for keywords (default: $keywords_in_file) --sections => print all of the subsystem sections with pattern matches --letters => print all matching 'letter' types from all matching sections --mailmap => use .mailmap file (default: $email_use_mailmap) @@ -1086,7 +1088,7 @@ Other options: Default options: [--email --tree --nogit --git-fallback --m --r --n --l --multiline - --pattern-depth=0 --remove-duplicates --rolestats] + --pattern-depth=0 --remove-duplicates --rolestats --keywords] Notes: Using "-f directory" may give unexpected results: @@ -1312,7 +1314,7 @@ sub get_list_role { } sub add_categories { - my ($index) = @_; + my ($index, $suffix) = @_; my $i; my $start = find_starting_index($index); @@ -1342,7 +1344,7 @@ sub add_categories { if (!$hash_list_to{lc($list_address)}) { $hash_list_to{lc($list_address)} = 1; push(@list_to, [$list_address, - "subscriber list${list_role}"]); + "subscriber list${list_role}" . $suffix]); } } } else { @@ -1352,12 +1354,12 @@ sub add_categories { if ($email_moderated_list) { $hash_list_to{lc($list_address)} = 1; push(@list_to, [$list_address, - "moderated list${list_role}"]); + "moderated list${list_role}" . $suffix]); } } else { $hash_list_to{lc($list_address)} = 1; push(@list_to, [$list_address, - "open list${list_role}"]); + "open list${list_role}" . $suffix]); } } } @@ -1365,19 +1367,19 @@ sub add_categories { } elsif ($ptype eq "M") { if ($email_maintainer) { my $role = get_maintainer_role($i); - push_email_addresses($pvalue, $role); + push_email_addresses($pvalue, $role . $suffix); } } elsif ($ptype eq "R") { if ($email_reviewer) { my $subsystem = get_subsystem_name($i); - push_email_addresses($pvalue, "reviewer:$subsystem"); + push_email_addresses($pvalue, "reviewer:$subsystem" . $suffix); } } elsif ($ptype eq "T") { - push(@scm, $pvalue); + push(@scm, $pvalue . $suffix); } elsif ($ptype eq "W") { - push(@web, $pvalue); + push(@web, $pvalue . 
$suffix); } elsif ($ptype eq "S") { - push(@status, $pvalue); + push(@status, $pvalue . $suffix); } } } diff --git a/scripts/show_delta b/scripts/show_delta index 28e67e178194..291ad65e3089 100755 --- a/scripts/show_delta +++ b/scripts/show_delta @@ -125,4 +125,5 @@ def main(): for line in lines: print (convert_line(line, base_time),) -main() +if __name__ == "__main__": + main() diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 27ea19fb1f54..d4419a2a1e24 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -23,7 +23,7 @@ static struct rb_root integrity_iint_tree = RB_ROOT; static DEFINE_RWLOCK(integrity_iint_lock); -static struct kmem_cache *iint_cache __read_mostly; +static struct kmem_cache *iint_cache __ro_after_init; struct dentry *integrity_dir; diff --git a/sound/pci/asihpi/hpidebug.h b/sound/pci/asihpi/hpidebug.h index c24ed69eb743..c6dfc229213d 100644 --- a/sound/pci/asihpi/hpidebug.h +++ b/sound/pci/asihpi/hpidebug.h @@ -29,16 +29,15 @@ enum { HPI_DEBUG_LEVEL_ERROR = 0, /* always log errors */ the start of each message, eg see linux kernel hpios.h */ #ifdef SOURCEFILE_NAME +#undef FILE_LINE #define FILE_LINE SOURCEFILE_NAME ":" __stringify(__LINE__) " " -#else -#define FILE_LINE __FILE__ ":" __stringify(__LINE__) " " #endif #define HPI_DEBUG_ASSERT(expression) \ do { \ if (!(expression)) { \ printk(KERN_ERR FILE_LINE \ - "ASSERT " __stringify(expression)); \ + " ASSERT " __stringify(expression)); \ } \ } while (0) @@ -46,7 +45,7 @@ enum { HPI_DEBUG_LEVEL_ERROR = 0, /* always log errors */ do { \ if (hpi_debug_level >= HPI_DEBUG_LEVEL_##level) { \ printk(HPI_DEBUG_FLAG_##level \ - FILE_LINE __VA_ARGS__); \ + FILE_LINE " " __VA_ARGS__); \ } \ } while (0) @@ -70,7 +69,7 @@ void hpi_debug_data(u16 *pdata, u32 len); do { \ if (hpi_debug_level >= HPI_DEBUG_LEVEL_##level) { \ hpi_debug_message(phm, HPI_DEBUG_FLAG_##level \ - FILE_LINE __stringify(level)); \ + FILE_LINE " " __stringify(level)); \ } \ } while (0) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 1684216e826a..7b65566f3e42 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -63,6 +63,14 @@ # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 435ddd72d2c4..81b8aae5a855 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,12 +3,4 @@ #include <vdso/const.h> -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? 
((void *)((long)(x) * 0l)) : (int *)8))) - #endif /* _LINUX_CONST_H */ diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index bf4c4cd46600..cc16f6ca8533 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -309,6 +309,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope CATEGORY="memfd_secret" run_test ./memfd_secret # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index ee71ce52cb6a..56198d4ca2bf 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -23,6 +23,9 @@ * /proc/${pid}/smaps * /proc/${pid}/smaps_rollup */ +#undef _GNU_SOURCE +#define _GNU_SOURCE + #undef NDEBUG #include <assert.h> #include <errno.h> @@ -34,6 +37,7 @@ #include <sys/mman.h> #include <sys/ptrace.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> @@ -42,6 +46,43 @@ #define TEST_VSYSCALL #endif +#if defined __amd64__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 330 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 331 + #endif +#elif defined __i386__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 381 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 382 + #endif +#else + #error "SYS_pkey_alloc" +#endif + +static int g_protection_key_support; + +static int protection_key_support(void) +{ + long rv = syscall(SYS_pkey_alloc, 0, 0); + if (rv > 0) { + syscall(SYS_pkey_free, (int)rv); + return 1; + } else if (rv == -1 && errno == ENOSYS) { + return 0; + } else if (rv == -1 && errno == EINVAL) { + // ospke=n + return 0; + } else { + fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno); + exit(EXIT_FAILURE); + } +} + /* * 0: vsyscall VMA doesn't exist vsyscall=none * 1: vsyscall VMA is --xp vsyscall=xonly @@ -60,7 +101,7 @@ static const char proc_pid_maps_vsyscall_2[] = static const char proc_pid_smaps_vsyscall_0[] = ""; static const char proc_pid_smaps_vsyscall_1[] = -"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" "Size: 4 kB\n" "KernelPageSize: 4 kB\n" "MMUPageSize: 4 kB\n" @@ -73,6 +114,7 @@ static const char proc_pid_smaps_vsyscall_1[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" @@ -83,14 +125,10 @@ static const char proc_pid_smaps_vsyscall_1[] = "SwapPss: 0 kB\n" "Locked: 0 kB\n" "THPeligible: 0\n" -/* - * "ProtectionKey:" field is conditional. It is possible to check it as well, - * but I don't have such machine. 
- */ ; static const char proc_pid_smaps_vsyscall_2[] = -"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" "Size: 4 kB\n" "KernelPageSize: 4 kB\n" "MMUPageSize: 4 kB\n" @@ -103,6 +141,7 @@ static const char proc_pid_smaps_vsyscall_2[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" @@ -113,10 +152,6 @@ static const char proc_pid_smaps_vsyscall_2[] = "SwapPss: 0 kB\n" "Locked: 0 kB\n" "THPeligible: 0\n" -/* - * "ProtectionKey:" field is conditional. It is possible to check it as well, - * but I'm too tired. - */ ; static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) @@ -238,19 +273,27 @@ static int test_proc_pid_smaps(pid_t pid) } perror("open /proc/${pid}/smaps"); return EXIT_FAILURE; + } + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(0 <= rv); + assert(rv <= sizeof(buf)); + + if (g_vsyscall == 0) { + assert(rv == 0); } else { - ssize_t rv = read(fd, buf, sizeof(buf)); - close(fd); - if (g_vsyscall == 0) { - assert(rv == 0); - } else { - size_t len = strlen(g_proc_pid_maps_vsyscall); - /* TODO "ProtectionKey:" */ - assert(rv > len); - assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); + size_t len = strlen(g_proc_pid_smaps_vsyscall); + assert(rv > len); + assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); + + if (g_protection_key_support) { +#define PROTECTION_KEY "ProtectionKey: 0\n" + assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); } - return EXIT_SUCCESS; } + + return EXIT_SUCCESS; } static const char g_smaps_rollup[] = @@ -303,6 +346,95 @@ static int test_proc_pid_smaps_rollup(pid_t pid) } } +static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) +{ + *rv = 0; + for (; p != end; p += 1) { + if ('0' <= *p && *p <= '9') { + assert(!__builtin_mul_overflow(*rv, 10, rv)); + assert(!__builtin_add_overflow(*rv, *p - '0', rv)); + } else { + break; + } + } + assert(p != end); + return p; +} + +/* + * There seem to be 2 types of valid output: + * "0 A A B 0 0 0\n" for dynamic executables, + * "0 0 0 B 0 0 0\n" for static executables. + */ +static int test_proc_pid_statm(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + perror("open /proc/${pid}/statm"); + return EXIT_FAILURE; + } + + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(rv >= 0); + assert(rv <= sizeof(buf)); + if (0) { + write(1, buf, rv); + } + + const char *p = buf; + const char *const end = p + rv; + + /* size */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + uint64_t resident; + p = parse_u64(p, end, &resident); + assert(p != end && *p++ == ' '); + + uint64_t shared; + p = parse_u64(p, end, &shared); + assert(p != end && *p++ == ' '); + + uint64_t text; + p = parse_u64(p, end, &text); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + /* data */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == '\n'); + + assert(p == end); + + /* + * "text" is "mm->end_code - mm->start_code" at execve(2) time. + * munmap() doesn't change it. It can be anything (just link + * statically). It can't be 0 because executing to this point + * implies at least 1 page of code.
+ */ + assert(text > 0); + + /* + * These two are always equal. Always 0 for statically linked + * executables and sometimes 0 for dynamically linked executables. + * There is no way to tell one from another without parsing ELF + * which is too much for this test. + */ + assert(resident == shared); + + return EXIT_SUCCESS; +} + int main(void) { int rv = EXIT_SUCCESS; @@ -328,6 +460,8 @@ int main(void) abort(); } + g_protection_key_support = protection_key_support(); + pid_t pid = fork(); if (pid == -1) { perror("fork"); @@ -389,11 +523,9 @@ int main(void) if (rv == EXIT_SUCCESS) { rv = test_proc_pid_smaps_rollup(pid); } - /* - * TODO test /proc/${pid}/statm, task_statm() - * ->start_code, ->end_code aren't updated by munmap(). - * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything. - */ + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_statm(pid); + } /* Cut the rope. */ int wstatus;