summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 11:11:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 11:11:38 -0700
commit20b0d06722169e6e66049c8fe6f1a48adffb79c6 (patch)
tree1b88278ca547c07f58297325aea1ab3c447e844d
parent63d72b93f2262900c8de74ad0f5a58e0d452c9d3 (diff)
parentdb33ec371be8e45956e8cebb5b0fe641f008430b (diff)
Merge branch 'akpm' (patches from Andrew)
Merge still more updates from Andrew Morton: "Various trees. Mainly those parts of MM whose linux-next dependents are now merged. I'm still sitting on ~160 patches which await merges from -next. Subsystems affected by this patch series: mm/proc, ipc, dynamic-debug, panic, lib, sysctl, mm/gup, mm/pagemap" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (52 commits) doc: cgroup: update note about conditions when oom killer is invoked module: move the set_fs hack for flush_icache_range to m68k nommu: use flush_icache_user_range in brk and mmap binfmt_flat: use flush_icache_user_range exec: use flush_icache_user_range in read_code exec: only build read_code when needed m68k: implement flush_icache_user_range arm: rename flush_cache_user_range to flush_icache_user_range xtensa: implement flush_icache_user_range sh: implement flush_icache_user_range asm-generic: add a flush_icache_user_range stub mm: rename flush_icache_user_range to flush_icache_user_page arm,sparc,unicore32: remove flush_icache_user_range riscv: use asm-generic/cacheflush.h powerpc: use asm-generic/cacheflush.h openrisc: use asm-generic/cacheflush.h m68knommu: use asm-generic/cacheflush.h microblaze: use asm-generic/cacheflush.h ia64: use asm-generic/cacheflush.h hexagon: use asm-generic/cacheflush.h ...
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst17
-rw-r--r--Documentation/admin-guide/dynamic-debug-howto.rst5
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst8
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt34
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst37
-rw-r--r--Documentation/core-api/pin_user_pages.rst51
-rw-r--r--arch/alpha/include/asm/cacheflush.h32
-rw-r--r--arch/alpha/kernel/smp.c2
-rw-r--r--arch/arm/include/asm/cacheflush.h7
-rw-r--r--arch/arm/kernel/fiq.c4
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm64/include/asm/cacheflush.h46
-rw-r--r--arch/c6x/include/asm/cacheflush.h19
-rw-r--r--arch/hexagon/include/asm/cacheflush.h19
-rw-r--r--arch/ia64/include/asm/cacheflush.h30
-rw-r--r--arch/m68k/include/asm/cacheflush_mm.h6
-rw-r--r--arch/m68k/include/asm/cacheflush_no.h19
-rw-r--r--arch/m68k/mm/cache.c13
-rw-r--r--arch/microblaze/include/asm/cacheflush.h29
-rw-r--r--arch/nds32/include/asm/cacheflush.h4
-rw-r--r--arch/nds32/mm/cacheflush.c3
-rw-r--r--arch/openrisc/include/asm/cacheflush.h31
-rw-r--r--arch/powerpc/include/asm/cacheflush.h42
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/perf/callchain_64.c4
-rw-r--r--arch/riscv/include/asm/cacheflush.h65
-rw-r--r--arch/sh/include/asm/cacheflush.h1
-rw-r--r--arch/sparc/include/asm/cacheflush_32.h2
-rw-r--r--arch/sparc/include/asm/cacheflush_64.h1
-rw-r--r--arch/um/include/asm/tlb.h2
-rw-r--r--arch/unicore32/include/asm/cacheflush.h11
-rw-r--r--arch/x86/include/asm/cacheflush.h2
-rw-r--r--arch/xtensa/include/asm/cacheflush.h2
-rw-r--r--drivers/media/platform/omap3isp/ispvideo.c2
-rw-r--r--drivers/nvdimm/pmem.c3
-rw-r--r--drivers/vhost/vhost.c5
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/exec.c5
-rw-r--r--fs/proc/proc_sysctl.c145
-rw-r--r--include/asm-generic/cacheflush.h25
-rw-r--r--include/linux/dev_printk.h6
-rw-r--r--include/linux/dynamic_debug.h2
-rw-r--r--include/linux/ipc_namespace.h2
-rw-r--r--include/linux/kernel.h9
-rw-r--r--include/linux/mm.h12
-rw-r--r--include/linux/net.h3
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/linux/printk.h9
-rw-r--r--include/linux/sched/sysctl.h7
-rw-r--r--include/linux/sysctl.h4
-rw-r--r--include/linux/xarray.h4
-rw-r--r--include/rdma/ib_verbs.h6
-rw-r--r--init/main.c2
-rw-r--r--ipc/msg.c2
-rw-r--r--ipc/namespace.c24
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/hung_task.c30
-rw-r--r--kernel/module.c8
-rw-r--r--kernel/panic.c45
-rw-r--r--kernel/sysctl.c38
-rw-r--r--kernel/watchdog.c37
-rw-r--r--lib/Kconfig.debug12
-rw-r--r--lib/Makefile2
-rw-r--r--lib/dynamic_debug.c9
-rw-r--r--lib/test_sysctl.c13
-rw-r--r--mm/frame_vector.c7
-rw-r--r--mm/gup.c73
-rw-r--r--mm/nommu.c4
-rw-r--r--mm/page_alloc.c9
-rw-r--r--mm/page_idle.c7
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh44
-rw-r--r--virt/kvm/kvm_main.c8
75 files changed, 701 insertions, 493 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index c2a4b652bd1a..ce3e05e41724 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1170,6 +1170,13 @@ PAGE_SIZE multiple when read back.
Under certain circumstances, the usage may go over the limit
temporarily.
+ In default configuration regular 0-order allocations always
+ succeed unless OOM killer chooses current task as a victim.
+
+ Some kinds of allocations don't invoke the OOM killer.
+ Caller could retry them differently, return into userspace
+ as -ENOMEM or silently ignore in cases like disk readahead.
+
This is the ultimate protection mechanism. As long as the
high limit is used and monitored properly, this limit's
utility is limited to providing the final safety net.
@@ -1226,17 +1233,9 @@ PAGE_SIZE multiple when read back.
The number of times the cgroup's memory usage
reached the limit and allocation was about to fail.
- Depending on context result could be invocation of OOM
- killer and retrying allocation or failing allocation.
-
- Failed allocation in its turn could be returned into
- userspace as -ENOMEM or silently ignored in cases like
- disk readahead. For now OOM in memory cgroup kills
- tasks iff shortage has happened inside page fault.
-
This event is not raised if the OOM killer is not
considered as an option, e.g. for failed high-order
- allocations.
+ allocations or if caller asked to not retry attempts.
oom_kill
The number of processes belonging to this cgroup
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst
index 0dc2eb8e44e5..1012bd9305e9 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -13,6 +13,11 @@ kernel code to obtain additional kernel information. Currently, if
``print_hex_dump_debug()``/``print_hex_dump_bytes()`` calls can be dynamically
enabled per-callsite.
+If you do not want to enable dynamic debug globally (i.e. in some embedded
+system), you may set ``CONFIG_DYNAMIC_DEBUG_CORE`` as basic support of dynamic
+debug and add ``ccflags := -DDYNAMIC_DEBUG_MODULE`` into the Makefile of any
+modules which you'd like to dynamically debug later.
+
If ``CONFIG_DYNAMIC_DEBUG`` is not set, ``print_hex_dump_debug()`` is just
shortcut for ``print_hex_dump(KERN_DEBUG)``.
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index ac7e131d2935..2da65fef2a1c 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -521,6 +521,14 @@ will cause a kdump to occur at the panic() call. In cases where a user wants
to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
to achieve the same behaviour.
+Trigger Kdump on add_taint()
+============================
+
+The kernel parameter panic_on_taint facilitates a conditional call to panic()
+from within add_taint() whenever the value set in this bitmask matches with the
+bit flag being set by add_taint().
+This will cause a kdump to occur at the add_taint()->panic() call.
+
Contact
=======
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f3eeecbb3f63..ed9597e37415 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1445,7 +1445,7 @@
hardlockup_all_cpu_backtrace=
[KNL] Should the hard-lockup detector generate
backtraces on all cpus.
- Format: <integer>
+ Format: 0 | 1
hashdist= [KNL,NUMA] Large hashes allocated during boot
are distributed across NUMA nodes. Defaults on
@@ -1513,9 +1513,9 @@
hung_task_panic=
[KNL] Should the hung task detector generate panics.
- Format: <integer>
+ Format: 0 | 1
- A nonzero value instructs the kernel to panic when a
+ A value of 1 instructs the kernel to panic when a
hung task is detected. The default value is controlled
by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
option. The value selected by this boot parameter can
@@ -3447,6 +3447,19 @@
bit 4: print ftrace buffer
bit 5: print all printk messages in buffer
+ panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
+ Format: <hex>[,nousertaint]
+ Hexadecimal bitmask representing the set of TAINT flags
+ that will cause the kernel to panic when add_taint() is
+ called with any of the flags in this set.
+ The optional switch "nousertaint" can be utilized to
+ prevent userspace forced crashes by writing to sysctl
+ /proc/sys/kernel/tainted any flagset matching with the
+ bitmask set on panic_on_taint.
+ See Documentation/admin-guide/tainted-kernels.rst for
+ extra details on the taint flags that users can pick
+ to compose the bitmask to assign to panic_on_taint.
+
panic_on_warn panic() instead of WARN(). Useful to cause kdump
on a WARN().
@@ -4652,9 +4665,9 @@
softlockup_panic=
[KNL] Should the soft-lockup detector generate panics.
- Format: <integer>
+ Format: 0 | 1
- A nonzero value instructs the soft-lockup detector
+ A value of 1 instructs the soft-lockup detector
to panic the machine when a soft-lockup occurs. It is
also controlled by the kernel.softlockup_panic sysctl
and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
@@ -4663,7 +4676,7 @@
softlockup_all_cpu_backtrace=
[KNL] Should the soft-lockup detector generate
backtraces on all cpus.
- Format: <integer>
+ Format: 0 | 1
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
@@ -4956,6 +4969,15 @@
switches= [HW,M68k]
+ sysctl.*= [KNL]
+ Set a sysctl parameter, right before loading the init
+ process, as if the value was written to the respective
+ /proc/sys/... file. Both '.' and '/' are recognized as
+ separators. Unrecognized parameters and invalid values
+ are reported in the kernel log. Sysctls registered
+ later by a loaded module cannot be set this way.
+ Example: sysctl.vm.swappiness=40
+
sysfs.deprecated=0|1 [KNL]
Enable/disable old style sysfs layout for old udev
on older distributions. When this option is enabled
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1ebf68d01141..83acf5025488 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -335,6 +335,20 @@ Path for the hotplug policy agent.
Default value is "``/sbin/hotplug``".
+hung_task_all_cpu_backtrace
+===========================
+
+If this option is set, the kernel will send an NMI to all CPUs to dump
+their backtraces when a hung task is detected. This file shows up if
+CONFIG_DETECT_HUNG_TASK and CONFIG_SMP are enabled.
+
+0: Won't show all CPUs backtraces when a hung task is detected.
+This is the default behavior.
+
+1: Will non-maskably interrupt all CPUs and dump their backtraces when
+a hung task is detected.
+
+
hung_task_panic
===============
@@ -632,6 +646,22 @@ rate for each task.
scanned for a given scan.
+oops_all_cpu_backtrace
+======================
+
+If this option is set, the kernel will send an NMI to all CPUs to dump
+their backtraces when an oops event occurs. It should be used as a last
+resort in case a panic cannot be triggered (to protect VMs running, for
+example) or kdump can't be collected. This file shows up if CONFIG_SMP
+is enabled.
+
+0: Won't show all CPUs backtraces when an oops is detected.
+This is the default behavior.
+
+1: Will non-maskably interrupt all CPUs and dump their backtraces when
+an oops event is detected.
+
+
osrelease, ostype & version
===========================
@@ -1239,6 +1269,13 @@ ORed together. The letters are seen in "Tainted" line of Oops reports.
See :doc:`/admin-guide/tainted-kernels` for more information.
+Note:
+ writes to this sysctl interface will fail with ``EINVAL`` if the kernel is
+ booted with the command line option ``panic_on_taint=<bitmask>,nousertaint``
+ and any of the ORed together values being written to ``tainted`` match with
+ the bitmask declared on panic_on_taint.
+ See :doc:`/admin-guide/kernel-parameters` for more details on that particular
+ kernel command line option and its optional ``nousertaint`` switch.
threads-max
===========
diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index 2e939ff10b86..6068266dd303 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -148,23 +148,46 @@ NOTE: Some pages, such as DAX pages, cannot be pinned with longterm pins. That's
because DAX pages do not have a separate page cache, and so "pinning" implies
locking down file system blocks, which is not (yet) supported in that way.
-CASE 3: Hardware with page faulting support
--------------------------------------------
-Here, a well-written driver doesn't normally need to pin pages at all. However,
-if the driver does choose to do so, it can register MMU notifiers for the range,
-and will be called back upon invalidation. Either way (avoiding page pinning, or
-using MMU notifiers to unpin upon request), there is proper synchronization with
-both filesystem and mm (page_mkclean(), munmap(), etc).
-
-Therefore, neither flag needs to be set.
-
-In this case, ideally, neither get_user_pages() nor pin_user_pages() should be
-called. Instead, the software should be written so that it does not pin pages.
-This allows mm and filesystems to operate more efficiently and reliably.
+CASE 3: MMU notifier registration, with or without page faulting hardware
+-------------------------------------------------------------------------
+Device drivers can pin pages via get_user_pages*(), and register for mmu
+notifier callbacks for the memory range. Then, upon receiving a notifier
+"invalidate range" callback, stop the device from using the range, and unpin
+the pages. There may be other possible schemes, such as for example explicitly
+synchronizing against pending IO, that accomplish approximately the same thing.
+
+Or, if the hardware supports replayable page faults, then the device driver can
+avoid pinning entirely (this is ideal), as follows: register for mmu notifier
+callbacks as above, but instead of stopping the device and unpinning in the
+callback, simply remove the range from the device's page tables.
+
+Either way, as long as the driver unpins the pages upon mmu notifier callback,
+then there is proper synchronization with both filesystem and mm
+(page_mkclean(), munmap(), etc). Therefore, neither flag needs to be set.
CASE 4: Pinning for struct page manipulation only
-------------------------------------------------
-Here, normal GUP calls are sufficient, so neither flag needs to be set.
+If only struct page data (as opposed to the actual memory contents that a page
+is tracking) is affected, then normal GUP calls are sufficient, and neither flag
+needs to be set.
+
+CASE 5: Pinning in order to write to the data within the page
+-------------------------------------------------------------
+Even though neither DMA nor Direct IO is involved, just a simple case of "pin,
+write to a page's data, unpin" can cause a problem. Case 5 may be considered a
+superset of Case 1, plus Case 2, plus anything that invokes that pattern. In
+other words, if the code is neither Case 1 nor Case 2, it may still require
+FOLL_PIN, for patterns like this:
+
+Correct (uses FOLL_PIN calls):
+ pin_user_pages()
+ write to the data within the pages
+ unpin_user_pages()
+
+INCORRECT (uses FOLL_GET calls):
+ get_user_pages()
+ write to the data within the pages
+ put_page()
page_maybe_dma_pinned(): the whole point of pinning
===================================================
diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h
index 89128489cb59..9945ff483eaf 100644
--- a/arch/alpha/include/asm/cacheflush.h
+++ b/arch/alpha/include/asm/cacheflush.h
@@ -4,19 +4,6 @@
#include <linux/mm.h>
-/* Caches aren't brain-dead on the Alpha. */
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define flush_dcache_page(page) do { } while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
/* Note that the following two definitions are _highly_ dependent
on the contexts in which they are used in the kernel. I personally
think it is criminal how loosely defined these macros are. */
@@ -48,7 +35,7 @@ extern void smp_imb(void);
extern void __load_new_mm_context(struct mm_struct *);
static inline void
-flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
if (vma->vm_flags & VM_EXEC) {
@@ -59,20 +46,17 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
mm->context[smp_processor_id()] = 0;
}
}
-#else
-extern void flush_icache_user_range(struct vm_area_struct *vma,
+#define flush_icache_user_page flush_icache_user_page
+#else /* CONFIG_SMP */
+extern void flush_icache_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long addr, int len);
-#endif
+#define flush_icache_user_page flush_icache_user_page
+#endif /* CONFIG_SMP */
/* This is used only in __do_fault and do_swap_page. */
#define flush_icache_page(vma, page) \
- flush_icache_user_range((vma), (page), 0, 0)
+ flush_icache_user_page((vma), (page), 0, 0)
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
-} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
+#include <asm-generic/cacheflush.h>
#endif /* _ALPHA_CACHEFLUSH_H */
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 5f90df30be20..52995bf413fe 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -740,7 +740,7 @@ ipi_flush_icache_page(void *x)
}
void
-flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
struct mm_struct *mm = vma->vm_mm;
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 7114b9aa46b8..2e24e765e6d3 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -258,11 +258,11 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
/*
- * flush_cache_user_range is used when we want to ensure that the
+ * flush_icache_user_range is used when we want to ensure that the
* Harvard caches are synchronised for the user space address range.
* This is used for the ARM private sys_cacheflush system call.
*/
-#define flush_cache_user_range(s,e) __cpuc_coherent_user_range(s,e)
+#define flush_icache_user_range(s,e) __cpuc_coherent_user_range(s,e)
/*
* Perform necessary cache operations to ensure that data previously
@@ -318,9 +318,6 @@ extern void flush_kernel_dcache_page(struct page *);
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
-#define flush_icache_user_range(vma,page,addr,len) \
- flush_dcache_page(page)
-
/*
* We don't appear to need to do anything here. In fact, if we did, we'd
* duplicate cache flushing elsewhere performed by flush_dcache_page().
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index cd1234c103fc..98ca3e3fa847 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -98,8 +98,8 @@ void set_fiq_handler(void *start, unsigned int length)
memcpy(base + offset, start, length);
if (!cache_is_vipt_nonaliasing())
- flush_icache_range((unsigned long)base + offset, offset +
- length);
+ flush_icache_range((unsigned long)base + offset,
+ (unsigned long)base + offset + length);
flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1e70e7227f0f..316a7687f813 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -566,7 +566,7 @@ __do_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
- ret = flush_cache_user_range(start, start + chunk);
+ ret = flush_icache_user_range(start, start + chunk);
if (ret)
return ret;
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index ce50c1f1f1ea..9384fd8fc13c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -94,20 +94,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
kick_all_cpus_sync();
}
-
-static inline void flush_cache_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long pfn)
-{
-}
-
-static inline void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
+#define flush_icache_range flush_icache_range
/*
* Cache maintenance functions used by the DMA API. Not to be used directly.
@@ -123,12 +110,7 @@ extern void __dma_flush_area(const void *, size_t);
*/
extern void copy_to_user_page(struct vm_area_struct *, struct page *,
unsigned long, void *, const void *, unsigned long);
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- } while (0)
-
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define copy_to_user_page copy_to_user_page
/*
* flush_dcache_page is used when the kernel has written to the page
@@ -154,29 +136,11 @@ static __always_inline void __flush_icache_all(void)
dsb(ish);
}
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-
-/*
- * We don't appear to need to do anything here. In fact, if we did, we'd
- * duplicate cache flushing elsewhere performed by flush_dcache_page().
- */
-#define flush_icache_page(vma,page) do { } while (0)
-
-/*
- * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
- */
-static inline void flush_cache_vmap(unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-}
-
int set_memory_valid(unsigned long addr, int numpages, int enable);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
-#endif
+#include <asm-generic/cacheflush.h>
+
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/c6x/include/asm/cacheflush.h b/arch/c6x/include/asm/cacheflush.h
index 4540b40475e6..10922d528de6 100644
--- a/arch/c6x/include/asm/cacheflush.h
+++ b/arch/c6x/include/asm/cacheflush.h
@@ -17,21 +17,6 @@
#include <asm/string.h>
/*
- * virtually-indexed cache management (our cache is physically indexed)
- */
-#define flush_cache_all() do {} while (0)
-#define flush_cache_mm(mm) do {} while (0)
-#define flush_cache_dup_mm(mm) do {} while (0)
-#define flush_cache_range(mm, start, end) do {} while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do {} while (0)
-#define flush_cache_vmap(start, end) do {} while (0)
-#define flush_cache_vunmap(start, end) do {} while (0)
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define flush_dcache_page(page) do {} while (0)
-#define flush_dcache_mmap_lock(mapping) do {} while (0)
-#define flush_dcache_mmap_unlock(mapping) do {} while (0)
-
-/*
* physically-indexed cache management
*/
#define flush_icache_range(s, e) \
@@ -49,14 +34,12 @@ do { \
(unsigned long) page_address(page) + PAGE_SIZE)); \
} while (0)
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
flush_icache_range((unsigned) (dst), (unsigned) (dst) + (len)); \
} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
+#include <asm-generic/cacheflush.h>
#endif /* _ASM_C6X_CACHEFLUSH_H */
diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index fb447de45d54..6eff0730e6ef 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -25,29 +25,17 @@
#define LINESIZE 32
#define LINEBITS 5
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define flush_dcache_page(page) do { } while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_icache_page(vma, pg) do { } while (0)
-#define flush_icache_user_range(vma, pg, adr, len) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
/*
* Flush Dcache range through current map.
*/
extern void flush_dcache_range(unsigned long start, unsigned long end);
+#define flush_dcache_range flush_dcache_range
/*
* Flush Icache range through current map.
*/
extern void flush_icache_range(unsigned long start, unsigned long end);
+#define flush_icache_range flush_icache_range
/*
* Memory-management related flushes are there to ensure in non-physically
@@ -78,6 +66,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, void *src, int len);
+#define copy_to_user_page copy_to_user_page
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
@@ -85,4 +74,6 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);
+#include <asm-generic/cacheflush.h>
+
#endif
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
index 6d3478f8abc8..708c0fa5d975 100644
--- a/arch/ia64/include/asm/cacheflush.h
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -12,44 +12,22 @@
#include <asm/page.h>
-/*
- * Cache flushing routines. This is the kind of stuff that can be very expensive, so try
- * to avoid them whenever possible.
- */
-
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define flush_icache_page(vma,page) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
#define flush_dcache_page(page) \
do { \
clear_bit(PG_arch_1, &(page)->flags); \
} while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-
-extern void flush_icache_range (unsigned long start, unsigned long end);
+extern void flush_icache_range(unsigned long start, unsigned long end);
+#define flush_icache_range flush_icache_range
extern void clflush_cache_range(void *addr, int size);
-
-#define flush_icache_user_range(vma, page, user_addr, len) \
+#define flush_icache_user_page(vma, page, user_addr, len) \
do { \
unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK); \
flush_icache_range(_addr, _addr + (len)); \
} while (0)
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
-} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
+#include <asm-generic/cacheflush.h>
#endif /* _ASM_IA64_CACHEFLUSH_H */
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 1e2544ecaf88..1ac55e7b47f0 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -254,9 +254,11 @@ static inline void __flush_page_to_ram(void *vaddr)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page))
-extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len);
extern void flush_icache_range(unsigned long address, unsigned long endaddr);
+extern void flush_icache_user_range(unsigned long address,
+ unsigned long endaddr);
static inline void copy_to_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr,
@@ -264,7 +266,7 @@ static inline void copy_to_user_page(struct vm_area_struct *vma,
{
flush_cache_page(vma, vaddr, page_to_pfn(page));
memcpy(dst, src, len);
- flush_icache_user_range(vma, page, vaddr, len);
+ flush_icache_user_page(vma, page, vaddr, len);
}
static inline void copy_from_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr,
diff --git a/arch/m68k/include/asm/cacheflush_no.h b/arch/m68k/include/asm/cacheflush_no.h
index 11e9a9dcbfb2..2731f07e7be8 100644
--- a/arch/m68k/include/asm/cacheflush_no.h
+++ b/arch/m68k/include/asm/cacheflush_no.h
@@ -9,25 +9,8 @@
#include <asm/mcfsim.h>
#define flush_cache_all() __flush_cache_all()
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr) do { } while (0)
#define flush_dcache_range(start, len) __flush_dcache_all()
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define flush_dcache_page(page) do { } while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_range(start, len) __flush_icache_all()
-#define flush_icache_page(vma,pg) do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
void mcf_cache_push(void);
@@ -98,4 +81,6 @@ static inline void cache_clear(unsigned long paddr, int len)
__clear_cache_all();
}
+#include <asm-generic/cacheflush.h>
+
#endif /* _M68KNOMMU_CACHEFLUSH_H */
diff --git a/arch/m68k/mm/cache.c b/arch/m68k/mm/cache.c
index 079e64898e6a..5ecb3310e874 100644
--- a/arch/m68k/mm/cache.c
+++ b/arch/m68k/mm/cache.c
@@ -73,7 +73,7 @@ static unsigned long virt_to_phys_slow(unsigned long vaddr)
/* Push n pages at kernel virtual address and clear the icache */
/* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
-void flush_icache_range(unsigned long address, unsigned long endaddr)
+void flush_icache_user_range(unsigned long address, unsigned long endaddr)
{
if (CPU_IS_COLDFIRE) {
unsigned long start, end;
@@ -104,9 +104,18 @@ void flush_icache_range(unsigned long address, unsigned long endaddr)
: "di" (FLUSH_I));
}
}
+
+/*
+ * Flush the instruction cache for [address, endaddr).
+ *
+ * Wrapper around flush_icache_user_range(): the current get_fs() value is
+ * saved, KERNEL_DS is selected with set_fs() for the duration of the flush,
+ * and the saved value is restored afterwards.
+ * NOTE(review): presumably needed because flush_icache_user_range() resolves
+ * addresses according to the current fs setting - confirm.
+ */
+void flush_icache_range(unsigned long address, unsigned long endaddr)
+{
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ flush_icache_user_range(address, endaddr);
+ /* Restore whatever the caller had in effect before the flush. */
+ set_fs(old_fs);
+}
EXPORT_SYMBOL(flush_icache_range);
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
if (CPU_IS_COLDFIRE) {
diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index 11f56c85056b..39f8fb6768d8 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -57,9 +57,6 @@ void microblaze_cache_init(void);
#define invalidate_icache() mbc->iin();
#define invalidate_icache_range(start, end) mbc->iinr(start, end);
-#define flush_icache_user_range(vma, pg, adr, len) flush_icache();
-#define flush_icache_page(vma, pg) do { } while (0)
-
#define enable_dcache() mbc->de();
#define disable_dcache() mbc->dd();
/* FIXME for LL-temac driver */
@@ -77,27 +74,9 @@ do { \
flush_dcache_range((unsigned) (addr), (unsigned) (addr) + PAGE_SIZE); \
} while (0);
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-
#define flush_cache_page(vma, vmaddr, pfn) \
flush_dcache_range(pfn << PAGE_SHIFT, (pfn << PAGE_SHIFT) + PAGE_SIZE);
-/* MS: kgdb code use this macro, wrong len with FLASH */
-#if 0
-#define flush_cache_range(vma, start, len) { \
- flush_icache_range((unsigned) (start), (unsigned) (start) + (len)); \
- flush_dcache_range((unsigned) (start), (unsigned) (start) + (len)); \
-}
-#endif
-
-#define flush_cache_range(vma, start, len) do { } while (0)
-
static inline void copy_to_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr,
void *dst, void *src, int len)
@@ -109,12 +88,8 @@ static inline void copy_to_user_page(struct vm_area_struct *vma,
flush_dcache_range(addr, addr + PAGE_SIZE);
}
}
+#define copy_to_user_page copy_to_user_page
-static inline void copy_from_user_page(struct vm_area_struct *vma,
- struct page *page, unsigned long vaddr,
- void *dst, void *src, int len)
-{
- memcpy(dst, src, len);
-}
+#include <asm-generic/cacheflush.h>
#endif /* _ASM_MICROBLAZE_CACHEFLUSH_H */
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index caddded56e77..7d6824f7c0e8 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -44,9 +44,9 @@ void invalidate_kernel_vmap_range(void *addr, int size);
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
#else
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len);
-#define flush_icache_user_range flush_icache_user_range
+#define flush_icache_user_page flush_icache_user_page
#include <asm-generic/cacheflush.h>
#endif
diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
index 254703653b6f..6eb98a7ad27d 100644
--- a/arch/nds32/mm/cacheflush.c
+++ b/arch/nds32/mm/cacheflush.c
@@ -35,9 +35,8 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
kunmap_atomic((void *)kaddr);
local_irq_restore(flags);
}
-EXPORT_SYMBOL(flush_icache_page);
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
unsigned long kaddr;
diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h
index 79d5d7753fe4..eeac40d4a854 100644
--- a/arch/openrisc/include/asm/cacheflush.h
+++ b/arch/openrisc/include/asm/cacheflush.h
@@ -62,31 +62,12 @@ static inline void flush_dcache_page(struct page *page)
clear_bit(PG_dc_clean, &page->flags);
}
-/*
- * Other interfaces are not required since we do not have virtually
- * indexed or tagged caches. So we can use the default here.
- */
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_icache_range(start, end) do { } while (0)
-#define flush_icache_page(vma, pg) do { } while (0)
-#define flush_icache_user_range(vma, pg, adr, len) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- if (vma->vm_flags & VM_EXEC) \
- sync_icache_dcache(page); \
- } while (0)
+/*
+ * Call sync_icache_dcache() on @page when @vma is mapped executable
+ * (VM_EXEC set in vm_flags).  @addr and @len are accepted for interface
+ * compatibility and are not used.
+ *
+ * @vma is parenthesised so that any pointer expression can be passed safely.
+ */
+#define flush_icache_user_page(vma, page, addr, len)	\
+do {							\
+	if ((vma)->vm_flags & VM_EXEC)			\
+		sync_icache_dcache(page);		\
+} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
+#include <asm-generic/cacheflush.h>
#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e92191b390f3..de600b915a3c 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -4,23 +4,9 @@
#ifndef _ASM_POWERPC_CACHEFLUSH_H
#define _ASM_POWERPC_CACHEFLUSH_H
-#ifdef __KERNEL__
-
#include <linux/mm.h>
#include <asm/cputable.h>
-/*
- * No cache flushing is required when address mappings are changed,
- * because the caches on PowerPCs are physically addressed.
- */
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define flush_icache_page(vma, page) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
-
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Book3s has no ptesync after setting a pte, so without this ptesync it's
@@ -33,20 +19,20 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
asm volatile("ptesync" ::: "memory");
}
-#else
-static inline void flush_cache_vmap(unsigned long start, unsigned long end) { }
-#endif
+#define flush_cache_vmap flush_cache_vmap
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
void flush_icache_range(unsigned long start, unsigned long stop);
-extern void flush_icache_user_range(struct vm_area_struct *vma,
- struct page *page, unsigned long addr,
- int len);
-extern void flush_dcache_icache_page(struct page *page);
+#define flush_icache_range flush_icache_range
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long addr, int len);
+#define flush_icache_user_page flush_icache_user_page
+
+void flush_dcache_icache_page(struct page *page);
void __flush_dcache_icache(void *page);
/**
@@ -111,14 +97,6 @@ static inline void invalidate_dcache_range(unsigned long start,
mb(); /* sync */
}
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
- } while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
-
-#endif /* __KERNEL__ */
+#include <asm-generic/cacheflush.h>
#endif /* _ASM_POWERPC_CACHEFLUSH_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 18aed9775a3c..ddfc4c90ebb6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -581,7 +581,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* We always ask for write permission since the common case
* is that the page is writable.
*/
- if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
write_ok = true;
} else {
/* Call KVM generic code to do the slow-path check */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 02219e28b1e4..a47fa8b4d0f0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -795,7 +795,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
* is that the page is writable.
*/
hva = gfn_to_hva_memslot(memslot, gfn);
- if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
upgrade_write = true;
} else {
unsigned long pfn;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5f7fe13211e9..e2d6a6236aa7 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -577,7 +577,7 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
flush_dcache_page(pg);
}
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
unsigned long maddr;
@@ -586,7 +586,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
flush_icache_range(maddr, maddr + len);
kunmap(page);
}
-EXPORT_SYMBOL(flush_icache_user_range);
/*
* System memory should not be in /proc/iomem but various tools expect it
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index b63086b663ef..9cc1a129737e 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -30,11 +30,9 @@ int read_user_stack_slow(void __user *ptr, void *buf, int nb)
unsigned long addr = (unsigned long) ptr;
unsigned long offset;
struct page *page;
- int nrpages;
void *kaddr;
- nrpages = __get_user_pages_fast(addr, 1, 1, &page);
- if (nrpages == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) {
kaddr = page_address(page);
/* align address to page boundary */
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index c8677c75f82c..23ff70350992 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -8,65 +8,6 @@
#include <linux/mm.h>
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-
-/*
- * The cache doesn't need to be flushed when TLB entries change when
- * the cache is mapped to physical memory, not virtual memory
- */
-static inline void flush_cache_all(void)
-{
-}
-
-static inline void flush_cache_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_cache_dup_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end)
-{
-}
-
-static inline void flush_cache_page(struct vm_area_struct *vma,
- unsigned long vmaddr,
- unsigned long pfn)
-{
-}
-
-static inline void flush_dcache_mmap_lock(struct address_space *mapping)
-{
-}
-
-static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
-{
-}
-
-static inline void flush_icache_page(struct vm_area_struct *vma,
- struct page *page)
-{
-}
-
-static inline void flush_cache_vmap(unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-}
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
- } while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
-
static inline void local_flush_icache_all(void)
{
asm volatile ("fence.i" ::: "memory");
@@ -79,13 +20,15 @@ static inline void flush_dcache_page(struct page *page)
if (test_bit(PG_dcache_clean, &page->flags))
clear_bit(PG_dcache_clean, &page->flags);
}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
/*
* RISC-V doesn't have an instruction to flush parts of the instruction cache,
* so instead we just flush the whole thing.
*/
#define flush_icache_range(start, end) flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) flush_icache_mm(vma->vm_mm, 0)
+/*
+ * Per-page user icache flush: flushes the icache for the whole mm that owns
+ * @vma via flush_icache_mm(); @pg, @addr and @len are not used.
+ * @vma is parenthesised so that any pointer expression can be passed safely.
+ */
+#define flush_icache_user_page(vma, pg, addr, len) \
+	flush_icache_mm((vma)->vm_mm, 0)
#ifndef CONFIG_SMP
@@ -105,4 +48,6 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
+#include <asm-generic/cacheflush.h>
+
#endif /* _ASM_RISCV_CACHEFLUSH_H */
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index b932e42ef028..fe7400079b97 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -46,6 +46,7 @@ extern void flush_cache_range(struct vm_area_struct *vma,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
extern void flush_icache_range(unsigned long start, unsigned long end);
+#define flush_icache_user_range flush_icache_range
extern void flush_icache_page(struct vm_area_struct *vma,
struct page *page);
extern void flush_cache_sigtramp(unsigned long address);
diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h
index fb66094a2c30..41c6d734a474 100644
--- a/arch/sparc/include/asm/cacheflush_32.h
+++ b/arch/sparc/include/asm/cacheflush_32.h
@@ -17,8 +17,6 @@
#define flush_icache_range(start, end) do { } while (0)
#define flush_icache_page(vma, pg) do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
flush_cache_page(vma, vaddr, page_to_pfn(page));\
diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h
index e7517434d1fa..b9341836597e 100644
--- a/arch/sparc/include/asm/cacheflush_64.h
+++ b/arch/sparc/include/asm/cacheflush_64.h
@@ -49,7 +49,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end);
void flush_dcache_page(struct page *page);
#define flush_icache_page(vma, pg) do { } while(0)
-#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
void flush_ptrace_access(struct vm_area_struct *, struct page *,
unsigned long uaddr, void *kaddr,
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 70ee60383900..ff9c62828962 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -2,6 +2,8 @@
#ifndef __UM_TLB_H
#define __UM_TLB_H
+#include <linux/mm.h>
+
#include <asm/tlbflush.h>
#include <asm-generic/cacheflush.h>
#include <asm-generic/tlb.h>
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
index dc8c0b41538f..ff0be92ebc32 100644
--- a/arch/unicore32/include/asm/cacheflush.h
+++ b/arch/unicore32/include/asm/cacheflush.h
@@ -133,14 +133,6 @@ extern void flush_cache_page(struct vm_area_struct *vma,
#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
/*
- * flush_cache_user_range is used when we want to ensure that the
- * Harvard caches are synchronised for the user space address range.
- * This is used for the UniCore private sys_cacheflush system call.
- */
-#define flush_cache_user_range(vma, start, end) \
- __cpuc_coherent_user_range((start) & PAGE_MASK, PAGE_ALIGN(end))
-
-/*
* Perform necessary cache operations to ensure that data previously
* stored within this range of addresses can be executed by the CPU.
*/
@@ -170,9 +162,6 @@ extern void flush_dcache_page(struct page *);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_icache_user_range(vma, page, addr, len) \
- flush_dcache_page(page)
-
/*
* We don't appear to need to do anything here. In fact, if we did, we'd
* duplicate cache flushing elsewhere performed by flush_dcache_page().
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 63feaf2a5f93..b192d917a6d0 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_CACHEFLUSH_H
#define _ASM_X86_CACHEFLUSH_H
+#include <linux/mm.h>
+
/* Caches aren't brain-dead on the intel. */
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index a0d50be5a8cb..cf907e5bf2f2 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -145,6 +145,8 @@ void local_flush_cache_page(struct vm_area_struct *vma,
#endif
+#define flush_icache_user_range flush_icache_range
+
/* Ensure consistency between data and instruction cache. */
#define local_flush_icache_range(start, end) \
do { \
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index 6f769c527fae..10c214bd0903 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -10,7 +10,6 @@
* Sakari Ailus <sakari.ailus@iki.fi>
*/
-#include <asm/cacheflush.h>
#include <linux/clk.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -19,6 +18,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-ioctl.h>
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 97f948f8f4e6..d1ecd6da11a2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -7,7 +7,6 @@
* Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
*/
-#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
@@ -25,6 +24,8 @@
#include <linux/dax.h>
#include <linux/nd.h>
#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
#include "pmem.h"
#include "pfn.h"
#include "nd.h"
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 21a59b598ed8..596132a96cd5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1762,15 +1762,14 @@ static int set_bit_to_user(int nr, void __user *addr)
int bit = nr + (log % PAGE_SIZE) * 8;
int r;
- r = get_user_pages_fast(log, 1, FOLL_WRITE, &page);
+ r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page);
if (r < 0)
return r;
BUG_ON(r != 1);
base = kmap_atomic(page);
set_bit(bit, base);
kunmap_atomic(base);
- set_page_dirty_lock(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
return 0;
}
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9b82bc111d0a..87ce229c63cf 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -854,7 +854,7 @@ static int load_flat_file(struct linux_binprm *bprm,
#endif /* CONFIG_BINFMT_FLAT_OLD */
}
- flush_icache_range(start_code, end_code);
+ flush_icache_user_range(start_code, end_code);
/* zero the BSS, BRK and stack areas */
if (clear_user((void __user *)(datapos + data_len), bss_len +
diff --git a/fs/exec.c b/fs/exec.c
index 93ff1c4c7ebb..02d0c5d19be5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1053,14 +1053,17 @@ out:
}
EXPORT_SYMBOL_GPL(kernel_read_file_from_fd);
+#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
+ defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
if (res > 0)
- flush_icache_range(addr, addr + len);
+ flush_icache_user_range(addr, addr + len);
return res;
}
EXPORT_SYMBOL(read_code);
+#endif
/*
* Maps the mm_struct mm into the current task struct.
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index df2143e05c57..5b405f32971d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
+#include <linux/mount.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -1703,3 +1704,147 @@ int __init proc_sys_init(void)
return sysctl_init();
}
+
+/* One legacy command-line parameter name and the sysctl it stands for. */
+struct sysctl_alias {
+ const char *kernel_param;	/* name as matched against the command line */
+ const char *sysctl_param;	/* dotted sysctl name, e.g. "kernel.hung_task_panic" */
+};
+
+/*
+ * Historically some settings had both sysctl and a command line parameter.
+ * With the generic sysctl. parameter support, we can handle them at a single
+ * place and only keep the historical name for compatibility. This is not meant
+ * to add brand new aliases. When adding existing aliases, consider whether
+ * the possibly different moment of changing the value (e.g. from early_param
+ * to the moment do_sysctl_args() is called) is an issue for the specific
+ * parameter.
+ *
+ * The table must end with an empty entry: sysctl_find_alias() stops at the
+ * first element whose kernel_param is NULL.
+ */
+static const struct sysctl_alias sysctl_aliases[] = {
+ {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" },
+ {"hung_task_panic", "kernel.hung_task_panic" },
+ {"numa_zonelist_order", "vm.numa_zonelist_order" },
+ {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" },
+ {"softlockup_panic", "kernel.softlockup_panic" },
+ { }
+};
+
+/*
+ * Look up @param among the legacy command-line names in sysctl_aliases[].
+ *
+ * Returns the matching sysctl name, or NULL when @param is not an alias.
+ * The comparison is an exact strcmp() match.
+ */
+static const char *sysctl_find_alias(char *param)
+{
+	const struct sysctl_alias *entry = sysctl_aliases;
+
+	/* The table is terminated by an entry with a NULL kernel_param. */
+	while (entry->kernel_param) {
+		if (!strcmp(entry->kernel_param, param))
+			return entry->sysctl_param;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Set one sysctl value passed on the kernel command line.
+ *
+ * Used as the per-parameter handler given to parse_args() by
+ * do_sysctl_args().
+ *
+ * @param:  parameter name.  Only "sysctl.<name>", "sysctl/<name>" and the
+ *          legacy names listed in sysctl_aliases[] are acted upon; anything
+ *          else is silently ignored.
+ * @val:    text to write to the sysctl file.  May be NULL or empty when the
+ *          parameter was given without a value; such parameters are
+ *          reported and skipped.
+ * @unused: not used.
+ * @arg:    struct vfsmount ** caching the temporary proc mount between calls.
+ *
+ * Always returns 0; failures are only reported through pr_err().
+ */
+static int process_sysctl_arg(char *param, char *val,
+			       const char *unused, void *arg)
+{
+	char *path;
+	struct vfsmount **proc_mnt = arg;
+	struct file_system_type *proc_fs_type;
+	struct file *file;
+	int len;
+	int err;
+	loff_t pos = 0;
+	ssize_t wret;
+
+	if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
+		param += sizeof("sysctl") - 1;
+
+		/* Require a separator so that e.g. "sysctlfoo" is not matched. */
+		if (param[0] != '/' && param[0] != '.')
+			return 0;
+
+		param++;
+	} else {
+		param = (char *) sysctl_find_alias(param);
+		if (!param)
+			return 0;
+	}
+
+	/*
+	 * A parameter without "=value" reaches us with a NULL (or empty) val;
+	 * there is nothing to write and strlen(NULL) below would oops.
+	 */
+	if (!val || !*val) {
+		pr_err("Failed to set sysctl parameter '%s': no value given\n",
+			param);
+		return 0;
+	}
+
+	/*
+	 * To set sysctl options, we use a temporary mount of proc, look up the
+	 * respective sys/ file and write to it. To avoid mounting it when no
+	 * options were given, we mount it only when the first sysctl option is
+	 * found. Why not a persistent mount? There are problems with a
+	 * persistent mount of proc in that it forces userspace not to use any
+	 * proc mount options.
+	 */
+	if (!*proc_mnt) {
+		proc_fs_type = get_fs_type("proc");
+		if (!proc_fs_type) {
+			pr_err("Failed to find procfs to set sysctl from command line\n");
+			return 0;
+		}
+		*proc_mnt = kern_mount(proc_fs_type);
+		put_filesystem(proc_fs_type);
+		if (IS_ERR(*proc_mnt)) {
+			pr_err("Failed to mount procfs to set sysctl from command line\n");
+			/*
+			 * Do not cache the error pointer: a later call would
+			 * skip the mount attempt and dereference it below.
+			 */
+			*proc_mnt = NULL;
+			return 0;
+		}
+	}
+
+	path = kasprintf(GFP_KERNEL, "sys/%s", param);
+	if (!path)
+		panic("%s: Failed to allocate path for %s\n", __func__, param);
+	/* "kernel.foo" -> "sys/kernel/foo" */
+	strreplace(path, '.', '/');
+
+	file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		if (err == -ENOENT)
+			pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n",
+				param, val);
+		else if (err == -EACCES)
+			pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n",
+				param, val);
+		else
+			pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n",
+				file, param, val);
+		goto out;
+	}
+	len = strlen(val);
+	wret = kernel_write(file, val, len, &pos);
+	if (wret < 0) {
+		err = wret;
+		if (err == -EINVAL)
+			pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n",
+				param, val);
+		else
+			pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n",
+				ERR_PTR(err), param, val);
+	} else if (wret != len) {
+		pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s'\n",
+			wret, len, path, param, val);
+	}
+
+	err = filp_close(file, NULL);
+	if (err)
+		pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s'\n",
+			ERR_PTR(err), param, val);
+out:
+	kfree(path);
+	return 0;
+}
+
+/*
+ * Apply the sysctl settings found on the kernel command line.
+ *
+ * parse_args() is run over a duplicate of saved_command_line with
+ * process_sysctl_arg() as the handler for each parameter.  The handler may
+ * create a temporary proc mount and hand it back through proc_mnt; if it
+ * did, the mount is released here.  Panics if the command line cannot be
+ * duplicated.
+ */
+void do_sysctl_args(void)
+{
+	struct vfsmount *proc_mnt = NULL;
+	char *cmdline_copy = kstrdup(saved_command_line, GFP_KERNEL);
+
+	if (!cmdline_copy)
+		panic("%s: Failed to allocate copy of command line\n", __func__);
+
+	parse_args("Setting sysctl args", cmdline_copy, NULL, 0, -1, -1,
+		   &proc_mnt, process_sysctl_arg);
+
+	/* Only set when at least one sysctl parameter triggered the mount. */
+	if (proc_mnt)
+		kern_unmount(proc_mnt);
+
+	kfree(cmdline_copy);
+}
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index cac7404b2bdd..907fa5d16494 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -1,11 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_CACHEFLUSH_H
-#define __ASM_CACHEFLUSH_H
-
-/* Keep includes the same across arches. */
-#include <linux/mm.h>
-
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#ifndef _ASM_GENERIC_CACHEFLUSH_H
+#define _ASM_GENERIC_CACHEFLUSH_H
/*
* The cache doesn't need to be flushed when TLB entries change when
@@ -45,12 +40,14 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
}
#endif
-#ifndef flush_dcache_page
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static inline void flush_dcache_page(struct page *page)
{
}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
#endif
+
#ifndef flush_dcache_mmap_lock
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
{
@@ -69,6 +66,10 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
}
#endif
+#ifndef flush_icache_user_range
+#define flush_icache_user_range flush_icache_range
+#endif
+
#ifndef flush_icache_page
static inline void flush_icache_page(struct vm_area_struct *vma,
struct page *page)
@@ -76,8 +77,8 @@ static inline void flush_icache_page(struct vm_area_struct *vma,
}
#endif
-#ifndef flush_icache_user_range
-static inline void flush_icache_user_range(struct vm_area_struct *vma,
+#ifndef flush_icache_user_page
+static inline void flush_icache_user_page(struct vm_area_struct *vma,
struct page *page,
unsigned long addr, int len)
{
@@ -100,7 +101,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
+ flush_icache_user_page(vma, page, vaddr, len); \
} while (0)
#endif
@@ -109,4 +110,4 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
memcpy(dst, src, len)
#endif
-#endif /* __ASM_CACHEFLUSH_H */
+#endif /* _ASM_GENERIC_CACHEFLUSH_H */
diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index 5aad06b4ca7b..3028b644b4fb 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -109,7 +109,8 @@ void _dev_info(const struct device *dev, const char *fmt, ...)
#define dev_info(dev, fmt, ...) \
_dev_info(dev, dev_fmt(fmt), ##__VA_ARGS__)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define dev_dbg(dev, fmt, ...) \
dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__)
#elif defined(DEBUG)
@@ -181,7 +182,8 @@ do { \
dev_level_ratelimited(dev_notice, dev, fmt, ##__VA_ARGS__)
#define dev_info_ratelimited(dev, fmt, ...) \
dev_level_ratelimited(dev_info, dev, fmt, ##__VA_ARGS__)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* descriptor check is first to prevent flooding with "callbacks suppressed" */
#define dev_dbg_ratelimited(dev, fmt, ...) \
do { \
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 4cf02ecd67de..abcd5fde30eb 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -48,7 +48,7 @@ struct _ddebug {
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG_CORE)
int ddebug_add_module(struct _ddebug *tab, unsigned int n,
const char *modname);
extern int ddebug_remove_module(const char *mod_name);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index c309f43bde45..a06a78c67f19 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -68,6 +68,8 @@ struct ipc_namespace {
struct user_namespace *user_ns;
struct ucounts *ucounts;
+ struct llist_node mnt_llist;
+
struct ns_common ns;
} __randomize_layout;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9b7a8d74a9d6..82d91547d122 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -520,6 +520,12 @@ static inline u32 int_sqrt64(u64 x)
}
#endif
+#ifdef CONFIG_SMP
+extern unsigned int sysctl_oops_all_cpu_backtrace;
+#else
+#define sysctl_oops_all_cpu_backtrace 0
+#endif /* CONFIG_SMP */
+
extern void bust_spinlocks(int yes);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
@@ -528,6 +534,8 @@ extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int panic_on_io_nmi;
extern int panic_on_warn;
+extern unsigned long panic_on_taint;
+extern bool panic_on_taint_nousertaint;
extern int sysctl_panic_on_rcu_stall;
extern int sysctl_panic_on_stackoverflow;
@@ -596,6 +604,7 @@ extern enum system_states {
#define TAINT_AUX 16
#define TAINT_RANDSTRUCT 17
#define TAINT_FLAGS_COUNT 18
+#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1)
struct taint_flag {
char c_true; /* character printed when tainted */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 86adc71a972f..9d6042178ca7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1706,6 +1706,8 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
struct vm_area_struct **vmas);
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
+long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
@@ -1824,10 +1826,16 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
/*
* doesn't attempt to fault and will return short.
*/
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages);
+int get_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
int pin_user_pages_fast_only(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
+
+/*
+ * Single-page convenience wrapper around get_user_pages_fast_only().
+ *
+ * Requests exactly one page at @addr with @gup_flags and stores it in
+ * *@pagep.  Returns true only when the underlying call reports that one
+ * page was obtained, false otherwise.
+ */
+static inline bool get_user_page_fast_only(unsigned long addr,
+			unsigned int gup_flags, struct page **pagep)
+{
+	int nr = get_user_pages_fast_only(addr, 1, gup_flags, pagep);
+
+	return nr == 1;
+}
/*
* per-process(per-mm_struct) statistics.
*/
diff --git a/include/linux/net.h b/include/linux/net.h
index e10f378194a5..016a9c5faa34 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -264,7 +264,8 @@ do { \
net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
#define net_info_ratelimited(fmt, ...) \
net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define net_dbg_ratelimited(fmt, ...) \
do { \
DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a96e9c4ec36..5b364a2e0006 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4942,7 +4942,8 @@ do { \
#define MODULE_ALIAS_NETDEV(device) \
MODULE_ALIAS("netdev-" device)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define netdev_dbg(__dev, format, args...) \
do { \
dynamic_netdev_dbg(__dev, format, ##args); \
@@ -5012,7 +5013,8 @@ do { \
#define netif_info(priv, type, dev, fmt, args...) \
netif_level(info, priv, type, dev, fmt, ##args)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define netif_dbg(priv, type, netdev, format, args...) \
do { \
if (netif_msg_##type(priv)) \
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 3cc2f178bf06..fc8f03c54543 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -399,7 +399,8 @@ extern int kptr_restrict;
/* If you are writing a driver, please use dev_dbg instead */
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#include <linux/dynamic_debug.h>
/**
@@ -535,7 +536,8 @@ extern int kptr_restrict;
#endif
/* If you are writing a driver, please use dev_dbg instead */
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* descriptor check is first to prevent flooding with "callbacks suppressed" */
#define pr_debug_ratelimited(fmt, ...) \
do { \
@@ -582,7 +584,8 @@ static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
#endif
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii) \
dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 7b4d3a49b6c5..660ac49f2b53 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,6 +7,13 @@
struct ctl_table;
#ifdef CONFIG_DETECT_HUNG_TASK
+
+#ifdef CONFIG_SMP
+extern unsigned int sysctl_hung_task_all_cpu_backtrace;
+#else
+#define sysctl_hung_task_all_cpu_backtrace 0
+#endif /* CONFIG_SMP */
+
extern int sysctl_hung_task_check_count;
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_timeout_secs;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index f2401e45a3c2..50bb7f383a1b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -197,6 +197,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
void unregister_sysctl_table(struct ctl_table_header * table);
extern int sysctl_init(void);
+void do_sysctl_args(void);
extern int pwrsw_enabled;
extern int unaligned_enabled;
@@ -235,6 +236,9 @@ static inline void setup_sysctl_set(struct ctl_table_set *p,
{
}
+static inline void do_sysctl_args(void)
+{
+}
#endif /* CONFIG_SYSCTL */
int sysctl_max_threads(struct ctl_table *table, int write, void *buffer,
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 14c893433139..b4d70e7568b2 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -576,7 +576,7 @@ void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
*
* Context: Any context. Takes and releases the xa_lock while
* disabling softirqs.
- * Return: The entry which used to be at this index.
+ * Return: The old entry at this index or xa_err() if an error happened.
*/
static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
void *entry, gfp_t gfp)
@@ -602,7 +602,7 @@ static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
*
* Context: Process context. Takes and releases the xa_lock while
* disabling interrupts.
- * Return: The entry which used to be at this index.
+ * Return: The old entry at this index or xa_err() if an error happened.
*/
static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
void *entry, gfp_t gfp)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 033e7044f29c..ef2f3986c493 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -100,7 +100,8 @@ void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
__printf(2, 3) __cold
void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define ibdev_dbg(__dev, format, args...) \
dynamic_ibdev_dbg(__dev, format, ##args)
#else
@@ -133,7 +134,8 @@ do { \
#define ibdev_info_ratelimited(ibdev, fmt, ...) \
ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* descriptor check is first to prevent flooding with "callbacks suppressed" */
#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
do { \
diff --git a/init/main.c b/init/main.c
index 76df62fc3e2c..b59e09353881 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1412,6 +1412,8 @@ static int __ref kernel_init(void *unused)
rcu_end_inkernel_boot();
+ do_sysctl_args();
+
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
diff --git a/ipc/msg.c b/ipc/msg.c
index caca67368cb5..acd1bc7af55a 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -268,6 +268,8 @@ static void expunge_all(struct msg_queue *msq, int res,
* before freeque() is called. msg_ids.rwsem remains locked on exit.
*/
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
+ __releases(RCU)
+ __releases(&msq->q_perm)
{
struct msg_msg *msg, *t;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index fdc3b5f3f53a..24e7b45320f7 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -117,6 +117,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
static void free_ipc_ns(struct ipc_namespace *ns)
{
+ /* mq_put_mnt() waits for a grace period as kern_unmount()
+ * uses synchronize_rcu().
+ */
+ mq_put_mnt(ns);
sem_exit_ns(ns);
msg_exit_ns(ns);
shm_exit_ns(ns);
@@ -127,6 +131,21 @@ static void free_ipc_ns(struct ipc_namespace *ns)
kfree(ns);
}
+static LLIST_HEAD(free_ipc_list);
+/*
+ * Work handler for free_ipc_work: detach everything currently queued on
+ * free_ipc_list in one step, then free each queued namespace with
+ * free_ipc_ns(). The work_struct argument is not used.
+ */
+static void free_ipc(struct work_struct *unused)
+{
+ struct llist_node *node = llist_del_all(&free_ipc_list);
+ struct ipc_namespace *n, *t;
+
+ /* _safe iteration: free_ipc_ns() kfree()s the entry being visited */
+ llist_for_each_entry_safe(n, t, node, mnt_llist)
+ free_ipc_ns(n);
+}
+
+/*
+ * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
+ */
+static DECLARE_WORK(free_ipc_work, free_ipc);
+
/*
* put_ipc_ns - drop a reference to an ipc namespace.
* @ns: the namespace to put
@@ -148,8 +167,9 @@ void put_ipc_ns(struct ipc_namespace *ns)
if (refcount_dec_and_lock(&ns->count, &mq_lock)) {
mq_clear_sbinfo(ns);
spin_unlock(&mq_lock);
- mq_put_mnt(ns);
- free_ipc_ns(ns);
+
+ if (llist_add(&ns->mnt_llist, &free_ipc_list))
+ schedule_work(&free_ipc_work);
}
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fcfadecd3a08..63d66bbebbd5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6934,12 +6934,12 @@ static u64 perf_virt_to_phys(u64 virt)
* Walking the pages tables for user address.
* Interrupts are disabled, so it prevents any tear down
* of the page tables.
- * Try IRQ-safe __get_user_pages_fast first.
+ * Try IRQ-safe get_user_page_fast_only first.
* If failed, leave phys_addr as 0.
*/
if (current->mm != NULL) {
pagefault_disable();
- if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
+ if (get_user_page_fast_only(virt, 0, &p))
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
pagefault_enable();
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index eddc8db96027..e51ec844c87c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1668,7 +1668,7 @@ void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
copy_to_page(page, vaddr, src, len);
/*
- * We probably need flush_icache_user_range() but it needs vma.
+ * We probably need flush_icache_user_page() but it needs vma.
* This should work on most of architectures by default. If
* architecture needs to do something different it can define
* its own version of the function.
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c16cb3..ce76f490126c 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -53,9 +53,18 @@ int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
+static bool hung_task_show_all_bt;
static struct task_struct *watchdog_task;
+#ifdef CONFIG_SMP
+/*
+ * Should we dump all CPUs backtraces in a hung task event?
+ * Defaults to 0, can be changed via sysctl.
+ */
+unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+#endif /* CONFIG_SMP */
+
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
@@ -63,16 +72,6 @@ static struct task_struct *watchdog_task;
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
-static int __init hung_task_panic_setup(char *str)
-{
- int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
-
- if (rc)
- return rc;
- return 1;
-}
-__setup("hung_task_panic=", hung_task_panic_setup);
-
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
@@ -137,6 +136,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
" disables this message.\n");
sched_show_task(t);
hung_task_show_lock = true;
+
+ if (sysctl_hung_task_all_cpu_backtrace)
+ hung_task_show_all_bt = true;
}
touch_nmi_watchdog();
@@ -201,10 +203,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
rcu_read_unlock();
if (hung_task_show_lock)
debug_show_all_locks();
- if (hung_task_call_panic) {
+
+ if (hung_task_show_all_bt) {
+ hung_task_show_all_bt = false;
trigger_all_cpu_backtrace();
- panic("hung_task: blocked tasks");
}
+
+ if (hung_task_call_panic)
+ panic("hung_task: blocked tasks");
}
static long hung_timeout_jiffies(unsigned long last_checked,
diff --git a/kernel/module.c b/kernel/module.c
index ef400c389f49..e8a198588f26 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3344,12 +3344,6 @@ static int check_module_license_and_versions(struct module *mod)
static void flush_module_icache(const struct module *mod)
{
- mm_segment_t old_fs;
-
- /* flush the icache in correct context */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
-
/*
* Flush the instruction cache, since we've played with text.
* Do it before processing of module parameters, so the module
@@ -3361,8 +3355,6 @@ static void flush_module_icache(const struct module *mod)
+ mod->init_layout.size);
flush_icache_range((unsigned long)mod->core_layout.base,
(unsigned long)mod->core_layout.base + mod->core_layout.size);
-
- set_fs(old_fs);
}
int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
diff --git a/kernel/panic.c b/kernel/panic.c
index b69ee9e76cb2..85568bbfb12b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -36,6 +36,14 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
+#ifdef CONFIG_SMP
+/*
+ * Should we dump all CPUs backtraces in an oops event?
+ * Defaults to 0, can be changed via sysctl.
+ */
+unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
+#endif /* CONFIG_SMP */
+
int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask =
IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
@@ -44,6 +52,8 @@ static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
bool crash_kexec_post_notifiers;
int panic_on_warn __read_mostly;
+unsigned long panic_on_taint;
+bool panic_on_taint_nousertaint = false;
int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
@@ -434,6 +444,11 @@ void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
pr_warn("Disabling lock debugging due to kernel taint\n");
set_bit(flag, &tainted_mask);
+
+ if (tainted_mask & panic_on_taint) {
+ panic_on_taint = 0;
+ panic("panic_on_taint set ...");
+ }
}
EXPORT_SYMBOL(add_taint);
@@ -515,6 +530,9 @@ void oops_enter(void)
/* can't trust the integrity of the kernel anymore: */
debug_locks_off();
do_oops_enter_exit();
+
+ if (sysctl_oops_all_cpu_backtrace)
+ trigger_all_cpu_backtrace();
}
/*
@@ -686,3 +704,30 @@ static int __init oops_setup(char *s)
return 0;
}
early_param("oops", oops_setup);
+
+/*
+ * Parse the "panic_on_taint" early parameter.
+ *
+ * The value is split at the first ','. The part before it is parsed as a
+ * hexadecimal bitmask into panic_on_taint and masked with TAINT_FLAGS_MAX.
+ * If the part after it is exactly "nousertaint",
+ * panic_on_taint_nousertaint is set.
+ *
+ * Returns 0 on success, or -EINVAL when no value was given, the number
+ * cannot be parsed, or the mask has no in-range bit set.
+ */
+static int __init panic_on_taint_setup(char *s)
+{
+ char *taint_str;
+
+ if (!s)
+ return -EINVAL;
+
+ /* taint_str is the bitmask text; s is left at the optional suffix */
+ taint_str = strsep(&s, ",");
+ if (kstrtoul(taint_str, 16, &panic_on_taint))
+ return -EINVAL;
+
+ /* make sure panic_on_taint doesn't hold out-of-range TAINT flags */
+ panic_on_taint &= TAINT_FLAGS_MAX;
+
+ /* nothing left to act on once out-of-range bits are dropped */
+ if (!panic_on_taint)
+ return -EINVAL;
+
+ /* s is NULL when the value contained no ',' */
+ if (s && !strcmp(s, "nousertaint"))
+ panic_on_taint_nousertaint = true;
+
+ pr_info("panic_on_taint: bitmask=0x%lx nousertaint_mode=%sabled\n",
+ panic_on_taint, panic_on_taint_nousertaint ? "en" : "dis");
+
+ return 0;
+}
+early_param("panic_on_taint", panic_on_taint_setup);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 715774d8c55f..db1ce7af2563 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -866,15 +866,23 @@ static int proc_taint(struct ctl_table *table, int write,
return err;
if (write) {
+ int i;
+
+ /*
+ * If we are relying on panic_on_taint not producing
+ * false positives due to userspace input, bail out
+ * before setting the requested taint flags.
+ */
+ if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
+ return -EINVAL;
+
/*
* Poor man's atomic or. Not worth adding a primitive
* to everyone's atomic.h for this
*/
- int i;
- for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
- if ((tmptaint >> i) & 1)
+ for (i = 0; i < TAINT_FLAGS_COUNT; i++)
+ if ((1UL << i) & tmptaint)
add_taint(i, LOCKDEP_STILL_OK);
- }
}
return err;
@@ -2141,6 +2149,17 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+#ifdef CONFIG_SMP
+ {
+ .procname = "oops_all_cpu_backtrace",
+ .data = &sysctl_oops_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
{
.procname = "pid_max",
.data = &pid_max,
@@ -2428,6 +2447,17 @@ static struct ctl_table kern_table[] = {
},
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
+#ifdef CONFIG_SMP
+ {
+ .procname = "hung_task_all_cpu_backtrace",
+ .data = &sysctl_hung_task_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
{
.procname = "hung_task_panic",
.data = &sysctl_hung_task_panic,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 53ff2c81b084..5abb5b22ad13 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -50,6 +50,11 @@ struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+
+# ifdef CONFIG_SMP
+int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
+# endif /* CONFIG_SMP */
+
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
@@ -82,16 +87,6 @@ static int __init hardlockup_panic_setup(char *str)
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
-# ifdef CONFIG_SMP
-int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-
-static int __init hardlockup_all_cpu_backtrace_setup(char *str)
-{
- sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
- return 1;
-}
-__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
-# endif /* CONFIG_SMP */
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/*
@@ -163,6 +158,10 @@ static void lockup_detector_update_enable(void)
#define SOFTLOCKUP_RESET ULONG_MAX
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#endif
+
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -178,13 +177,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;
-static int __init softlockup_panic_setup(char *str)
-{
- softlockup_panic = simple_strtoul(str, NULL, 0);
- return 1;
-}
-__setup("softlockup_panic=", softlockup_panic_setup);
-
static int __init nowatchdog_setup(char *str)
{
watchdog_user_enabled = 0;
@@ -206,17 +198,6 @@ static int __init watchdog_thresh_setup(char *str)
}
__setup("watchdog_thresh=", watchdog_thresh_setup);
-#ifdef CONFIG_SMP
-int __read_mostly sysctl_softlockup_all_cpu_backtrace;
-
-static int __init softlockup_all_cpu_backtrace_setup(char *str)
-{
- sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
- return 1;
-}
-__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
-#endif
-
static void __lockup_detector_cleanup(void);
/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9bd4eb7f5ec1..333e878d8af9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -99,6 +99,7 @@ config DYNAMIC_DEBUG
default n
depends on PRINTK
depends on (DEBUG_FS || PROC_FS)
+ select DYNAMIC_DEBUG_CORE
help
Compiles debug level messages into the kernel, which would not
@@ -165,6 +166,17 @@ config DYNAMIC_DEBUG
See Documentation/admin-guide/dynamic-debug-howto.rst for additional
information.
+config DYNAMIC_DEBUG_CORE
+ bool "Enable core function of dynamic debug support"
+ depends on PRINTK
+ depends on (DEBUG_FS || PROC_FS)
+ help
+ Enable the core functionality of dynamic debug. This is useful
+ when you want to tie dynamic debug to individual kernel modules
+ by defining DYNAMIC_DEBUG_MODULE for each of them, especially
+ on embedded systems where kernel image size is a concern.
+
config SYMBOLIC_ERRNAME
bool "Support symbolic error names in printf"
default y if PRINTK
diff --git a/lib/Makefile b/lib/Makefile
index 32f19b4d1d2a..315516fa4ef4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -190,7 +190,7 @@ lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
-obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
+obj-$(CONFIG_DYNAMIC_DEBUG_CORE) += dynamic_debug.o
obj-$(CONFIG_SYMBOLIC_ERRNAME) += errname.o
obj-$(CONFIG_NLATTR) += nlattr.o
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 8f199f403ab5..321437bbf87d 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -1032,8 +1032,13 @@ static int __init dynamic_debug_init(void)
int verbose_bytes = 0;
if (&__start___verbose == &__stop___verbose) {
- pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n");
- return 1;
+ if (IS_ENABLED(CONFIG_DYNAMIC_DEBUG)) {
+ pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n");
+ return 1;
+ }
+ pr_info("Ignore empty _ddebug table in a CONFIG_DYNAMIC_DEBUG_CORE build\n");
+ ddebug_init_success = 1;
+ return 0;
}
iter = __start___verbose;
modname = iter->modname;
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 566dad3f4196..84eaae22d3a6 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -44,6 +44,8 @@ struct test_sysctl_data {
int int_0002;
int int_0003[4];
+ int boot_int;
+
unsigned int uint_0001;
char string_0001[65];
@@ -61,6 +63,8 @@ static struct test_sysctl_data test_data = {
.int_0003[2] = 2,
.int_0003[3] = 3,
+ .boot_int = 0,
+
.uint_0001 = 314,
.string_0001 = "(none)",
@@ -92,6 +96,15 @@ static struct ctl_table test_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "boot_int",
+ .data = &test_data.boot_int,
+ .maxlen = sizeof(test_data.boot_int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "uint_0001",
.data = &test_data.uint_0001,
.maxlen = sizeof(unsigned int),
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index c431ca81dad5..4107dbca0056 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -72,7 +72,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
vec->got_ref = true;
vec->is_pfns = false;
- ret = get_user_pages_locked(start, nr_frames,
+ ret = pin_user_pages_locked(start, nr_frames,
gup_flags, (struct page **)(vec->ptrs), &locked);
goto out;
}
@@ -122,7 +122,6 @@ EXPORT_SYMBOL(get_vaddr_frames);
*/
void put_vaddr_frames(struct frame_vector *vec)
{
- int i;
struct page **pages;
if (!vec->got_ref)
@@ -135,8 +134,8 @@ void put_vaddr_frames(struct frame_vector *vec)
*/
if (WARN_ON(IS_ERR(pages)))
goto out;
- for (i = 0; i < vec->nr_frames; i++)
- put_page(pages[i]);
+
+ unpin_user_pages(pages, vec->nr_frames);
vec->got_ref = false;
out:
vec->nr_frames = 0;
diff --git a/mm/gup.c b/mm/gup.c
index e19ff770eb4c..8bd090b36d1d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2035,6 +2035,12 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
*/
if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
return -EINVAL;
+ /*
+ * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
+ * never directly by the caller, so enforce that:
+ */
+ if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+ return -EINVAL;
return __get_user_pages_locked(current, current->mm, start, nr_pages,
pages, NULL, locked,
@@ -2294,7 +2300,7 @@ pte_unmap:
* to be special.
*
* For a futex to be placed on a THP tail page, get_futex_key requires a
- * __get_user_pages_fast implementation that can pin pages. Thus it's still
+ * get_user_pages_fast_only implementation that can pin pages. Thus it's still
* useful to have gup_huge_pmd even if we can't operate on ptes.
*/
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
@@ -2699,7 +2705,7 @@ static inline void gup_pgd_range(unsigned long addr, unsigned long end,
#ifndef gup_fast_permitted
/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * Check if it's allowed to use get_user_pages_fast_only() for the range, or
* we need to fall back to the slow version:
*/
static bool gup_fast_permitted(unsigned long start, unsigned long end)
@@ -2811,8 +2817,14 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
return ret;
}
-
-/*
+/**
+ * get_user_pages_fast_only() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @gup_flags: flags modifying pin behaviour
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP.
* Note a difference with get_user_pages_fast: this always returns the
@@ -2825,8 +2837,8 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
* access can get ambiguous page results. If you call this function without
* 'write' set, you'd better be sure that you're ok with that ambiguity.
*/
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
+int get_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages)
{
int nr_pinned;
/*
@@ -2836,10 +2848,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* FOLL_FAST_ONLY is required in order to match the API description of
* this routine: no fall back to regular ("slow") GUP.
*/
- unsigned int gup_flags = FOLL_GET | FOLL_FAST_ONLY;
-
- if (write)
- gup_flags |= FOLL_WRITE;
+ gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
pages);
@@ -2855,7 +2864,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
return nr_pinned;
}
-EXPORT_SYMBOL_GPL(__get_user_pages_fast);
+EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
/**
* get_user_pages_fast() - pin user pages in memory
@@ -2909,9 +2918,6 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for further details.
- *
- * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
- * is NOT intended for Case 2 (RDMA: long-term pins).
*/
int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
@@ -2926,8 +2932,8 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
EXPORT_SYMBOL_GPL(pin_user_pages_fast);
/*
- * This is the FOLL_PIN equivalent of __get_user_pages_fast(). Behavior is the
- * same, except that this one sets FOLL_PIN instead of FOLL_GET.
+ * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
+ * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
*
* The API rules are the same, too: no negative values may be returned.
*/
@@ -2985,9 +2991,6 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
- *
- * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
- * is NOT intended for Case 2 (RDMA: long-term pins).
*/
long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -3021,9 +3024,6 @@ EXPORT_SYMBOL(pin_user_pages_remote);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
- *
- * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
- * is NOT intended for Case 2 (RDMA: long-term pins).
*/
long pin_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -3055,3 +3055,32 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages_unlocked);
+
+/*
+ * pin_user_pages_locked() is the FOLL_PIN variant of get_user_pages_locked().
+ * Behavior is the same, except that this one sets FOLL_PIN and rejects
+ * FOLL_GET.
+ *
+ * FOLL_LONGTERM and FOLL_GET in @gup_flags are each refused with a one-time
+ * warning and -EINVAL. Otherwise the request is handed to
+ * __get_user_pages_locked() for current and current->mm with FOLL_PIN and
+ * FOLL_TOUCH added to @gup_flags, and that call's result is returned as is.
+ */
+long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ int *locked)
+{
+ /*
+ * FIXME: Current FOLL_LONGTERM behavior is incompatible with
+ * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
+ * vmas. As there are no users of this flag in this call we simply
+ * disallow this option for now.
+ */
+ if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+ return -EINVAL;
+
+ /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+ if (WARN_ON_ONCE(gup_flags & FOLL_GET))
+ return -EINVAL;
+
+ gup_flags |= FOLL_PIN;
+ return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ pages, NULL, locked,
+ gup_flags | FOLL_TOUCH);
+}
+EXPORT_SYMBOL(pin_user_pages_locked);
diff --git a/mm/nommu.c b/mm/nommu.c
index dfae55f41901..062dc1c90d21 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -433,7 +433,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/*
* Ok, looks good - let it rip.
*/
- flush_icache_range(mm->brk, brk);
+ flush_icache_user_range(mm->brk, brk);
return mm->brk = brk;
}
@@ -1277,7 +1277,7 @@ share:
/* we flush the region from the icache only when the first executable
* mapping of it is made */
if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
- flush_icache_range(region->vm_start, region->vm_end);
+ flush_icache_user_range(region->vm_start, region->vm_end);
region->vm_icache_flushed = true;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07ae77d97952..727751219003 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5575,15 +5575,6 @@ static int __parse_numa_zonelist_order(char *s)
return 0;
}
-static __init int setup_numa_zonelist_order(char *s)
-{
- if (!s)
- return 0;
-
- return __parse_numa_zonelist_order(s);
-}
-early_param("numa_zonelist_order", setup_numa_zonelist_order);
-
char numa_zonelist_order[] = "Node";
/*
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 295512465065..057c61df12db 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -4,6 +4,7 @@
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
+#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/pagemap.h>
@@ -30,13 +31,9 @@
*/
static struct page *page_idle_get_page(unsigned long pfn)
{
- struct page *page;
+ struct page *page = pfn_to_online_page(pfn);
pg_data_t *pgdat;
- if (!pfn_valid(pfn))
- return NULL;
-
- page = pfn_to_page(pfn);
if (!page || !PageLRU(page) ||
!get_page_unless_zero(page))
return NULL;
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 6a970b127c9b..0aa8a86140e9 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -39,6 +39,7 @@ ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
test_modprobe()
{
@@ -122,7 +123,7 @@ test_reqs()
function load_req_mod()
{
- if [ ! -d $DIR ]; then
+ if [ ! -d $SYSCTL ]; then
if ! modprobe -q -n $TEST_DRIVER; then
echo "$0: module $TEST_DRIVER not found [SKIP]"
exit $ksft_skip
@@ -752,6 +753,46 @@ sysctl_test_0006()
run_bitmaptest
}
+# Checks that a sysctl given on the kernel command line
+# (sysctl.debug.test_sysctl.boot_int=1) was applied at boot.
+#
+# Returns 0 when boot_int reads 1 and $ksft_skip whenever the check cannot
+# be carried out (target missing, driver loaded as a module, no
+# /proc/cmdline, or the parameter was not passed). When the parameter was
+# passed but boot_int is not 1, rc is set to 1 and test_rc is called.
+sysctl_test_0007()
+{
+	TARGET="${SYSCTL}/boot_int"
+	if [ ! -f "$TARGET" ]; then
+		echo "Skipping test for $TARGET as it is not present ..."
+		return $ksft_skip
+	fi
+
+	if [ -d "$DIR" ]; then
+		echo "Boot param test only possible if sysctl_test is built-in, not module:"
+		cat "$TEST_DIR/config" >&2
+		return $ksft_skip
+	fi
+
+	echo -n "Testing if $TARGET is set to 1 ..."
+	ORIG=$(cat "${TARGET}")
+
+	if [ "x$ORIG" = "x1" ]; then
+		echo "ok"
+		return 0
+	fi
+
+	# The value is not 1. Only report a failure once we know the
+	# parameter was actually passed; otherwise the test is a skip.
+	if [ ! -f /proc/cmdline ]; then
+		echo "SKIP"
+		echo "/proc/cmdline does not exist, test inconclusive"
+		return $ksft_skip
+	fi
+
+	FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline)
+	if [ "$FOUND" = "1" ]; then
+		echo "FAIL"
+		echo "Kernel param found but $TARGET is not 1, TEST FAILED"
+		rc=1
+		test_rc
+		# Do not fall through to the skip path if test_rc returns.
+		return 1
+	fi
+
+	echo "SKIP"
+	echo "Skipping test, expected kernel parameter missing."
+	echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
+	return $ksft_skip
+}
+
list_tests()
{
echo "Test ID list:"
@@ -766,6 +807,7 @@ list_tests()
echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
+ echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
}
usage()
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7fa1e38e1659..7b0da1c28e51 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1740,7 +1740,6 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
bool *writable, kvm_pfn_t *pfn)
{
struct page *page[1];
- int npages;
/*
* Fast pin a writable pfn only if it is a write fault request
@@ -1750,8 +1749,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
if (!(write_fault || writable))
return false;
- npages = __get_user_pages_fast(addr, 1, 1, page);
- if (npages == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
*pfn = page_to_pfn(page[0]);
if (writable)
@@ -1791,7 +1789,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
if (unlikely(!write_fault) && writable) {
struct page *wpage;
- if (__get_user_pages_fast(addr, 1, 1, &wpage) == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
*writable = true;
put_page(page);
page = wpage;
@@ -2003,7 +2001,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
if (entry < nr_pages)
return 0;
- return __get_user_pages_fast(addr, nr_pages, 1, pages);
+ return get_user_pages_fast_only(addr, nr_pages, FOLL_WRITE, pages);
}
EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);