author	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-10 17:53:04 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-10 17:53:04 -0700
commit	27bc50fc90647bbf7b734c3fc306a5e61350da53 (patch)
tree	75fc525fbfec8c07a97a7875a89592317bcad4ca /fs/proc
parent	70442fc54e6889a2a77f0e9554e8188a1557f00e (diff)
parent	bbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (diff)
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in
   linux-next for a couple of months without, to my knowledge, any
   negative reports (or any positive ones, come to that).

 - Also the Maple Tree from Liam Howlett. An overlapping range-based
   tree for vmas. It is apparently slightly more efficient in its own
   right, but is mainly targeted at enabling work to reduce mmap_lock
   contention.

   Liam has identified a number of other tree users in the kernel which
   could be beneficially converted to maple trees.

   Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat
   at [1]. This has yet to be addressed due to Liam's unfortunately
   timed vacation. He is now back and we'll get this fixed up.

 - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses
   clang-generated instrumentation to detect used-uninitialized bugs
   down to the single bit level.

   KMSAN keeps finding bugs. New ones, as well as the legacy ones.

 - Yang Shi adds a userspace mechanism (madvise) to induce a collapse
   of memory into THPs (a usage sketch follows below).

 - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to
   support file/shmem-backed pages.

 - userfaultfd updates from Axel Rasmussen

 - zsmalloc cleanups from Alexey Romanov

 - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and
   memory-failure

 - Huang Ying adds enhancements to NUMA balancing memory tiering mode's
   page promotion, with a new way of detecting hot pages.

 - memcg updates from Shakeel Butt: charging optimizations and reduced
   memory consumption.

 - memcg cleanups from Kairui Song.

 - memcg fixes and cleanups from Johannes Weiner.

 - Vishal Moola provides more folio conversions

 - Zhang Yi removed ll_rw_block() :(

 - migration enhancements from Peter Xu

 - migration error-path bugfixes from Huang Ying

 - Aneesh Kumar added ability for a device driver to alter the memory
   tiering promotion paths. For optimizations by PMEM drivers, DRM
   drivers, etc.

 - vma merging improvements from Jakub Matěn.

 - NUMA hinting cleanups from David Hildenbrand.

 - xu xin added additional userspace visibility into KSM merging
   activity.

 - THP & KSM code consolidation from Qi Zheng.

 - more folio work from Matthew Wilcox.

 - KASAN updates from Andrey Konovalov.

 - DAMON cleanups from Kaixu Xia.

 - DAMON work from SeongJae Park: fixes, cleanups.

 - hugetlb sysfs cleanups from Muchun Song.

 - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core.
Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1]

* tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits)
  hugetlb: allocate vma lock for all sharable vmas
  hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer
  hugetlb: fix vma lock handling during split vma and range unmapping
  mglru: mm/vmscan.c: fix imprecise comments
  mm/mglru: don't sync disk for each aging cycle
  mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol
  mm: memcontrol: use do_memsw_account() in a few more places
  mm: memcontrol: deprecate swapaccounting=0 mode
  mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled
  mm/secretmem: remove reduntant return value
  mm/hugetlb: add available_huge_pages() func
  mm: remove unused inline functions from include/linux/mm_inline.h
  selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory
  selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd
  selftests/vm: add thp collapse shmem testing
  selftests/vm: add thp collapse file and tmpfs testing
  selftests/vm: modularize thp collapse memory operations
  selftests/vm: dedup THP helpers
  mm/khugepaged: add tracepoint to hpage_collapse_scan_file()
  mm/madvise: add file and shmem support to MADV_COLLAPSE
  ...
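The madvise(MADV_COLLAPSE) mechanism mentioned in the summary is a userspace-visible interface. Below is a minimal usage sketch, not part of this merge: it assumes MADV_COLLAPSE may be absent from older libc headers (the uapi value 25 is hard-coded as a fallback) and simply reports the error on kernels that predate this series.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/mman.h>

  #ifndef MADV_COLLAPSE
  #define MADV_COLLAPSE 25    /* assumed uapi value; older headers may lack it */
  #endif

  int main(void)
  {
      const size_t align = 2UL << 20;    /* typical x86-64 PMD/THP size: 2 MiB */
      const size_t len = 4UL << 20;      /* two PMD-sized ranges */
      char *buf = aligned_alloc(align, len);

      if (!buf)
          return 1;

      /* Touch the pages so there is populated memory to collapse. */
      memset(buf, 0x5a, len);

      /* Ask the kernel to collapse the range into huge pages. */
      if (madvise(buf, len, MADV_COLLAPSE))
          perror("madvise(MADV_COLLAPSE)");    /* e.g. EINVAL on older kernels */

      free(buf);
      return 0;
  }

On a kernel with this series, success means the populated, PMD-aligned parts of the range end up backed by transparent huge pages, which /proc/self/smaps reports under AnonHugePages.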
Diffstat (limited to 'fs/proc')
-rw-r--r--	fs/proc/base.c	20
-rw-r--r--	fs/proc/internal.h	2
-rw-r--r--	fs/proc/task_mmu.c	96
-rw-r--r--	fs/proc/task_nommu.c	45
4 files changed, 100 insertions, 63 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2d9429bf51fa..9e479d7d202b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2350,6 +2350,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
GENRADIX(struct map_files_info) fa;
struct map_files_info *p;
int ret;
+ struct vma_iterator vmi;
genradix_init(&fa);
@@ -2388,7 +2389,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
* routine might require mmap_lock taken in might_fault().
*/
- for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+ pos = 2;
+ vma_iter_init(&vmi, mm, 0);
+ for_each_vma(vmi, vma) {
if (!vma->vm_file)
continue;
if (++pos <= ctx->pos)
@@ -3196,6 +3199,19 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *
return 0;
}
+static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct mm_struct *mm;
+
+ mm = get_task_mm(task);
+ if (mm) {
+ seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
+ mmput(mm);
+ }
+
+ return 0;
+}
#endif /* CONFIG_KSM */
#ifdef CONFIG_STACKLEAK_METRICS
@@ -3331,6 +3347,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
#endif
};
@@ -3668,6 +3685,7 @@ static const struct pid_entry tid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
#endif
};
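The base.c hunks above wire up a new /proc/<pid>/ksm_stat file whose only field so far is ksm_rmap_items, printed by proc_pid_ksm_stat(). A small reader sketch follows, an illustration only, assuming a kernel built with CONFIG_KSM that already carries this change:

  #include <stdio.h>

  int main(void)
  {
      unsigned long rmap_items;
      FILE *f = fopen("/proc/self/ksm_stat", "r");

      if (!f) {
          /* Absent without CONFIG_KSM or on kernels without this change. */
          perror("fopen /proc/self/ksm_stat");
          return 1;
      }

      /* The format added here is a single "ksm_rmap_items %lu" line. */
      if (fscanf(f, "ksm_rmap_items %lu", &rmap_items) == 1)
          printf("ksm_rmap_items: %lu\n", rmap_items);

      fclose(f);
      return 0;
  }

The file sits next to the existing ksm_merging_pages entry and uses the same S_IRUSR mode, so only the task owner (or a privileged reader) can open it.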
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 06a80f78433d..f03000764ce5 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -285,7 +285,7 @@ struct proc_maps_private {
struct task_struct *task;
struct mm_struct *mm;
#ifdef CONFIG_MMU
- struct vm_area_struct *tail_vma;
+ struct vma_iterator iter;
#endif
#ifdef CONFIG_NUMA
struct mempolicy *task_mempolicy;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0023643f8b..8b4f3073f8f5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
-#include <linux/vmacache.h>
#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
@@ -124,12 +123,26 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
+ loff_t *ppos)
+{
+ struct vm_area_struct *vma = vma_next(&priv->iter);
+
+ if (vma) {
+ *ppos = vma->vm_start;
+ } else {
+ *ppos = -2UL;
+ vma = get_gate_vma(priv->mm);
+ }
+
+ return vma;
+}
+
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
unsigned long last_addr = *ppos;
struct mm_struct *mm;
- struct vm_area_struct *vma;
/* See m_next(). Zero at the start or after lseek. */
if (last_addr == -1UL)
@@ -153,31 +166,21 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-EINTR);
}
+ vma_iter_init(&priv->iter, mm, last_addr);
hold_task_mempolicy(priv);
- priv->tail_vma = get_gate_vma(mm);
-
- vma = find_vma(mm, last_addr);
- if (vma)
- return vma;
+ if (last_addr == -2UL)
+ return get_gate_vma(mm);
- return priv->tail_vma;
+ return proc_get_vma(priv, ppos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *next, *vma = v;
-
- if (vma == priv->tail_vma)
- next = NULL;
- else if (vma->vm_next)
- next = vma->vm_next;
- else
- next = priv->tail_vma;
-
- *ppos = next ? next->vm_start : -1UL;
-
- return next;
+ if (*ppos == -2UL) {
+ *ppos = -1UL;
+ return NULL;
+ }
+ return proc_get_vma(m->private, ppos);
}
static void m_stop(struct seq_file *m, void *v)
@@ -864,7 +867,7 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
seq_printf(m, "THPeligible: %d\n",
- hugepage_vma_check(vma, vma->vm_flags, true, false));
+ hugepage_vma_check(vma, vma->vm_flags, true, false, true));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -877,16 +880,16 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct mem_size_stats mss;
- struct mm_struct *mm;
+ struct mm_struct *mm = priv->mm;
struct vm_area_struct *vma;
- unsigned long last_vma_end = 0;
+ unsigned long vma_start = 0, last_vma_end = 0;
int ret = 0;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
priv->task = get_proc_task(priv->inode);
if (!priv->task)
return -ESRCH;
- mm = priv->mm;
if (!mm || !mmget_not_zero(mm)) {
ret = -ESRCH;
goto out_put_task;
@@ -899,8 +902,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
goto out_put_mm;
hold_task_mempolicy(priv);
+ vma = mas_find(&mas, 0);
+
+ if (unlikely(!vma))
+ goto empty_set;
- for (vma = priv->mm->mmap; vma;) {
+ vma_start = vma->vm_start;
+ do {
smap_gather_stats(vma, &mss, 0);
last_vma_end = vma->vm_end;
@@ -909,6 +917,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
* access it for write request.
*/
if (mmap_lock_is_contended(mm)) {
+ mas_pause(&mas);
mmap_read_unlock(mm);
ret = mmap_read_lock_killable(mm);
if (ret) {
@@ -952,7 +961,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
* contains last_vma_end.
* Iterate VMA' from last_vma_end.
*/
- vma = find_vma(mm, last_vma_end - 1);
+ vma = mas_find(&mas, ULONG_MAX);
/* Case 3 above */
if (!vma)
break;
@@ -966,11 +975,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
smap_gather_stats(vma, &mss, last_vma_end);
}
/* Case 2 above */
- vma = vma->vm_next;
- }
+ } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
- show_vma_header_prefix(m, priv->mm->mmap->vm_start,
- last_vma_end, 0, 0, 0, 0);
+empty_set:
+ show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0);
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");
@@ -1263,6 +1271,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
struct mmu_notifier_range range;
struct clear_refs_private cp = {
.type = type,
@@ -1282,7 +1291,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (!(vma->vm_flags & VM_SOFTDIRTY))
continue;
vma->vm_flags &= ~VM_SOFTDIRTY;
@@ -1294,8 +1303,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
- &cp);
+ walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
@@ -1418,9 +1426,19 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
- if (pm->show_pfn)
+ if (pm->show_pfn) {
+ pgoff_t offset;
+ /*
+ * For PFN swap offsets, keeping the offset field
+ * to be PFN only to be compatible with old smaps.
+ */
+ if (is_pfn_swap_entry(entry))
+ offset = swp_offset_pfn(entry);
+ else
+ offset = swp_offset(entry);
frame = swp_type(entry) |
- (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ (offset << MAX_SWAPFILES_SHIFT);
+ }
flags |= PM_SWAP;
migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
@@ -1477,7 +1495,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
unsigned long offset;
if (pm->show_pfn) {
- offset = swp_offset(entry) +
+ if (is_pfn_swap_entry(entry))
+ offset = swp_offset_pfn(entry);
+ else
+ offset = swp_offset(entry);
+ offset = offset +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
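The pagemap hunks above only change which offset value is stored for PFN-style swap entries (e.g. migration entries); the userspace encoding of /proc/<pid>/pagemap is unchanged: bit 63 marks a present page, bit 62 a swapped one, and for swapped pages bits 0-4 hold the swap type with the offset above them (MAX_SWAPFILES_SHIFT is 5). The sketch below decodes one entry under those documented-format assumptions; note that the pm->show_pfn checks in the code above mean the type/offset and PFN bits read as zero without CAP_SYS_ADMIN.

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  #define PM_SWAP_BIT         (1ULL << 62)
  #define PM_PRESENT_BIT      (1ULL << 63)
  #define MAX_SWAPFILES_SHIFT 5    /* swap type in bits 0-4, offset in bits 5-54 */

  int main(void)
  {
      long page_size = sysconf(_SC_PAGESIZE);
      char *probe = malloc(page_size);
      uint64_t entry;
      int fd;

      if (!probe)
          return 1;

      /* Fault the page in so the entry has something to report. */
      ((volatile char *)probe)[0] = 1;

      fd = open("/proc/self/pagemap", O_RDONLY);
      if (fd < 0) {
          perror("open /proc/self/pagemap");
          return 1;
      }

      /* One 64-bit entry per virtual page, indexed by vaddr / page size. */
      off_t index = (uintptr_t)probe / page_size;
      if (pread(fd, &entry, sizeof(entry), index * sizeof(entry)) != sizeof(entry)) {
          perror("pread");
          return 1;
      }

      if (entry & PM_PRESENT_BIT) {
          /* Bits 0-54 hold the PFN; zero without CAP_SYS_ADMIN. */
          printf("present, pfn=%llu\n",
                 (unsigned long long)(entry & ((1ULL << 55) - 1)));
      } else if (entry & PM_SWAP_BIT) {
          unsigned long long type = entry & ((1ULL << MAX_SWAPFILES_SHIFT) - 1);
          unsigned long long offset =
                  (entry & ((1ULL << 55) - 1)) >> MAX_SWAPFILES_SHIFT;
          printf("swapped, type=%llu offset=%llu\n", type, offset);
      } else {
          printf("not present\n");
      }

      close(fd);
      free(probe);
      return 0;
  }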
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index a6d21fc0033c..2fd06f52b6a4 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -20,15 +20,13 @@
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
- struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
-
- mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ mmap_read_lock(mm);
+ for_each_vma(vmi, vma) {
bytes += kobjsize(vma);
region = vma->vm_region;
@@ -82,15 +80,13 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long task_vsize(struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
- struct rb_node *p;
unsigned long vsize = 0;
mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ for_each_vma(vmi, vma)
vsize += vma->vm_end - vma->vm_start;
- }
mmap_read_unlock(mm);
return vsize;
}
@@ -99,14 +95,13 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
- struct rb_node *p;
unsigned long size = kobjsize(mm);
mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ for_each_vma(vmi, vma) {
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
@@ -190,17 +185,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
*/
static int show_map(struct seq_file *m, void *_p)
{
- struct rb_node *p = _p;
-
- return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
+ return nommu_vma_show(m, _p);
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm;
- struct rb_node *p;
- loff_t n = *pos;
+ struct vm_area_struct *vma;
+ unsigned long addr = *pos;
+
+ /* See m_next(). Zero at the start or after lseek. */
+ if (addr == -1UL)
+ return NULL;
/* pin the task and mm whilst we play with them */
priv->task = get_proc_task(priv->inode);
@@ -216,10 +213,10 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-EINTR);
}
- /* start from the Nth VMA */
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
- if (n-- == 0)
- return p;
+ /* start the next element from addr */
+ vma = find_vma(mm, addr);
+ if (vma)
+ return vma;
mmap_read_unlock(mm);
mmput(mm);
@@ -242,10 +239,10 @@ static void m_stop(struct seq_file *m, void *_vml)
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
- struct rb_node *p = _p;
+ struct vm_area_struct *vma = _p;
- (*pos)++;
- return p ? rb_next(p) : NULL;
+ *pos = vma->vm_end;
+ return find_vma(vma->vm_mm, vma->vm_end);
}
static const struct seq_operations proc_pid_maps_ops = {