summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-09-19 11:35:31 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-09-19 11:35:31 +0200
commit 839c4f596f898edc424070dc8b517381572f8502 (patch)
tree 7d40d42955f3b65672e24b89b727421900da9b36
parent a1d1eb2f57501b2e7e2076ce89b3f3a666ddbfdd (diff)
parent 22af8caff7d1ca22a1ff1a554180e53f7a6555af (diff)
Merge tag 'mm-hotfixes-stable-2024-09-19-00-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc hotfixes from Andrew Morton:
 "12 hotfixes, 11 of which are cc:stable.

  Four fixes for longstanding ocfs2 issues and the remainder address
  random MM things"

* tag 'mm-hotfixes-stable-2024-09-19-00-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/madvise: process_madvise() drop capability check if same mm
  mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
  mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
  mm: change vmf_anon_prepare() to __vmf_anon_prepare()
  resource: fix region_intersects() vs add_memory_driver_managed()
  zsmalloc: use unique zsmalloc caches names
  mm/damon/vaddr: protect vma traversal in __damon_va_three_regions() with rcu read lock
  mm: vmscan.c: fix OOM on swap stress test
  ocfs2: cancel dqi_sync_work before freeing oinfo
  ocfs2: fix possible null-ptr-deref in ocfs2_set_buffer_uptodate
  ocfs2: remove unreasonable unlock in ocfs2_read_blocks
  ocfs2: fix null-ptr-deref when journal load failed.
-rw-r--r-- fs/ocfs2/buffer_head_io.c 4
-rw-r--r-- fs/ocfs2/journal.c 7
-rw-r--r-- fs/ocfs2/quota_local.c 8
-rw-r--r-- kernel/resource.c 58
-rw-r--r-- mm/damon/vaddr.c 2
-rw-r--r-- mm/huge_memory.c 2
-rw-r--r-- mm/hugetlb.c 20
-rw-r--r-- mm/internal.h 11
-rw-r--r-- mm/madvise.c 2
-rw-r--r-- mm/memory.c 8
-rw-r--r-- mm/vmscan.c 2
-rw-r--r-- mm/zsmalloc.c 23
12 files changed, 116 insertions, 31 deletions
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index cdb9b9bdea1f..8f714406528d 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
- ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
/* Don't forget to put previous bh! */
@@ -389,7 +388,8 @@ read_failure:
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
- ocfs2_set_buffer_uptodate(ci, bh);
+ if (bh)
+ ocfs2_set_buffer_uptodate(ci, bh);
}
ocfs2_metadata_cache_io_unlock(ci);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 530fba34f6d3..1bf188b6866a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1055,7 +1055,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (!igrab(inode))
BUG();
- num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+ num_running_trans = atomic_read(&(journal->j_num_trans));
trace_ocfs2_journal_shutdown(num_running_trans);
/* Do a commit_cache here. It will flush our journal, *and*
@@ -1074,9 +1074,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
osb->commit_task = NULL;
}
- BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+ BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);
- if (ocfs2_mount_local(osb)) {
+ if (ocfs2_mount_local(osb) &&
+ (journal->j_journal->j_flags & JBD2_LOADED)) {
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8ce462c64c51..73d3367c533b 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -692,7 +692,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
int status;
struct buffer_head *bh = NULL;
struct ocfs2_quota_recovery *rec;
- int locked = 0;
+ int locked = 0, global_read = 0;
info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
@@ -700,6 +700,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
if (!oinfo) {
mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
" info.");
+ status = -ENOMEM;
goto out_err;
}
info->dqi_priv = oinfo;
@@ -712,6 +713,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
status = ocfs2_global_read_info(sb, type);
if (status < 0)
goto out_err;
+ global_read = 1;
status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
if (status < 0) {
@@ -782,10 +784,12 @@ out_err:
if (locked)
ocfs2_inode_unlock(lqinode, 1);
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+ if (global_read)
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
kfree(oinfo);
}
brelse(bh);
- return -1;
+ return status;
}
/* Write local info to quota file */
diff --git a/kernel/resource.c b/kernel/resource.c
index a83040fde236..1681ab5012e1 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -540,20 +540,62 @@ static int __region_intersects(struct resource *parent, resource_size_t start,
size_t size, unsigned long flags,
unsigned long desc)
{
- struct resource res;
+ resource_size_t ostart, oend;
int type = 0; int other = 0;
- struct resource *p;
+ struct resource *p, *dp;
+ bool is_type, covered;
+ struct resource res;
res.start = start;
res.end = start + size - 1;
for (p = parent->child; p ; p = p->sibling) {
- bool is_type = (((p->flags & flags) == flags) &&
- ((desc == IORES_DESC_NONE) ||
- (desc == p->desc)));
-
- if (resource_overlaps(p, &res))
- is_type ? type++ : other++;
+ if (!resource_overlaps(p, &res))
+ continue;
+ is_type = (p->flags & flags) == flags &&
+ (desc == IORES_DESC_NONE || desc == p->desc);
+ if (is_type) {
+ type++;
+ continue;
+ }
+ /*
+ * Continue to search in descendant resources as if the
+ * matched descendant resources cover some ranges of 'p'.
+ *
+ * |------------- "CXL Window 0" ------------|
+ * |-- "System RAM" --|
+ *
+ * will behave similar as the following fake resource
+ * tree when searching "System RAM".
+ *
+ * |-- "System RAM" --||-- "CXL Window 0a" --|
+ */
+ covered = false;
+ ostart = max(res.start, p->start);
+ oend = min(res.end, p->end);
+ for_each_resource(p, dp, false) {
+ if (!resource_overlaps(dp, &res))
+ continue;
+ is_type = (dp->flags & flags) == flags &&
+ (desc == IORES_DESC_NONE || desc == dp->desc);
+ if (is_type) {
+ type++;
+ /*
+ * Range from 'ostart' to 'dp->start'
+ * isn't covered by matched resource.
+ */
+ if (dp->start > ostart)
+ break;
+ if (dp->end >= oend) {
+ covered = true;
+ break;
+ }
+ /* Remove covered range */
+ ostart = max(ostart, dp->end + 1);
+ }
+ }
+ if (!covered)
+ other++;
}
if (type == 0)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 58829baf8b5d..a0036dc78a3b 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -126,6 +126,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
* If this is too slow, it can be optimised to examine the maple
* tree gaps.
*/
+ rcu_read_lock();
for_each_vma(vmi, vma) {
unsigned long gap;
@@ -146,6 +147,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
next:
prev = vma;
}
+ rcu_read_unlock();
if (!sz_range(&second_gap) || !sz_range(&first_gap))
return -EINVAL;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 67c86a5d64a6..99b146d16a18 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -220,6 +220,8 @@ retry:
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
return false;
}
+ /* Ensure zero folio won't have large_rmappable flag set. */
+ folio_clear_large_rmappable(zero_folio);
preempt_disable();
if (cmpxchg(&huge_zero_folio, NULL, zero_folio)) {
preempt_enable();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aaf508be0a2b..9a3a6e2dee97 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6048,7 +6048,7 @@ retry_avoidcopy:
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
*/
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out_release_all;
@@ -6247,7 +6247,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
}
if (!(vma->vm_flags & VM_MAYSHARE)) {
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
}
@@ -6378,6 +6378,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() is
+ * the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return ret;
@@ -6599,6 +6607,14 @@ out_ptl:
}
out_mutex:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() in
+ * hugetlb_wp() is the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* Generally it's safe to hold refcount during waiting page lock. But
diff --git a/mm/internal.h b/mm/internal.h
index b4d86436565b..a963f67d3452 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -310,7 +310,16 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
wake_up(wqh);
}
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf);
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf);
+static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+ vm_fault_t ret = __vmf_anon_prepare(vmf);
+
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vmf->vma);
+ return ret;
+}
+
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
diff --git a/mm/madvise.c b/mm/madvise.c
index 89089d84f8df..6e3a137b8e50 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1527,7 +1527,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
* Require CAP_SYS_NICE for influencing process performance. Note that
* only non-destructive hints are currently supported.
*/
- if (!capable(CAP_SYS_NICE)) {
+ if (mm != current->mm && !capable(CAP_SYS_NICE)) {
ret = -EPERM;
goto release_mm;
}
diff --git a/mm/memory.c b/mm/memory.c
index ebfc9768f801..cda2c12c500b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3276,7 +3276,7 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
}
/**
- * vmf_anon_prepare - Prepare to handle an anonymous fault.
+ * __vmf_anon_prepare - Prepare to handle an anonymous fault.
* @vmf: The vm_fault descriptor passed from the fault handler.
*
* When preparing to insert an anonymous page into a VMA from a
@@ -3290,7 +3290,7 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
* Return: 0 if fault handling can proceed. Any other value should be
* returned to the caller.
*/
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret = 0;
@@ -3298,10 +3298,8 @@ vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
if (likely(vma->anon_vma))
return 0;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
- if (!mmap_read_trylock(vma->vm_mm)) {
- vma_end_read(vma);
+ if (!mmap_read_trylock(vma->vm_mm))
return VM_FAULT_RETRY;
- }
}
if (__anon_vma_prepare(vma))
ret = VM_FAULT_OOM;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd489c1af228..a8d61a8b6894 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4300,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* ineligible */
- if (zone > sc->reclaim_idx) {
+ if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2d3163e4da96..b572aa84823c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -54,6 +54,7 @@
#include <linux/vmalloc.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/shrinker.h>
#include <linux/types.h>
#include <linux/debugfs.h>
@@ -293,17 +294,27 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
static int create_cache(struct zs_pool *pool)
{
- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
- 0, 0, NULL);
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "zs_handle-%s", pool->name);
+ if (!name)
+ return -ENOMEM;
+ pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE,
+ 0, 0, NULL);
+ kfree(name);
if (!pool->handle_cachep)
- return 1;
+ return -EINVAL;
- pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
- 0, 0, NULL);
+ name = kasprintf(GFP_KERNEL, "zspage-%s", pool->name);
+ if (!name)
+ return -ENOMEM;
+ pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage),
+ 0, 0, NULL);
+ kfree(name);
if (!pool->zspage_cachep) {
kmem_cache_destroy(pool->handle_cachep);
pool->handle_cachep = NULL;
- return 1;
+ return -EINVAL;
}
return 0;