diff options
-rw-r--r-- | include/linux/mm.h | 4 | ||||
-rw-r--r-- | mm/mmap.c | 94 | ||||
-rw-r--r-- | mm/mremap.c | 28 |
3 files changed, 57 insertions, 69 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 4f2c33c273eb..703ba8203da3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3177,7 +3177,7 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long pgoff, unsigned long *populate, struct list_head *uf); extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, - bool downgrade); + bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); @@ -3185,7 +3185,7 @@ extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, #ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, - struct list_head *uf, bool downgrade); + struct list_head *uf, bool unlock); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) diff --git a/mm/mmap.c b/mm/mmap.c index 141c618847ac..51e70fa98450 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -193,8 +193,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) struct mm_struct *mm = current->mm; struct vm_area_struct *brkvma, *next = NULL; unsigned long min_brk; - bool populate; - bool downgraded = false; + bool populate = false; LIST_HEAD(uf); struct vma_iterator vmi; @@ -236,13 +235,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto success; } - /* - * Always allow shrinking brk. - * do_vma_munmap() may downgrade mmap_lock to read. - */ + /* Always allow shrinking brk. */ if (brk <= mm->brk) { - int ret; - /* Search one past newbrk */ vma_iter_init(&vmi, mm, newbrk); brkvma = vma_find(&vmi, oldbrk); @@ -250,19 +244,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; /* mapping intersects with an existing non-brk vma. */ /* * mm->brk must be protected by write mmap_lock. - * do_vma_munmap() may downgrade the lock, so update it + * do_vma_munmap() will drop the lock on success, so update it * before calling do_vma_munmap(). */ mm->brk = brk; - ret = do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true); - if (ret == 1) { - downgraded = true; - goto success; - } else if (!ret) - goto success; - - mm->brk = origbrk; - goto out; + if (do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true)) + goto out; + + goto success_unlocked; } if (check_brk_limits(oldbrk, newbrk - oldbrk)) @@ -283,19 +272,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; mm->brk = brk; + if (mm->def_flags & VM_LOCKED) + populate = true; success: - populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0; - if (downgraded) - mmap_read_unlock(mm); - else - mmap_write_unlock(mm); + mmap_write_unlock(mm); +success_unlocked: userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(oldbrk, newbrk - oldbrk); return brk; out: + mm->brk = origbrk; mmap_write_unlock(mm); return origbrk; } @@ -2428,14 +2417,16 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, * @start: The aligned start address to munmap. * @end: The aligned end address to munmap. * @uf: The userfaultfd list_head - * @downgrade: Set to true to attempt a write downgrade of the mmap_lock + * @unlock: Set to true to drop the mmap_lock. unlocking only happens on + * success. * - * If @downgrade is true, check return code for potential release of the lock. + * Return: 0 on success and drops the lock if so directed, error and leaves the + * lock held otherwise. */ static int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, - unsigned long end, struct list_head *uf, bool downgrade) + unsigned long end, struct list_head *uf, bool unlock) { struct vm_area_struct *prev, *next = NULL; struct maple_tree mt_detach; @@ -2551,22 +2542,24 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, /* Point of no return */ mm->locked_vm -= locked_vm; mm->map_count -= count; - if (downgrade) + if (unlock) mmap_write_downgrade(mm); /* * We can free page tables without write-locking mmap_lock because VMAs * were isolated before we downgraded mmap_lock. */ - unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade); + unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock); /* Statistics and freeing VMAs */ mas_set(&mas_detach, start); remove_mt(mm, &mas_detach); __mt_destroy(&mt_detach); + if (unlock) + mmap_read_unlock(mm); validate_mm(mm); - return downgrade ? 1 : 0; + return 0; clear_tree_failed: userfaultfd_error: @@ -2589,18 +2582,18 @@ map_count_exceeded: * @start: The start address to munmap * @len: The length of the range to munmap * @uf: The userfaultfd list_head - * @downgrade: set to true if the user wants to attempt to write_downgrade the - * mmap_lock + * @unlock: set to true if the user wants to drop the mmap_lock on success * * This function takes a @mas that is either pointing to the previous VMA or set * to MA_START and sets it up to remove the mapping(s). The @len will be * aligned and any arch_unmap work will be preformed. * - * Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise. + * Return: 0 on success and drops the lock if so directed, error and leaves the + * lock held otherwise. */ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, - bool downgrade) + bool unlock) { unsigned long end; struct vm_area_struct *vma; @@ -2617,10 +2610,13 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, /* Find the first overlapping VMA */ vma = vma_find(vmi, end); - if (!vma) + if (!vma) { + if (unlock) + mmap_write_unlock(mm); return 0; + } - return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade); + return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); } /* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls. @@ -2628,6 +2624,8 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, * @start: The start address to munmap * @len: The length to be munmapped. * @uf: The userfaultfd list_head + * + * Return: 0 on success, error otherwise. */ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) @@ -2888,7 +2886,7 @@ unacct_error: return error; } -static int __vm_munmap(unsigned long start, size_t len, bool downgrade) +static int __vm_munmap(unsigned long start, size_t len, bool unlock) { int ret; struct mm_struct *mm = current->mm; @@ -2898,16 +2896,8 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade) if (mmap_write_lock_killable(mm)) return -EINTR; - ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade); - /* - * Returning 1 indicates mmap_lock is downgraded. - * But 1 is not legal return value of vm_munmap() and munmap(), reset - * it to 0 before return. - */ - if (ret == 1) { - mmap_read_unlock(mm); - ret = 0; - } else + ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock); + if (ret || !unlock) mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); @@ -3017,21 +3007,23 @@ out: * @start: the start of the address to unmap * @end: The end of the address to unmap * @uf: The userfaultfd list_head - * @downgrade: Attempt to downgrade or not + * @unlock: Drop the lock on success * - * Returns: 0 on success and not downgraded, 1 on success and downgraded. * unmaps a VMA mapping when the vma iterator is already in position. * Does not handle alignment. + * + * Return: 0 on success drops the lock of so directed, error on failure and will + * still hold the lock. */ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf, bool downgrade) + unsigned long start, unsigned long end, struct list_head *uf, + bool unlock) { struct mm_struct *mm = vma->vm_mm; int ret; arch_unmap(mm, start, end); - ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade); + ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); validate_mm(mm); return ret; } diff --git a/mm/mremap.c b/mm/mremap.c index fe6b722ae633..11e06e4ab33b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -715,7 +715,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } vma_iter_init(&vmi, mm, old_addr); - if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) { + if (!do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false)) { /* OOM: unable to split vma, just get accounts right */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) vm_acct_memory(old_len >> PAGE_SHIFT); @@ -913,7 +913,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, struct vm_area_struct *vma; unsigned long ret = -EINVAL; bool locked = false; - bool downgraded = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap); @@ -999,24 +998,23 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, * Always allow a shrinking remap: that just unmaps * the unnecessary pages.. * do_vmi_munmap does all the needed commit accounting, and - * downgrades mmap_lock to read if so directed. + * unlocks the mmap_lock if so directed. */ if (old_len >= new_len) { - int retval; VMA_ITERATOR(vmi, mm, addr + new_len); - retval = do_vmi_munmap(&vmi, mm, addr + new_len, - old_len - new_len, &uf_unmap, true); - /* Returning 1 indicates mmap_lock is downgraded to read. */ - if (retval == 1) { - downgraded = true; - } else if (retval < 0 && old_len != new_len) { - ret = retval; + if (old_len == new_len) { + ret = addr; goto out; } + ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len, + &uf_unmap, true); + if (ret) + goto out; + ret = addr; - goto out; + goto out_unlocked; } /* @@ -1101,12 +1099,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, out: if (offset_in_page(ret)) locked = false; - if (downgraded) - mmap_read_unlock(current->mm); - else - mmap_write_unlock(current->mm); + mmap_write_unlock(current->mm); if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); +out_unlocked: userfaultfd_unmap_complete(mm, &uf_unmap_early); mremap_userfaultfd_complete(&uf, addr, ret, old_len); userfaultfd_unmap_complete(mm, &uf_unmap); |