Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--   mm/userfaultfd.c   287
1 file changed, 139 insertions(+), 148 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 53c3d916ff66..e97a0b4889fc 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -31,11 +31,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma;
dst_vma = find_vma(dst_mm, dst_start);
- if (!dst_vma)
- return NULL;
-
- if (dst_start < dst_vma->vm_start ||
- dst_start + len > dst_vma->vm_end)
+ if (!range_in_vma(dst_vma, dst_start, dst_start + len))
return NULL;
/*
@@ -55,12 +51,13 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
* This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
* and anon, and for both shared and private VMAs.
*/
-int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+int mfill_atomic_install_pte(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr, struct page *page,
- bool newly_allocated, bool wp_copy)
+ bool newly_allocated, uffd_flags_t flags)
{
int ret;
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
@@ -76,7 +73,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
writable = false;
if (writable)
_dst_pte = pte_mkwrite(_dst_pte);
- if (wp_copy)
+ if (flags & MFILL_ATOMIC_WP)
_dst_pte = pte_mkuffd_wp(_dst_pte);
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
@@ -127,25 +124,25 @@ out_unlock:
return ret;
}
-static int mcopy_atomic_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- struct page **pagep,
- bool wp_copy)
+static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop)
{
- void *page_kaddr;
+ void *kaddr;
int ret;
- struct page *page;
+ struct folio *folio;
- if (!*pagep) {
+ if (!*foliop) {
ret = -ENOMEM;
- page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
- if (!page)
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
+ dst_addr, false);
+ if (!folio)
goto out;
- page_kaddr = kmap_local_page(page);
+ kaddr = kmap_local_folio(folio, 0);
/*
* The read mmap_lock is held here. Despite the
* mmap_lock being read recursive a deadlock is still
@@ -162,52 +159,50 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
* and retry the copy outside the mmap_lock.
*/
pagefault_disable();
- ret = copy_from_user(page_kaddr,
- (const void __user *) src_addr,
+ ret = copy_from_user(kaddr, (const void __user *) src_addr,
PAGE_SIZE);
pagefault_enable();
- kunmap_local(page_kaddr);
+ kunmap_local(kaddr);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
- *pagep = page;
+ *foliop = folio;
/* don't free the page */
goto out;
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
} else {
- page = *pagep;
- *pagep = NULL;
+ folio = *foliop;
+ *foliop = NULL;
}
/*
- * The memory barrier inside __SetPageUptodate makes sure that
+ * The memory barrier inside __folio_mark_uptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
- __SetPageUptodate(page);
+ __folio_mark_uptodate(folio);
ret = -ENOMEM;
- if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
+ if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
goto out_release;
- ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- page, true, wp_copy);
+ ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+ &folio->page, true, flags);
if (ret)
goto out_release;
out:
return ret;
out_release:
- put_page(page);
+ folio_put(folio);
goto out;
}
-static int mfill_zeropage_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr)
+static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr)
{
pte_t _dst_pte, *dst_pte;
spinlock_t *ptl;
@@ -217,7 +212,7 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm,
_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
dst_vma->vm_page_prot));
- dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+ dst_pte = pte_offset_map_lock(dst_vma->vm_mm, dst_pmd, dst_addr, &ptl);
if (dst_vma->vm_file) {
/* the shmem MAP_PRIVATE case requires checking the i_size */
inode = dst_vma->vm_file->f_inode;
@@ -230,7 +225,7 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm,
ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_unlock;
- set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+ set_pte_at(dst_vma->vm_mm, dst_addr, dst_pte, _dst_pte);
/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
ret = 0;
@@ -240,11 +235,10 @@ out_unlock:
}
/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
-static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- bool wp_copy)
+static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ uffd_flags_t flags)
{
struct inode *inode = file_inode(dst_vma->vm_file);
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
@@ -269,8 +263,8 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
goto out_release;
}
- ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- page, false, wp_copy);
+ ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+ page, false, flags);
if (ret)
goto out_release;
@@ -307,23 +301,23 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
#ifdef CONFIG_HUGETLB_PAGE
/*
- * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is
+ * mfill_atomic processing for HUGETLB vmas. Note that this routine is
* called with mmap_lock held, it will release mmap_lock before returning.
*/
-static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+static __always_inline ssize_t mfill_atomic_hugetlb(
struct vm_area_struct *dst_vma,
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
- enum mcopy_atomic_mode mode,
- bool wp_copy)
+ uffd_flags_t flags)
{
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
int vm_shared = dst_vma->vm_flags & VM_SHARED;
ssize_t err;
pte_t *dst_pte;
unsigned long src_addr, dst_addr;
long copied;
- struct page *page;
+ struct folio *folio;
unsigned long vma_hpagesize;
pgoff_t idx;
u32 hash;
@@ -335,7 +329,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
* by THP. Since we can not reliably insert a zero page, this
* feature is not supported.
*/
- if (mode == MCOPY_ATOMIC_ZEROPAGE) {
+ if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
mmap_read_unlock(dst_mm);
return -EINVAL;
}
@@ -343,7 +337,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
- page = NULL;
+ folio = NULL;
vma_hpagesize = vma_kernel_pagesize(dst_vma);
/*
@@ -403,7 +397,7 @@ retry:
goto out_unlock;
}
- if (mode != MCOPY_ATOMIC_CONTINUE &&
+ if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) &&
!huge_pte_none_mostly(huge_ptep_get(dst_pte))) {
err = -EEXIST;
hugetlb_vma_unlock_read(dst_vma);
@@ -411,9 +405,8 @@ retry:
goto out_unlock;
}
- err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
- dst_addr, src_addr, mode, &page,
- wp_copy);
+ err = hugetlb_mfill_atomic_pte(dst_pte, dst_vma, dst_addr,
+ src_addr, flags, &folio);
hugetlb_vma_unlock_read(dst_vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -422,12 +415,10 @@ retry:
if (unlikely(err == -ENOENT)) {
mmap_read_unlock(dst_mm);
- BUG_ON(!page);
+ BUG_ON(!folio);
- err = copy_huge_page_from_user(page,
- (const void __user *)src_addr,
- vma_hpagesize / PAGE_SIZE,
- true);
+ err = copy_folio_from_user(folio,
+ (const void __user *)src_addr, true);
if (unlikely(err)) {
err = -EFAULT;
goto out;
@@ -437,7 +428,7 @@ retry:
dst_vma = NULL;
goto retry;
} else
- BUG_ON(page);
+ BUG_ON(folio);
if (!err) {
dst_addr += vma_hpagesize;
@@ -454,8 +445,8 @@ retry:
out_unlock:
mmap_read_unlock(dst_mm);
out:
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
BUG_ON(copied < 0);
BUG_ON(err > 0);
BUG_ON(!copied && !err);
@@ -463,29 +454,25 @@ out:
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
-extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
- struct vm_area_struct *dst_vma,
- unsigned long dst_start,
- unsigned long src_start,
- unsigned long len,
- enum mcopy_atomic_mode mode,
- bool wp_copy);
+extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
+ unsigned long dst_start,
+ unsigned long src_start,
+ unsigned long len,
+ uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */
-static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
+static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
- struct page **page,
- enum mcopy_atomic_mode mode,
- bool wp_copy)
+ uffd_flags_t flags,
+ struct folio **foliop)
{
ssize_t err;
- if (mode == MCOPY_ATOMIC_CONTINUE) {
- return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- wp_copy);
+ if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
+ return mfill_atomic_pte_continue(dst_pmd, dst_vma,
+ dst_addr, flags);
}
/*
@@ -499,38 +486,35 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
* and not in the radix tree.
*/
if (!(dst_vma->vm_flags & VM_SHARED)) {
- if (mode == MCOPY_ATOMIC_NORMAL)
- err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, src_addr, page,
- wp_copy);
+ if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
+ err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
+ dst_addr, src_addr,
+ flags, foliop);
else
- err = mfill_zeropage_pte(dst_mm, dst_pmd,
+ err = mfill_atomic_pte_zeropage(dst_pmd,
dst_vma, dst_addr);
} else {
- err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
+ err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
dst_addr, src_addr,
- mode != MCOPY_ATOMIC_NORMAL,
- wp_copy, page);
+ flags, foliop);
}
return err;
}
-static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
- unsigned long dst_start,
- unsigned long src_start,
- unsigned long len,
- enum mcopy_atomic_mode mcopy_mode,
- atomic_t *mmap_changing,
- __u64 mode)
+static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
+ unsigned long dst_start,
+ unsigned long src_start,
+ unsigned long len,
+ atomic_t *mmap_changing,
+ uffd_flags_t flags)
{
struct vm_area_struct *dst_vma;
ssize_t err;
pmd_t *dst_pmd;
unsigned long src_addr, dst_addr;
long copied;
- struct page *page;
- bool wp_copy;
+ struct folio *folio;
/*
* Sanitize the command parameters:
@@ -545,7 +529,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
- page = NULL;
+ folio = NULL;
retry:
mmap_read_lock(dst_mm);
@@ -580,21 +564,20 @@ retry:
* validate 'mode' now that we know the dst_vma: don't allow
* a wrprotect copy if the userfaultfd didn't register as WP.
*/
- wp_copy = mode & UFFDIO_COPY_MODE_WP;
- if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
+ if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
goto out_unlock;
/*
* If this is a HUGETLB vma, pass off to appropriate routine
*/
if (is_vm_hugetlb_page(dst_vma))
- return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
- src_start, len, mcopy_mode,
- wp_copy);
+ return mfill_atomic_hugetlb(dst_vma, dst_start,
+ src_start, len, flags);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
- if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
+ if (!vma_is_shmem(dst_vma) &&
+ uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
goto out_unlock;
/*
@@ -641,29 +624,29 @@ retry:
BUG_ON(pmd_none(*dst_pmd));
BUG_ON(pmd_trans_huge(*dst_pmd));
- err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- src_addr, &page, mcopy_mode, wp_copy);
+ err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
+ src_addr, flags, &folio);
cond_resched();
if (unlikely(err == -ENOENT)) {
- void *page_kaddr;
+ void *kaddr;
mmap_read_unlock(dst_mm);
- BUG_ON(!page);
+ BUG_ON(!folio);
- page_kaddr = kmap_local_page(page);
- err = copy_from_user(page_kaddr,
+ kaddr = kmap_local_folio(folio, 0);
+ err = copy_from_user(kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
- kunmap_local(page_kaddr);
+ kunmap_local(kaddr);
if (unlikely(err)) {
err = -EFAULT;
goto out;
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
goto retry;
} else
- BUG_ON(page);
+ BUG_ON(folio);
if (!err) {
dst_addr += PAGE_SIZE;
@@ -680,43 +663,46 @@ retry:
out_unlock:
mmap_read_unlock(dst_mm);
out:
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
BUG_ON(copied < 0);
BUG_ON(err > 0);
BUG_ON(!copied && !err);
return copied ? copied : err;
}
-ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
- unsigned long src_start, unsigned long len,
- atomic_t *mmap_changing, __u64 mode)
+ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
+ unsigned long src_start, unsigned long len,
+ atomic_t *mmap_changing, uffd_flags_t flags)
{
- return __mcopy_atomic(dst_mm, dst_start, src_start, len,
- MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
+ return mfill_atomic(dst_mm, dst_start, src_start, len, mmap_changing,
+ uffd_flags_set_mode(flags, MFILL_ATOMIC_COPY));
}
-ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
- unsigned long len, atomic_t *mmap_changing)
+ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, atomic_t *mmap_changing)
{
- return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
- mmap_changing, 0);
+ return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+ uffd_flags_set_mode(0, MFILL_ATOMIC_ZEROPAGE));
}
-ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
- unsigned long len, atomic_t *mmap_changing)
+ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, atomic_t *mmap_changing,
+ uffd_flags_t flags)
{
- return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
- mmap_changing, 0);
+ return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+ uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE));
}
-long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+long uffd_wp_range(struct vm_area_struct *dst_vma,
unsigned long start, unsigned long len, bool enable_wp)
{
unsigned int mm_cp_flags;
struct mmu_gather tlb;
long ret;
+ VM_WARN_ONCE(start < dst_vma->vm_start || start + len > dst_vma->vm_end,
+ "The address range exceeds VMA boundary.\n");
if (enable_wp)
mm_cp_flags = MM_CP_UFFD_WP;
else
@@ -730,7 +716,7 @@ long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
*/
if (!enable_wp && vma_wants_manual_pte_write_upgrade(dst_vma))
mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
- tlb_gather_mmu(&tlb, dst_mm);
+ tlb_gather_mmu(&tlb, dst_vma->vm_mm);
ret = change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags);
tlb_finish_mmu(&tlb);
@@ -741,9 +727,12 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
+ unsigned long end = start + len;
+ unsigned long _start, _end;
struct vm_area_struct *dst_vma;
unsigned long page_mask;
long err;
+ VMA_ITERATOR(vmi, dst_mm, start);
/*
* Sanitize the command parameters:
@@ -766,28 +755,30 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
goto out_unlock;
err = -ENOENT;
- dst_vma = find_dst_vma(dst_mm, start, len);
+ for_each_vma_range(vmi, dst_vma, end) {
- if (!dst_vma)
- goto out_unlock;
- if (!userfaultfd_wp(dst_vma))
- goto out_unlock;
- if (!vma_can_userfault(dst_vma, dst_vma->vm_flags))
- goto out_unlock;
+ if (!userfaultfd_wp(dst_vma)) {
+ err = -ENOENT;
+ break;
+ }
- if (is_vm_hugetlb_page(dst_vma)) {
- err = -EINVAL;
- page_mask = vma_kernel_pagesize(dst_vma) - 1;
- if ((start & page_mask) || (len & page_mask))
- goto out_unlock;
- }
+ if (is_vm_hugetlb_page(dst_vma)) {
+ err = -EINVAL;
+ page_mask = vma_kernel_pagesize(dst_vma) - 1;
+ if ((start & page_mask) || (len & page_mask))
+ break;
+ }
- err = uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
+ _start = max(dst_vma->vm_start, start);
+ _end = min(dst_vma->vm_end, end);
- /* Return 0 on success, <0 on failures */
- if (err > 0)
- err = 0;
+ err = uffd_wp_range(dst_vma, _start, _end - _start, enable_wp);
+ /* Return 0 on success, <0 on failures */
+ if (err < 0)
+ break;
+ err = 0;
+ }
out_unlock:
mmap_read_unlock(dst_mm);
return err;
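
For reference, a minimal sketch of how a UFFDIO_COPY caller could adopt the renamed entry point and the uffd_flags_t convention introduced above. The names ctx and uffdio_copy are assumed from the ioctl handler context and are not part of this diff:

	/*
	 * Sketch only (not part of this patch): translate the ioctl's
	 * UFFDIO_COPY_MODE_WP bit into MFILL_ATOMIC_WP and call the
	 * renamed helper with its new argument list.
	 */
	uffd_flags_t flags = 0;
	ssize_t ret;

	if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
		flags |= MFILL_ATOMIC_WP;	/* install PTEs write-protected */

	ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
				uffdio_copy.len, &ctx->mmap_changing, flags);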