diff options
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r-- | mm/userfaultfd.c | 287 |
1 files changed, 139 insertions, 148 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 53c3d916ff66..e97a0b4889fc 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -31,11 +31,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma; dst_vma = find_vma(dst_mm, dst_start); - if (!dst_vma) - return NULL; - - if (dst_start < dst_vma->vm_start || - dst_start + len > dst_vma->vm_end) + if (!range_in_vma(dst_vma, dst_start, dst_start + len)) return NULL; /* @@ -55,12 +51,13 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm, * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem * and anon, and for both shared and private VMAs. */ -int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, +int mfill_atomic_install_pte(pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, struct page *page, - bool newly_allocated, bool wp_copy) + bool newly_allocated, uffd_flags_t flags) { int ret; + struct mm_struct *dst_mm = dst_vma->vm_mm; pte_t _dst_pte, *dst_pte; bool writable = dst_vma->vm_flags & VM_WRITE; bool vm_shared = dst_vma->vm_flags & VM_SHARED; @@ -76,7 +73,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, writable = false; if (writable) _dst_pte = pte_mkwrite(_dst_pte); - if (wp_copy) + if (flags & MFILL_ATOMIC_WP) _dst_pte = pte_mkuffd_wp(_dst_pte); dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); @@ -127,25 +124,25 @@ out_unlock: return ret; } -static int mcopy_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - struct page **pagep, - bool wp_copy) +static int mfill_atomic_pte_copy(pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + uffd_flags_t flags, + struct folio **foliop) { - void *page_kaddr; + void *kaddr; int ret; - struct page *page; + struct folio *folio; - if (!*pagep) { + if (!*foliop) { ret = -ENOMEM; - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr); - if (!page) + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma, + dst_addr, false); + if (!folio) goto out; - page_kaddr = kmap_local_page(page); + kaddr = kmap_local_folio(folio, 0); /* * The read mmap_lock is held here. Despite the * mmap_lock being read recursive a deadlock is still @@ -162,52 +159,50 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, * and retry the copy outside the mmap_lock. */ pagefault_disable(); - ret = copy_from_user(page_kaddr, - (const void __user *) src_addr, + ret = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE); pagefault_enable(); - kunmap_local(page_kaddr); + kunmap_local(kaddr); /* fallback to copy_from_user outside mmap_lock */ if (unlikely(ret)) { ret = -ENOENT; - *pagep = page; + *foliop = folio; /* don't free the page */ goto out; } - flush_dcache_page(page); + flush_dcache_folio(folio); } else { - page = *pagep; - *pagep = NULL; + folio = *foliop; + *foliop = NULL; } /* - * The memory barrier inside __SetPageUptodate makes sure that + * The memory barrier inside __folio_mark_uptodate makes sure that * preceding stores to the page contents become visible before * the set_pte_at() write. */ - __SetPageUptodate(page); + __folio_mark_uptodate(folio); ret = -ENOMEM; - if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL)) + if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL)) goto out_release; - ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - page, true, wp_copy); + ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, + &folio->page, true, flags); if (ret) goto out_release; out: return ret; out_release: - put_page(page); + folio_put(folio); goto out; } -static int mfill_zeropage_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr) +static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr) { pte_t _dst_pte, *dst_pte; spinlock_t *ptl; @@ -217,7 +212,7 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr), dst_vma->vm_page_prot)); - dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + dst_pte = pte_offset_map_lock(dst_vma->vm_mm, dst_pmd, dst_addr, &ptl); if (dst_vma->vm_file) { /* the shmem MAP_PRIVATE case requires checking the i_size */ inode = dst_vma->vm_file->f_inode; @@ -230,7 +225,7 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_unlock; - set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_pte_at(dst_vma->vm_mm, dst_addr, dst_pte, _dst_pte); /* No need to invalidate - it was non-present before */ update_mmu_cache(dst_vma, dst_addr, dst_pte); ret = 0; @@ -240,11 +235,10 @@ out_unlock: } /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */ -static int mcontinue_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - bool wp_copy) +static int mfill_atomic_pte_continue(pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + uffd_flags_t flags) { struct inode *inode = file_inode(dst_vma->vm_file); pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); @@ -269,8 +263,8 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, goto out_release; } - ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - page, false, wp_copy); + ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, + page, false, flags); if (ret) goto out_release; @@ -307,23 +301,23 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) #ifdef CONFIG_HUGETLB_PAGE /* - * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is + * mfill_atomic processing for HUGETLB vmas. Note that this routine is * called with mmap_lock held, it will release mmap_lock before returning. */ -static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, +static __always_inline ssize_t mfill_atomic_hugetlb( struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, - enum mcopy_atomic_mode mode, - bool wp_copy) + uffd_flags_t flags) { + struct mm_struct *dst_mm = dst_vma->vm_mm; int vm_shared = dst_vma->vm_flags & VM_SHARED; ssize_t err; pte_t *dst_pte; unsigned long src_addr, dst_addr; long copied; - struct page *page; + struct folio *folio; unsigned long vma_hpagesize; pgoff_t idx; u32 hash; @@ -335,7 +329,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, * by THP. Since we can not reliably insert a zero page, this * feature is not supported. */ - if (mode == MCOPY_ATOMIC_ZEROPAGE) { + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) { mmap_read_unlock(dst_mm); return -EINVAL; } @@ -343,7 +337,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, src_addr = src_start; dst_addr = dst_start; copied = 0; - page = NULL; + folio = NULL; vma_hpagesize = vma_kernel_pagesize(dst_vma); /* @@ -403,7 +397,7 @@ retry: goto out_unlock; } - if (mode != MCOPY_ATOMIC_CONTINUE && + if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) && !huge_pte_none_mostly(huge_ptep_get(dst_pte))) { err = -EEXIST; hugetlb_vma_unlock_read(dst_vma); @@ -411,9 +405,8 @@ retry: goto out_unlock; } - err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, - dst_addr, src_addr, mode, &page, - wp_copy); + err = hugetlb_mfill_atomic_pte(dst_pte, dst_vma, dst_addr, + src_addr, flags, &folio); hugetlb_vma_unlock_read(dst_vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); @@ -422,12 +415,10 @@ retry: if (unlikely(err == -ENOENT)) { mmap_read_unlock(dst_mm); - BUG_ON(!page); + BUG_ON(!folio); - err = copy_huge_page_from_user(page, - (const void __user *)src_addr, - vma_hpagesize / PAGE_SIZE, - true); + err = copy_folio_from_user(folio, + (const void __user *)src_addr, true); if (unlikely(err)) { err = -EFAULT; goto out; @@ -437,7 +428,7 @@ retry: dst_vma = NULL; goto retry; } else - BUG_ON(page); + BUG_ON(folio); if (!err) { dst_addr += vma_hpagesize; @@ -454,8 +445,8 @@ retry: out_unlock: mmap_read_unlock(dst_mm); out: - if (page) - put_page(page); + if (folio) + folio_put(folio); BUG_ON(copied < 0); BUG_ON(err > 0); BUG_ON(!copied && !err); @@ -463,29 +454,25 @@ out: } #else /* !CONFIG_HUGETLB_PAGE */ /* fail at build time if gcc attempts to use this */ -extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, - struct vm_area_struct *dst_vma, - unsigned long dst_start, - unsigned long src_start, - unsigned long len, - enum mcopy_atomic_mode mode, - bool wp_copy); +extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + uffd_flags_t flags); #endif /* CONFIG_HUGETLB_PAGE */ -static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, +static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, - struct page **page, - enum mcopy_atomic_mode mode, - bool wp_copy) + uffd_flags_t flags, + struct folio **foliop) { ssize_t err; - if (mode == MCOPY_ATOMIC_CONTINUE) { - return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - wp_copy); + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) { + return mfill_atomic_pte_continue(dst_pmd, dst_vma, + dst_addr, flags); } /* @@ -499,38 +486,35 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, * and not in the radix tree. */ if (!(dst_vma->vm_flags & VM_SHARED)) { - if (mode == MCOPY_ATOMIC_NORMAL) - err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, - dst_addr, src_addr, page, - wp_copy); + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) + err = mfill_atomic_pte_copy(dst_pmd, dst_vma, + dst_addr, src_addr, + flags, foliop); else - err = mfill_zeropage_pte(dst_mm, dst_pmd, + err = mfill_atomic_pte_zeropage(dst_pmd, dst_vma, dst_addr); } else { - err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + err = shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, src_addr, - mode != MCOPY_ATOMIC_NORMAL, - wp_copy, page); + flags, foliop); } return err; } -static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, - unsigned long dst_start, - unsigned long src_start, - unsigned long len, - enum mcopy_atomic_mode mcopy_mode, - atomic_t *mmap_changing, - __u64 mode) +static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + atomic_t *mmap_changing, + uffd_flags_t flags) { struct vm_area_struct *dst_vma; ssize_t err; pmd_t *dst_pmd; unsigned long src_addr, dst_addr; long copied; - struct page *page; - bool wp_copy; + struct folio *folio; /* * Sanitize the command parameters: @@ -545,7 +529,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, src_addr = src_start; dst_addr = dst_start; copied = 0; - page = NULL; + folio = NULL; retry: mmap_read_lock(dst_mm); @@ -580,21 +564,20 @@ retry: * validate 'mode' now that we know the dst_vma: don't allow * a wrprotect copy if the userfaultfd didn't register as WP. */ - wp_copy = mode & UFFDIO_COPY_MODE_WP; - if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP)) + if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) goto out_unlock; /* * If this is a HUGETLB vma, pass off to appropriate routine */ if (is_vm_hugetlb_page(dst_vma)) - return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, - src_start, len, mcopy_mode, - wp_copy); + return mfill_atomic_hugetlb(dst_vma, dst_start, + src_start, len, flags); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; - if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE) + if (!vma_is_shmem(dst_vma) && + uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) goto out_unlock; /* @@ -641,29 +624,29 @@ retry: BUG_ON(pmd_none(*dst_pmd)); BUG_ON(pmd_trans_huge(*dst_pmd)); - err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - src_addr, &page, mcopy_mode, wp_copy); + err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, + src_addr, flags, &folio); cond_resched(); if (unlikely(err == -ENOENT)) { - void *page_kaddr; + void *kaddr; mmap_read_unlock(dst_mm); - BUG_ON(!page); + BUG_ON(!folio); - page_kaddr = kmap_local_page(page); - err = copy_from_user(page_kaddr, + kaddr = kmap_local_folio(folio, 0); + err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE); - kunmap_local(page_kaddr); + kunmap_local(kaddr); if (unlikely(err)) { err = -EFAULT; goto out; } - flush_dcache_page(page); + flush_dcache_folio(folio); goto retry; } else - BUG_ON(page); + BUG_ON(folio); if (!err) { dst_addr += PAGE_SIZE; @@ -680,43 +663,46 @@ retry: out_unlock: mmap_read_unlock(dst_mm); out: - if (page) - put_page(page); + if (folio) + folio_put(folio); BUG_ON(copied < 0); BUG_ON(err > 0); BUG_ON(!copied && !err); return copied ? copied : err; } -ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, - unsigned long src_start, unsigned long len, - atomic_t *mmap_changing, __u64 mode) +ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, + unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags) { - return __mcopy_atomic(dst_mm, dst_start, src_start, len, - MCOPY_ATOMIC_NORMAL, mmap_changing, mode); + return mfill_atomic(dst_mm, dst_start, src_start, len, mmap_changing, + uffd_flags_set_mode(flags, MFILL_ATOMIC_COPY)); } -ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing) +ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, atomic_t *mmap_changing) { - return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE, - mmap_changing, 0); + return mfill_atomic(dst_mm, start, 0, len, mmap_changing, + uffd_flags_set_mode(0, MFILL_ATOMIC_ZEROPAGE)); } -ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing) +ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, atomic_t *mmap_changing, + uffd_flags_t flags) { - return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE, - mmap_changing, 0); + return mfill_atomic(dst_mm, start, 0, len, mmap_changing, + uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE)); } -long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, +long uffd_wp_range(struct vm_area_struct *dst_vma, unsigned long start, unsigned long len, bool enable_wp) { unsigned int mm_cp_flags; struct mmu_gather tlb; long ret; + VM_WARN_ONCE(start < dst_vma->vm_start || start + len > dst_vma->vm_end, + "The address range exceeds VMA boundary.\n"); if (enable_wp) mm_cp_flags = MM_CP_UFFD_WP; else @@ -730,7 +716,7 @@ long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, */ if (!enable_wp && vma_wants_manual_pte_write_upgrade(dst_vma)) mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; - tlb_gather_mmu(&tlb, dst_mm); + tlb_gather_mmu(&tlb, dst_vma->vm_mm); ret = change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags); tlb_finish_mmu(&tlb); @@ -741,9 +727,12 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing) { + unsigned long end = start + len; + unsigned long _start, _end; struct vm_area_struct *dst_vma; unsigned long page_mask; long err; + VMA_ITERATOR(vmi, dst_mm, start); /* * Sanitize the command parameters: @@ -766,28 +755,30 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, goto out_unlock; err = -ENOENT; - dst_vma = find_dst_vma(dst_mm, start, len); + for_each_vma_range(vmi, dst_vma, end) { - if (!dst_vma) - goto out_unlock; - if (!userfaultfd_wp(dst_vma)) - goto out_unlock; - if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) - goto out_unlock; + if (!userfaultfd_wp(dst_vma)) { + err = -ENOENT; + break; + } - if (is_vm_hugetlb_page(dst_vma)) { - err = -EINVAL; - page_mask = vma_kernel_pagesize(dst_vma) - 1; - if ((start & page_mask) || (len & page_mask)) - goto out_unlock; - } + if (is_vm_hugetlb_page(dst_vma)) { + err = -EINVAL; + page_mask = vma_kernel_pagesize(dst_vma) - 1; + if ((start & page_mask) || (len & page_mask)) + break; + } - err = uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); + _start = max(dst_vma->vm_start, start); + _end = min(dst_vma->vm_end, end); - /* Return 0 on success, <0 on failures */ - if (err > 0) - err = 0; + err = uffd_wp_range(dst_vma, _start, _end - _start, enable_wp); + /* Return 0 on success, <0 on failures */ + if (err < 0) + break; + err = 0; + } out_unlock: mmap_read_unlock(dst_mm); return err; |