Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--  mm/mprotect.c  161
1 file changed, 97 insertions(+), 64 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 61cf60015a8b..1d4843c97c2a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -80,13 +80,13 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
 	return pte_dirty(pte);
 }
 
-static unsigned long change_pte_range(struct mmu_gather *tlb,
+static long change_pte_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
-	unsigned long pages = 0;
+	long pages = 0;
 	int target_node = NUMA_NO_NODE;
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
@@ -177,12 +177,10 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
 			ptent = pte_modify(oldpte, newprot);
 
-			if (uffd_wp) {
-				ptent = pte_wrprotect(ptent);
+			if (uffd_wp)
 				ptent = pte_mkuffd_wp(ptent);
-			} else if (uffd_wp_resolve) {
+			else if (uffd_wp_resolve)
 				ptent = pte_clear_uffd_wp(ptent);
-			}
 
 			/*
 			 * In some writable, shared mappings, we might want
@@ -332,36 +330,42 @@ uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags)
 /*
  * If wr-protecting the range for file-backed, populate pgtable for the case
  * when pgtable is empty but page cache exists.  When {pte|pmd|...}_alloc()
- * failed it means no memory, we don't have a better option but stop.
+ * failed we treat it the same way as pgtable allocation failures during
+ * page faults by kicking OOM and returning error.
  */
 #define  change_pmd_prepare(vma, pmd, cp_flags)				\
-	do {								\
+	({								\
+		long err = 0;						\
 		if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {	\
-			if (WARN_ON_ONCE(pte_alloc(vma->vm_mm, pmd)))	\
-				break;					\
+			if (pte_alloc(vma->vm_mm, pmd))			\
+				err = -ENOMEM;				\
 		}							\
-	} while (0)
+		err;							\
+	})
+
 /*
  * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to
  * have separate change_pmd_prepare() because pte_alloc() returns 0 on success,
  * while {pmd|pud|p4d}_alloc() returns the valid pointer on success.
  */
 #define  change_prepare(vma, high, low, addr, cp_flags)			\
-	do {								\
+	({								\
+		long err = 0;						\
 		if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {	\
 			low##_t *p = low##_alloc(vma->vm_mm, high, addr); \
-			if (WARN_ON_ONCE(p == NULL))			\
-				break;					\
+			if (p == NULL)					\
+				err = -ENOMEM;				\
 		}							\
-	} while (0)
+		err;							\
+	})
 
-static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
+static inline long change_pmd_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	pmd_t *pmd;
 	unsigned long next;
-	unsigned long pages = 0;
+	long pages = 0;
 	unsigned long nr_huge_updates = 0;
 	struct mmu_notifier_range range;
 
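The change_pmd_prepare()/change_prepare() rework above turns both macros from do { } while (0) blocks into GNU C statement expressions: the value of the last expression inside ({ ... }) becomes the value of the whole construct, which is how the macros can now hand -ENOMEM back to their callers instead of silently breaking out. A minimal, self-contained sketch of that pattern, assuming nothing from the kernel (alloc_thing() and prepare_thing() are illustrative names, not kernel APIs):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a pgtable allocator: returns 0 on success, nonzero on failure. */
static int alloc_thing(int simulate_failure)
{
	return simulate_failure ? 1 : 0;
}

/*
 * Statement-expression macro: the trailing "err;" is the value of the whole
 * ({ ... }) block, so a caller can write "ret = prepare_thing(...)" and
 * propagate -ENOMEM, mirroring what change_pmd_prepare()/change_prepare()
 * do after this patch.
 */
#define prepare_thing(simulate_failure)			\
	({						\
		long err = 0;				\
		if (alloc_thing(simulate_failure))	\
			err = -ENOMEM;			\
		err;					\
	})

int main(void)
{
	long ret = prepare_thing(1);	/* force the failure path */

	if (ret)	/* -ENOMEM surfaces here, as it does in change_pmd_range() */
		fprintf(stderr, "prepare failed: %ld\n", ret);
	return ret ? EXIT_FAILURE : EXIT_SUCCESS;
}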
@@ -369,11 +373,15 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
 
 	pmd = pmd_offset(pud, addr);
 	do {
-		unsigned long this_pages;
+		long ret;
 
 		next = pmd_addr_end(addr, end);
 
-		change_pmd_prepare(vma, pmd, cp_flags);
+		ret = change_pmd_prepare(vma, pmd, cp_flags);
+		if (ret) {
+			pages = ret;
+			break;
+		}
 		/*
 		 * Automatic NUMA balancing walks the tables with mmap_lock
 		 * held for read. It's possible a parallel update to occur
@@ -390,7 +398,7 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
 		if (!range.start) {
 			mmu_notifier_range_init(&range,
 				MMU_NOTIFY_PROTECTION_VMA, 0,
-				vma, vma->vm_mm, addr, end);
+				vma->vm_mm, addr, end);
 			mmu_notifier_invalidate_range_start(&range);
 		}
 
@@ -403,7 +411,11 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
 				 * cleared; make sure pmd populated if
 				 * necessary, then fall-through to pte level.
 				 */
-				change_pmd_prepare(vma, pmd, cp_flags);
+				ret = change_pmd_prepare(vma, pmd, cp_flags);
+				if (ret) {
+					pages = ret;
+					break;
+				}
 			} else {
 				/*
 				 * change_huge_pmd() does not defer TLB flushes,
@@ -424,9 +436,8 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
 			}
 			/* fall through, the trans huge pmd just split */
 		}
-		this_pages = change_pte_range(tlb, vma, pmd, addr, next,
-					      newprot, cp_flags);
-		pages += this_pages;
+		pages += change_pte_range(tlb, vma, pmd, addr, next,
+					  newprot, cp_flags);
 next:
 		cond_resched();
 	} while (pmd++, addr = next, addr != end);
@@ -439,18 +450,20 @@ next:
 	return pages;
 }
 
-static inline unsigned long change_pud_range(struct mmu_gather *tlb,
+static inline long change_pud_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	pud_t *pud;
 	unsigned long next;
-	unsigned long pages = 0;
+	long pages = 0, ret;
 
 	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		change_prepare(vma, pud, pmd, addr, cp_flags);
+		ret = change_prepare(vma, pud, pmd, addr, cp_flags);
+		if (ret)
+			return ret;
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
@@ -460,18 +473,20 @@ static inline unsigned long change_pud_range(struct mmu_gather *tlb,
 	return pages;
 }
 
-static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
+static inline long change_p4d_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	p4d_t *p4d;
 	unsigned long next;
-	unsigned long pages = 0;
+	long pages = 0, ret;
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
-		change_prepare(vma, p4d, pud, addr, cp_flags);
+		ret = change_prepare(vma, p4d, pud, addr, cp_flags);
+		if (ret)
+			return ret;
 		if (p4d_none_or_clear_bad(p4d))
 			continue;
 		pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
@@ -481,21 +496,25 @@ static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
 	return pages;
 }
 
-static unsigned long change_protection_range(struct mmu_gather *tlb,
+static long change_protection_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long pages = 0;
+	long pages = 0, ret;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	tlb_start_vma(tlb, vma);
 	do {
 		next = pgd_addr_end(addr, end);
-		change_prepare(vma, pgd, p4d, addr, cp_flags);
+		ret = change_prepare(vma, pgd, p4d, addr, cp_flags);
+		if (ret) {
+			pages = ret;
+			break;
+		}
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
@@ -507,15 +526,27 @@ static unsigned long change_protection_range(struct mmu_gather *tlb,
 	return pages;
 }
 
-unsigned long change_protection(struct mmu_gather *tlb,
+long change_protection(struct mmu_gather *tlb,
 		       struct vm_area_struct *vma, unsigned long start,
-		       unsigned long end, pgprot_t newprot,
-		       unsigned long cp_flags)
+		       unsigned long end, unsigned long cp_flags)
 {
-	unsigned long pages;
+	pgprot_t newprot = vma->vm_page_prot;
+	long pages;
 
 	BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
 
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking)
+	 * are expected to reflect their requirements via VMA flags such that
+	 * vma_set_page_prot() will adjust vma->vm_page_prot accordingly.
+	 */
+	if (cp_flags & MM_CP_PROT_NUMA)
+		newprot = PAGE_NONE;
+#else
+	WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA);
+#endif
+
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot,
 						  cp_flags);
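After the hunk above, change_protection() no longer takes a pgprot_t: it uses vma->vm_page_prot (or PAGE_NONE for NUMA-balancing updates) and its return type becomes signed, so a negative value is an errno from pgtable allocation while a non-negative value is the number of pages updated. A rough caller-side sketch of that contract; the wrapper apply_vma_protection() is hypothetical, only the change_protection() prototype comes from this diff:

/*
 * Hypothetical in-kernel caller, assuming the post-patch prototype:
 *   long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma,
 *                          unsigned long start, unsigned long end,
 *                          unsigned long cp_flags);
 */
static long apply_vma_protection(struct mmu_gather *tlb,
				 struct vm_area_struct *vma,
				 unsigned long start, unsigned long end,
				 unsigned long cp_flags)
{
	long pages;

	/* No newprot argument any more: it is derived from vma->vm_page_prot. */
	pages = change_protection(tlb, vma, start, end, cp_flags);
	if (pages < 0)
		return pages;	/* e.g. -ENOMEM bubbled up from change_pmd_prepare() */

	/* Non-negative: the number of PTEs that were actually updated. */
	return pages;
}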
@@ -554,9 +585,9 @@ static const struct mm_walk_ops prot_none_walk_ops = {
 };
 
 int
-mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
-	       struct vm_area_struct **pprev, unsigned long start,
-	       unsigned long end, unsigned long newflags)
+mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
+	       struct vm_area_struct *vma, struct vm_area_struct **pprev,
+	       unsigned long start, unsigned long end, unsigned long newflags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
@@ -611,7 +642,7 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * First try to merge with previous and/or next vma.
 	 */
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-	*pprev = vma_merge(mm, *pprev, start, end, newflags,
+	*pprev = vma_merge(vmi, mm, *pprev, start, end, newflags,
 			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
 			   vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 	if (*pprev) {
@@ -623,13 +654,13 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	*pprev = vma;
 
 	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
+		error = split_vma(vmi, vma, start, 1);
 		if (error)
 			goto fail;
 	}
 
 	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
+		error = split_vma(vmi, vma, end, 0);
 		if (error)
 			goto fail;
 	}
@@ -639,12 +670,12 @@ success:
 	 * vm_flags and vm_page_prot are protected by the mmap_lock
 	 * held in write mode.
 	 */
-	vma->vm_flags = newflags;
+	vm_flags_reset(vma, newflags);
 	if (vma_wants_manual_pte_write_upgrade(vma))
 		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
 	vma_set_page_prot(vma);
 
-	change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags);
+	change_protection(tlb, vma, start, end, mm_cp_flags);
 
 	/*
 	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
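The vm_flags_reset() call above replaces the direct vma->vm_flags store; in kernels that provide the vm_flags helper family, that wrapper is where the "mmap_lock held in write mode" requirement from the comment gets enforced. A small, hedged sketch of what a flag update is expected to look like after this change (make_vma_readonly() is a hypothetical helper, not part of this patch):

/*
 * Hypothetical helper: drop write permission from a VMA the post-patch way.
 * Assumes vm_flags_reset(), mmap_assert_write_locked() and vma_set_page_prot()
 * as found in kernels that carry the vm_flags wrapper helpers.
 */
static void make_vma_readonly(struct vm_area_struct *vma)
{
	unsigned long newflags = vma->vm_flags & ~VM_WRITE;

	mmap_assert_write_locked(vma->vm_mm);	/* caller must hold mmap_lock for write */
	vm_flags_reset(vma, newflags);		/* instead of: vma->vm_flags = newflags */
	vma_set_page_prot(vma);			/* recompute vm_page_prot from the new flags */
}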
@@ -678,7 +709,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
 				(prot & PROT_READ);
 	struct mmu_gather tlb;
-	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+	struct vma_iterator vmi;
 
 	start = untagged_addr(start);
@@ -710,8 +741,8 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
 		goto out;
 
-	mas_set(&mas, start);
-	vma = mas_find(&mas, ULONG_MAX);
+	vma_iter_init(&vmi, current->mm, start);
+	vma = vma_find(&vmi, end);
 	error = -ENOMEM;
 	if (!vma)
 		goto out;
@@ -734,18 +765,22 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 		}
 	}
 
+	prev = vma_prev(&vmi);
 	if (start > vma->vm_start)
 		prev = vma;
-	else
-		prev = mas_prev(&mas, 0);
 
 	tlb_gather_mmu(&tlb, current->mm);
-	for (nstart = start ; ; ) {
+	nstart = start;
+	tmp = vma->vm_start;
+	for_each_vma_range(vmi, vma, end) {
 		unsigned long mask_off_old_flags;
 		unsigned long newflags;
 		int new_vma_pkey;
 
-		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
+		if (vma->vm_start != tmp) {
+			error = -ENOMEM;
+			break;
+		}
 
 		/* Does the application expect PROT_READ to imply PROT_EXEC */
 		if (rier && (vma->vm_flags & VM_MAYEXEC))
@@ -768,6 +803,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 			break;
 		}
 
+		if (map_deny_write_exec(vma, newflags)) {
+			error = -EACCES;
+			goto out;
+		}
+
 		/* Allow architectures to sanity-check the new flags */
 		if (!arch_validate_flags(newflags)) {
 			error = -EINVAL;
@@ -788,25 +828,18 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 			break;
 		}
 
-		error = mprotect_fixup(&tlb, vma, &prev, nstart, tmp, newflags);
+		error = mprotect_fixup(&vmi, &tlb, vma, &prev, nstart, tmp, newflags);
 		if (error)
 			break;
 
 		nstart = tmp;
-
-		if (nstart < prev->vm_end)
-			nstart = prev->vm_end;
-		if (nstart >= end)
-			break;
-
-		vma = find_vma(current->mm, prev->vm_end);
-		if (!vma || vma->vm_start != nstart) {
-			error = -ENOMEM;
-			break;
-		}
 		prot = reqprot;
 	}
 	tlb_finish_mmu(&tlb);
+
+	if (vma_iter_end(&vmi) < end)
+		error = -ENOMEM;
+
 out:
 	mmap_write_unlock(current->mm);
 	return error;
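Taken together, the do_mprotect_pkey() hunks switch the walk over the affected VMAs from repeated find_vma() lookups to a vma_iterator. Reduced to its essentials, the post-patch iteration contract looks roughly like the sketch below; it is illustrative only (walk_range_sketch() is not a real function), and the real code additionally gathers a TLB, computes newflags, checks pkeys and arch flags, and calls mprotect_fixup(&vmi, ...) for each VMA:

/*
 * Sketch of the iteration pattern: for_each_vma_range() yields every VMA
 * overlapping [start, end), and a hole in the mapping shows up as vm_start
 * jumping past the address we expected to continue from.
 */
static int walk_range_sketch(unsigned long start, unsigned long end)
{
	VMA_ITERATOR(vmi, current->mm, start);
	struct vm_area_struct *vma, *prev;
	unsigned long tmp, nstart = start;
	int error = 0;

	vma = vma_find(&vmi, end);		/* first VMA intersecting [start, end) */
	if (!vma)
		return -ENOMEM;

	prev = vma_prev(&vmi);			/* predecessor, as mprotect_fixup() wants it */
	if (start > vma->vm_start)
		prev = vma;

	tmp = vma->vm_start;
	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start != tmp) {	/* gap between VMAs: unmapped bytes */
			error = -ENOMEM;
			break;
		}
		tmp = (vma->vm_end < end) ? vma->vm_end : end;
		/* ... mprotect_fixup(&vmi, &tlb, vma, &prev, nstart, tmp, newflags) ... */
		nstart = tmp;
	}

	if (!error && vma_iter_end(&vmi) < end)	/* ran out of VMAs before reaching end */
		error = -ENOMEM;
	return error;
}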