diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 255 |
1 files changed, 51 insertions, 204 deletions
@@ -545,42 +545,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); - - /* - * Considering PTE level hugetlb, like continuous-PTE hugetlb on - * ARM64 architecture. - */ - if (is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd_pte(vma, address, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - -retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); ptep = pte_offset_map_lock(mm, pmd, address, &ptl); pte = *ptep; - if (!pte_present(pte)) { - swp_entry_t entry; - /* - * KSM's break_ksm() relies upon recognizing a ksm page - * even while it is being migrated, so for that case we - * need migration_entry_wait(). - */ - if (likely(!(flags & FOLL_MIGRATION))) - goto no_page; - if (pte_none(pte)) - goto no_page; - entry = pte_to_swp_entry(pte); - if (!is_migration_entry(entry)) - goto no_page; - pte_unmap_unlock(ptep, ptl); - migration_entry_wait(mm, pmd, address); - goto retry; - } + if (!pte_present(pte)) + goto no_page; if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) goto no_page; @@ -623,7 +594,7 @@ retry: } } - if (!pte_write(pte) && gup_must_unshare(flags, page)) { + if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) { page = ERR_PTR(-EMLINK); goto out; } @@ -690,42 +661,8 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, pmdval = READ_ONCE(*pmd); if (pmd_none(pmdval)) return no_page_table(vma, flags); - if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd_pte(vma, address, flags); - if (page) - return page; + if (!pmd_present(pmdval)) return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pmd_val(pmdval)))) { - page = follow_huge_pd(vma, address, - __hugepd(pmd_val(pmdval)), flags, - PMD_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } -retry: - if (!pmd_present(pmdval)) { - /* - * Should never reach here, if thp migration is not supported; - * Otherwise, it must be a thp migration entry. - */ - VM_BUG_ON(!thp_migration_supported() || - !is_pmd_migration_entry(pmdval)); - - if (likely(!(flags & FOLL_MIGRATION))) - return no_page_table(vma, flags); - - pmd_migration_entry_wait(mm, pmd); - pmdval = READ_ONCE(*pmd); - /* - * MADV_DONTNEED may convert the pmd to null because - * mmap_lock is held in read mode - */ - if (pmd_none(pmdval)) - return no_page_table(vma, flags); - goto retry; - } if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); @@ -739,18 +676,10 @@ retry: if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags)) return no_page_table(vma, flags); -retry_locked: ptl = pmd_lock(mm, pmd); - if (unlikely(pmd_none(*pmd))) { - spin_unlock(ptl); - return no_page_table(vma, flags); - } if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); - if (likely(!(flags & FOLL_MIGRATION))) - return no_page_table(vma, flags); - pmd_migration_entry_wait(mm, pmd); - goto retry_locked; + return no_page_table(vma, flags); } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); @@ -793,20 +722,6 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma, pud = pud_offset(p4dp, address); if (pud_none(*pud)) return no_page_table(vma, flags); - if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pud(mm, address, pud, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pud_val(*pud)))) { - page = follow_huge_pd(vma, address, - __hugepd(pud_val(*pud)), flags, - PUD_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } if (pud_devmap(*pud)) { ptl = pud_lock(mm, pud); page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap); @@ -826,7 +741,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, struct follow_page_context *ctx) { p4d_t *p4d; - struct page *page; p4d = p4d_offset(pgdp, address); if (p4d_none(*p4d)) @@ -835,14 +749,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, if (unlikely(p4d_bad(*p4d))) return no_page_table(vma, flags); - if (is_hugepd(__hugepd(p4d_val(*p4d)))) { - page = follow_huge_pd(vma, address, - __hugepd(p4d_val(*p4d)), flags, - P4D_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } return follow_pud_mask(vma, address, p4d, flags, ctx); } @@ -880,10 +786,18 @@ static struct page *follow_page_mask(struct vm_area_struct *vma, ctx->page_mask = 0; - /* make this handle hugepd */ - page = follow_huge_addr(mm, address, flags & FOLL_WRITE); - if (!IS_ERR(page)) { - WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN)); + /* + * Call hugetlb_follow_page_mask for hugetlb vmas as it will use + * special hugetlb page table walking code. This eliminates the + * need to check for hugetlb entries in the general walking code. + * + * hugetlb_follow_page_mask is only for follow_page() handling here. + * Ordinary GUP uses follow_hugetlb_page for hugetlb processing. + */ + if (is_vm_hugetlb_page(vma)) { + page = hugetlb_follow_page_mask(vma, address, flags); + if (!page) + page = no_page_table(vma, flags); return page; } @@ -892,21 +806,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - if (pgd_huge(*pgd)) { - page = follow_huge_pgd(mm, address, pgd, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pgd_val(*pgd)))) { - page = follow_huge_pd(vma, address, - __hugepd(pgd_val(*pgd)), flags, - PGDIR_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } - return follow_p4d_mask(vma, address, pgd, flags, ctx); } @@ -1077,6 +976,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; + /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */ + if (is_vm_hugetlb_page(vma)) + return -EFAULT; /* * We used to let the write,force case do COW in a * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could @@ -2117,14 +2019,19 @@ static long __gup_longterm_locked(struct mm_struct *mm, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, + int *locked, unsigned int gup_flags) { + bool must_unlock = false; unsigned int flags; long rc, nr_pinned_pages; + if (locked && WARN_ON_ONCE(!*locked)) + return -EINVAL; + if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, gup_flags); + locked, gup_flags); /* * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM @@ -2138,8 +2045,13 @@ static long __gup_longterm_locked(struct mm_struct *mm, return -EINVAL; flags = memalloc_pin_save(); do { + if (locked && !*locked) { + mmap_read_lock(mm); + must_unlock = true; + *locked = 1; + } nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, - pages, vmas, NULL, + pages, vmas, locked, gup_flags); if (nr_pinned_pages <= 0) { rc = nr_pinned_pages; @@ -2149,6 +2061,10 @@ static long __gup_longterm_locked(struct mm_struct *mm, } while (rc == -EAGAIN); memalloc_pin_restore(flags); + if (locked && *locked && must_unlock) { + mmap_read_unlock(mm); + *locked = 0; + } return rc ? rc : nr_pinned_pages; } @@ -2172,35 +2088,6 @@ static bool is_valid_gup_flags(unsigned int gup_flags) } #ifdef CONFIG_MMU -static long __get_user_pages_remote(struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) -{ - /* - * Parts of FOLL_LONGTERM behavior are incompatible with - * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on - * vmas. However, this only comes up if locked is set, and there are - * callers that do request FOLL_LONGTERM, but do not set locked. So, - * allow what we can. - */ - if (gup_flags & FOLL_LONGTERM) { - if (WARN_ON_ONCE(locked)) - return -EINVAL; - /* - * This will check the vmas (even if our vmas arg is NULL) - * and return -ENOTSUPP if DAX isn't allowed in this case: - */ - return __gup_longterm_locked(mm, start, nr_pages, pages, - vmas, gup_flags | FOLL_TOUCH | - FOLL_REMOTE); - } - - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - locked, - gup_flags | FOLL_TOUCH | FOLL_REMOTE); -} - /** * get_user_pages_remote() - pin user pages in memory * @mm: mm_struct of target mm @@ -2269,8 +2156,8 @@ long get_user_pages_remote(struct mm_struct *mm, if (!is_valid_gup_flags(gup_flags)) return -EINVAL; - return __get_user_pages_remote(mm, start, nr_pages, gup_flags, - pages, vmas, locked); + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked, + gup_flags | FOLL_TOUCH | FOLL_REMOTE); } EXPORT_SYMBOL(get_user_pages_remote); @@ -2282,14 +2169,6 @@ long get_user_pages_remote(struct mm_struct *mm, { return 0; } - -static long __get_user_pages_remote(struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) -{ - return 0; -} #endif /* !CONFIG_MMU */ /** @@ -2316,7 +2195,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; return __gup_longterm_locked(current->mm, start, nr_pages, - pages, vmas, gup_flags | FOLL_TOUCH); + pages, vmas, NULL, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -2342,18 +2221,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int locked = 1; long ret; - /* - * FIXME: Current FOLL_LONGTERM behavior is incompatible with - * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on - * vmas. As there are no users of this flag in this call we simply - * disallow this option for now. - */ - if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM)) - return -EINVAL; - mmap_read_lock(mm); - ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, - &locked, gup_flags | FOLL_TOUCH); + ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked, + gup_flags | FOLL_TOUCH); if (locked) mmap_read_unlock(mm); return ret; @@ -2480,7 +2350,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, goto pte_unmap; } - if (!pte_write(pte) && gup_must_unshare(flags, page)) { + if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } @@ -2672,7 +2542,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 0; } - if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) { + if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } @@ -2738,7 +2608,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } - if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) { + if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } @@ -2778,7 +2648,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } - if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) { + if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } @@ -2953,29 +2823,6 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end) } #endif -static int __gup_longterm_unlocked(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - int ret; - - /* - * FIXME: FOLL_LONGTERM does not work with - * get_user_pages_unlocked() (see comments in that function) - */ - if (gup_flags & FOLL_LONGTERM) { - mmap_read_lock(current->mm); - ret = __gup_longterm_locked(current->mm, - start, nr_pages, - pages, NULL, gup_flags); - mmap_read_unlock(current->mm); - } else { - ret = get_user_pages_unlocked(start, nr_pages, - pages, gup_flags); - } - - return ret; -} - static unsigned long lockless_pages_from_mm(unsigned long start, unsigned long end, unsigned int gup_flags, @@ -3060,8 +2907,8 @@ static int internal_get_user_pages_fast(unsigned long start, /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; - ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags, - pages); + ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages, + gup_flags); if (ret < 0) { /* * The caller has to unpin the pages we already pinned so @@ -3260,9 +3107,9 @@ long pin_user_pages_remote(struct mm_struct *mm, if (WARN_ON_ONCE(!pages)) return -EINVAL; - gup_flags |= FOLL_PIN; - return __get_user_pages_remote(mm, start, nr_pages, gup_flags, - pages, vmas, locked); + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked, + gup_flags | FOLL_PIN | FOLL_TOUCH | + FOLL_REMOTE); } EXPORT_SYMBOL(pin_user_pages_remote); @@ -3296,7 +3143,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, gup_flags |= FOLL_PIN; return __gup_longterm_locked(current->mm, start, nr_pages, - pages, vmas, gup_flags); + pages, vmas, NULL, gup_flags); } EXPORT_SYMBOL(pin_user_pages); |