diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 127 |
1 files changed, 119 insertions, 8 deletions
@@ -29,6 +29,39 @@ struct follow_page_context { unsigned int page_mask; }; +static inline void sanity_check_pinned_pages(struct page **pages, + unsigned long npages) +{ + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return; + + /* + * We only pin anonymous pages if they are exclusive. Once pinned, we + * can no longer turn them possibly shared and PageAnonExclusive() will + * stick around until the page is freed. + * + * We'd like to verify that our pinned anonymous pages are still mapped + * exclusively. The issue with anon THP is that we don't know how + * they are/were mapped when pinning them. However, for anon + * THP we can assume that either the given page (PTE-mapped THP) or + * the head page (PMD-mapped THP) should be PageAnonExclusive(). If + * neither is the case, there is certainly something wrong. + */ + for (; npages; npages--, pages++) { + struct page *page = *pages; + struct folio *folio = page_folio(page); + + if (!folio_test_anon(folio)) + continue; + if (!folio_test_large(folio) || folio_test_hugetlb(folio)) + VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page); + else + /* Either a PTE-mapped or a PMD-mapped THP. */ + VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) && + !PageAnonExclusive(page), page); + } +} + /* * Return the folio with ref appropriately incremented, * or NULL if that failed. 
@@ -204,6 +237,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) */ void unpin_user_page(struct page *page) { + sanity_check_pinned_pages(&page, 1); gup_put_folio(page_folio(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page); @@ -272,6 +306,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, return; } + sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); /* @@ -344,6 +379,23 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, } EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); +static void unpin_user_pages_lockless(struct page **pages, unsigned long npages) +{ + unsigned long i; + struct folio *folio; + unsigned int nr; + + /* + * Don't perform any sanity checks because we might have raced with + * fork() and some anonymous pages might now actually be shared -- + * which is why we're unpinning after all. + */ + for (i = 0; i < npages; i += nr) { + folio = gup_folio_next(pages, npages, i, &nr); + gup_put_folio(folio, nr, FOLL_PIN); + } +} + /** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. @@ -367,6 +419,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages) if (WARN_ON(IS_ERR_VALUE(npages))) return; + sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); @@ -506,6 +559,14 @@ retry: } } + if (!pte_write(pte) && gup_must_unshare(flags, page)) { + page = ERR_PTR(-EMLINK); + goto out; + } + + VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && + !PageAnonExclusive(page), page); + /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. 
*/ if (unlikely(!try_grab_page(page, flags))) { page = ERR_PTR(-ENOMEM); @@ -732,6 +793,11 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches * the device's dev_pagemap metadata to avoid repeating expensive lookups. * + * When getting an anonymous page and the caller has to trigger unsharing + * of a shared anonymous page first, -EMLINK is returned. The caller should + * trigger a fault with FAULT_FLAG_UNSHARE set. Note that unsharing is only + * relevant with FOLL_PIN and !FOLL_WRITE. + * * On output, the @ctx->page_mask is set according to the size of the page. * * Return: the mapped (struct page *), %NULL if no mapping exists, or @@ -787,6 +853,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, if (vma_is_secretmem(vma)) return NULL; + if (foll_flags & FOLL_PIN) + return NULL; + page = follow_page_mask(vma, address, foll_flags, &ctx); if (ctx.pgmap) put_dev_pagemap(ctx.pgmap); @@ -852,7 +921,8 @@ unmap: * is, *@locked will be set to 0 and -EBUSY returned. 
*/ static int faultin_page(struct vm_area_struct *vma, - unsigned long address, unsigned int *flags, int *locked) + unsigned long address, unsigned int *flags, bool unshare, + int *locked) { unsigned int fault_flags = 0; vm_fault_t ret; @@ -874,6 +944,11 @@ static int faultin_page(struct vm_area_struct *vma, */ fault_flags |= FAULT_FLAG_TRIED; } + if (unshare) { + fault_flags |= FAULT_FLAG_UNSHARE; + /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ + VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); + } ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_ERROR) { @@ -1095,8 +1170,9 @@ retry: cond_resched(); page = follow_page_mask(vma, start, foll_flags, &ctx); - if (!page) { - ret = faultin_page(vma, start, &foll_flags, locked); + if (!page || PTR_ERR(page) == -EMLINK) { + ret = faultin_page(vma, start, &foll_flags, + PTR_ERR(page) == -EMLINK, locked); switch (ret) { case 0: goto retry; @@ -2227,6 +2303,11 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, goto pte_unmap; } + if (!pte_write(pte) && gup_must_unshare(flags, page)) { + gup_put_folio(folio, 1, flags); + goto pte_unmap; + } + /* * We need to make the page accessible if and only if we are * going to access its content (the FOLL_PIN case). 
Please @@ -2407,6 +2488,11 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 0; } + if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) { + gup_put_folio(folio, refs, flags); + return 0; + } + *nr += refs; folio_set_referenced(folio); return 1; @@ -2468,6 +2554,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } + if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) { + gup_put_folio(folio, refs, flags); + return 0; + } + *nr += refs; folio_set_referenced(folio); return 1; @@ -2503,6 +2594,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } + if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) { + gup_put_folio(folio, refs, flags); + return 0; + } + *nr += refs; folio_set_referenced(folio); return 1; @@ -2740,8 +2836,10 @@ static unsigned long lockless_pages_from_mm(unsigned long start, */ if (gup_flags & FOLL_PIN) { if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) { - unpin_user_pages(pages, nr_pinned); + unpin_user_pages_lockless(pages, nr_pinned); return 0; + } else { + sanity_check_pinned_pages(pages, nr_pinned); } } return nr_pinned; @@ -2900,6 +2998,9 @@ int pin_user_pages_fast(unsigned long start, int nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return -EINVAL; + if (WARN_ON_ONCE(!pages)) + return -EINVAL; + gup_flags |= FOLL_PIN; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } @@ -2922,6 +3023,9 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages, */ if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return 0; + + if (WARN_ON_ONCE(!pages)) + return 0; /* * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. 
@@ -2949,8 +3053,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. Or NULL, if caller - * only intends to ensure the pages are faulted in. + * Should be at least nr_pages long. * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * @locked: pointer to lock flag indicating whether lock is held and @@ -2973,6 +3076,9 @@ long pin_user_pages_remote(struct mm_struct *mm, if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return -EINVAL; + if (WARN_ON_ONCE(!pages)) + return -EINVAL; + gup_flags |= FOLL_PIN; return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); @@ -2986,8 +3092,7 @@ EXPORT_SYMBOL(pin_user_pages_remote); * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. Or NULL, if caller - * only intends to ensure the pages are faulted in. + * Should be at least nr_pages long. * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * @@ -3005,6 +3110,9 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return -EINVAL; + if (WARN_ON_ONCE(!pages)) + return -EINVAL; + gup_flags |= FOLL_PIN; return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags); @@ -3023,6 +3131,9 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return -EINVAL; + if (WARN_ON_ONCE(!pages)) + return -EINVAL; + gup_flags |= FOLL_PIN; return get_user_pages_unlocked(start, nr_pages, pages, gup_flags); } |