diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 248 |
1 files changed, 141 insertions, 107 deletions
@@ -158,6 +158,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) else folio_ref_add(folio, refs * (GUP_PIN_COUNTING_BIAS - 1)); + /* + * Adjust the pincount before re-checking the PTE for changes. + * This is essentially a smp_mb() and is paired with a memory + * barrier in page_try_share_anon_rmap(). + */ + smp_mb__after_atomic(); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); return folio; @@ -554,7 +561,7 @@ retry: migration_entry_wait(mm, pmd, address); goto retry; } - if ((flags & FOLL_NUMA) && pte_protnone(pte)) + if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) goto no_page; page = vm_normal_page(vma, address, pte); @@ -707,7 +714,7 @@ retry: if (likely(!pmd_trans_huge(pmdval))) return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); - if ((flags & FOLL_NUMA) && pmd_protnone(pmdval)) + if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags)) return no_page_table(vma, flags); retry_locked: @@ -1153,14 +1160,6 @@ static long __get_user_pages(struct mm_struct *mm, VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN))); - /* - * If FOLL_FORCE is set then do not force a full fault as the hinting - * fault information is unrelated to the reference behaviour of a task - * using the address space - */ - if (!(gup_flags & FOLL_FORCE)) - gup_flags |= FOLL_NUMA; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1667,10 +1666,11 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) if (!locked) { locked = 1; mmap_read_lock(mm); - vma = find_vma(mm, nstart); + vma = find_vma_intersection(mm, nstart, end); } else if (nstart >= vma->vm_end) - vma = vma->vm_next; - if (!vma || vma->vm_start >= end) + vma = find_vma_intersection(mm, vma->vm_end, end); + + if (!vma) break; /* * Set [nstart; nend) to intersection of desired address @@ -1927,20 +1927,16 @@ struct page *get_dump_page(unsigned long addr) #ifdef CONFIG_MIGRATION /* - * Check whether all pages are pinnable, if so return number of pages. If some - * pages are not pinnable, migrate them, and unpin all pages. Return zero if - * pages were migrated, or if some pages were not successfully isolated. - * Return negative error if migration fails. + * Returns the number of collected pages. Return value is always >= 0. */ -static long check_and_migrate_movable_pages(unsigned long nr_pages, - struct page **pages, - unsigned int gup_flags) +static unsigned long collect_longterm_unpinnable_pages( + struct list_head *movable_page_list, + unsigned long nr_pages, + struct page **pages) { - unsigned long isolation_error_count = 0, i; + unsigned long i, collected = 0; struct folio *prev_folio = NULL; - LIST_HEAD(movable_page_list); - bool drain_allow = true, coherent_pages = false; - int ret = 0; + bool drain_allow = true; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); @@ -1949,45 +1945,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, continue; prev_folio = folio; - /* - * Device coherent pages are managed by a driver and should not - * be pinned indefinitely as it prevents the driver moving the - * page. So when trying to pin with FOLL_LONGTERM instead try - * to migrate the page out of device memory. - */ - if (folio_is_device_coherent(folio)) { - /* - * We always want a new GUP lookup with device coherent - * pages. - */ - pages[i] = 0; - coherent_pages = true; - - /* - * Migration will fail if the page is pinned, so convert - * the pin on the source page to a normal reference. - */ - if (gup_flags & FOLL_PIN) { - get_page(&folio->page); - unpin_user_page(&folio->page); - } + if (folio_is_longterm_pinnable(folio)) + continue; - ret = migrate_device_coherent_page(&folio->page); - if (ret) - goto unpin_pages; + collected++; + if (folio_is_device_coherent(folio)) continue; - } - if (folio_is_longterm_pinnable(folio)) - continue; - /* - * Try to move out any movable page before pinning the range. - */ if (folio_test_hugetlb(folio)) { - if (isolate_hugetlb(&folio->page, - &movable_page_list)) - isolation_error_count++; + isolate_hugetlb(&folio->page, movable_page_list); continue; } @@ -1996,63 +1963,124 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, drain_allow = false; } - if (folio_isolate_lru(folio)) { - isolation_error_count++; + if (!folio_isolate_lru(folio)) continue; - } - list_add_tail(&folio->lru, &movable_page_list); + + list_add_tail(&folio->lru, movable_page_list); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } - if (!list_empty(&movable_page_list) || isolation_error_count || - coherent_pages) - goto unpin_pages; + return collected; +} - /* - * If list is empty, and no isolation errors, means that all pages are - * in the correct zone. - */ - return nr_pages; +/* + * Unpins all pages and migrates device coherent pages and movable_page_list. + * Returns -EAGAIN if all pages were successfully migrated or -errno for failure + * (or partial success). + */ +static int migrate_longterm_unpinnable_pages( + struct list_head *movable_page_list, + unsigned long nr_pages, + struct page **pages) +{ + int ret; + unsigned long i; -unpin_pages: - /* - * pages[i] might be NULL if any device coherent pages were found. - */ for (i = 0; i < nr_pages; i++) { - if (!pages[i]) + struct folio *folio = page_folio(pages[i]); + + if (folio_is_device_coherent(folio)) { + /* + * Migration will fail if the page is pinned, so convert + * the pin on the source page to a normal reference. + */ + pages[i] = NULL; + folio_get(folio); + gup_put_folio(folio, 1, FOLL_PIN); + + if (migrate_device_coherent_page(&folio->page)) { + ret = -EBUSY; + goto err; + } + continue; + } - if (gup_flags & FOLL_PIN) - unpin_user_page(pages[i]); - else - put_page(pages[i]); + /* + * We can't migrate pages with unexpected references, so drop + * the reference obtained by __get_user_pages_locked(). + * Migrating pages have been added to movable_page_list after + * calling folio_isolate_lru() which takes a reference so the + * page won't be freed if it's migrating. + */ + unpin_user_page(pages[i]); + pages[i] = NULL; } - if (!list_empty(&movable_page_list)) { + if (!list_empty(movable_page_list)) { struct migration_target_control mtc = { .nid = NUMA_NO_NODE, .gfp_mask = GFP_USER | __GFP_NOWARN, }; - ret = migrate_pages(&movable_page_list, alloc_migration_target, - NULL, (unsigned long)&mtc, MIGRATE_SYNC, - MR_LONGTERM_PIN, NULL); - if (ret > 0) /* number of pages not migrated */ + if (migrate_pages(movable_page_list, alloc_migration_target, + NULL, (unsigned long)&mtc, MIGRATE_SYNC, + MR_LONGTERM_PIN, NULL)) { ret = -ENOMEM; + goto err; + } } - if (ret && !list_empty(&movable_page_list)) - putback_movable_pages(&movable_page_list); + putback_movable_pages(movable_page_list); + + return -EAGAIN; + +err: + for (i = 0; i < nr_pages; i++) + if (pages[i]) + unpin_user_page(pages[i]); + putback_movable_pages(movable_page_list); + return ret; } + +/* + * Check whether all pages are *allowed* to be pinned. Rather confusingly, all + * pages in the range are required to be pinned via FOLL_PIN, before calling + * this routine. + * + * If any pages in the range are not allowed to be pinned, then this routine + * will migrate those pages away, unpin all the pages in the range and return + * -EAGAIN. The caller should re-pin the entire range with FOLL_PIN and then + * call this routine again. + * + * If an error other than -EAGAIN occurs, this indicates a migration failure. + * The caller should give up, and propagate the error back up the call stack. + * + * If everything is OK and all pages in the range are allowed to be pinned, then + * this routine leaves all pages pinned and returns zero for success. + */ +static long check_and_migrate_movable_pages(unsigned long nr_pages, + struct page **pages) +{ + unsigned long collected; + LIST_HEAD(movable_page_list); + + collected = collect_longterm_unpinnable_pages(&movable_page_list, + nr_pages, pages); + if (!collected) + return 0; + + return migrate_longterm_unpinnable_pages(&movable_page_list, nr_pages, + pages); +} #else static long check_and_migrate_movable_pages(unsigned long nr_pages, - struct page **pages, - unsigned int gup_flags) + struct page **pages) { - return nr_pages; + return 0; } #endif /* CONFIG_MIGRATION */ @@ -2068,22 +2096,36 @@ static long __gup_longterm_locked(struct mm_struct *mm, unsigned int gup_flags) { unsigned int flags; - long rc; + long rc, nr_pinned_pages; if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, gup_flags); + + /* + * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM + * implies FOLL_PIN (although the reverse is not true). Therefore it is + * correct to unconditionally call check_and_migrate_movable_pages() + * which assumes pages have been pinned via FOLL_PIN. + * + * Enforce the above reasoning by asserting that FOLL_PIN is set. + */ + if (WARN_ON(!(gup_flags & FOLL_PIN))) + return -EINVAL; flags = memalloc_pin_save(); do { - rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, gup_flags); - if (rc <= 0) + nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, + pages, vmas, NULL, + gup_flags); + if (nr_pinned_pages <= 0) { + rc = nr_pinned_pages; break; - rc = check_and_migrate_movable_pages(rc, pages, gup_flags); - } while (!rc); + } + rc = check_and_migrate_movable_pages(nr_pinned_pages, pages); + } while (rc == -EAGAIN); memalloc_pin_restore(flags); - return rc; + return rc ? rc : nr_pinned_pages; } static bool is_valid_gup_flags(unsigned int gup_flags) @@ -2378,11 +2420,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, struct page *page; struct folio *folio; - /* - * Similar to the PMD case below, NUMA hinting must take slow - * path using the pte_protnone check. - */ - if (pte_protnone(pte)) + if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) goto pte_unmap; if (!pte_access_permitted(pte, flags & FOLL_WRITE)) @@ -2766,12 +2804,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_protnone(pmd)) + if (pmd_protnone(pmd) && + !gup_can_follow_protnone(flags)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, |