diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-19 08:39:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-19 08:39:47 -0700 |
commit | 63ae602cea637ee4a6490d940c0da5d78bd0bbe0 (patch) | |
tree | 5da1eec3756fec9d62455c498442944a7f35149b /mm | |
parent | 1a1891d762d6e64daf07b5be4817e3fbb29e3c59 (diff) | |
parent | f307ab6dcea03f9d8e4d70508fd7d1ca57cfa7f9 (diff) |
Merge branch 'gup_flag-cleanups'
Merge the gup_flags cleanups from Lorenzo Stoakes:
"This patch series adjusts functions in the get_user_pages* family such
that desired FOLL_* flags are passed as an argument rather than
implied by flags.
The purpose of this change is to make the use of FOLL_FORCE explicit
so it is easier to grep for and clearer to callers that this flag is
being used. The use of FOLL_FORCE is an issue as it overrides missing
VM_READ/VM_WRITE flags for the VMA whose pages we are reading
from/writing to, which can result in surprising behaviour.
The patch series came out of the discussion around commit 38e088546522
("mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing"),
which addressed a BUG_ON() being triggered when a page was faulted in
with PROT_NONE set but having been overridden by FOLL_FORCE.
do_numa_page() was run on the assumption the page _must_ be one marked
for NUMA node migration as an actual PROT_NONE page would have been
dealt with prior to this code path, however FOLL_FORCE introduced a
situation where this assumption did not hold.
See
https://marc.info/?l=linux-mm&m=147585445805166
for the patch proposal"
Additionally, there's a fix for an ancient bug related to FOLL_FORCE and
FOLL_WRITE by me.
[ This branch was rebased recently to add a few more acked-by's and
reviewed-by's ]
* gup_flag-cleanups:
mm: replace access_process_vm() write parameter with gup_flags
mm: replace access_remote_vm() write parameter with gup_flags
mm: replace __access_remote_vm() write parameter with gup_flags
mm: replace get_user_pages_remote() write/force parameters with gup_flags
mm: replace get_user_pages() write/force parameters with gup_flags
mm: replace get_vaddr_frames() write/force parameters with gup_flags
mm: replace get_user_pages_locked() write/force parameters with gup_flags
mm: replace get_user_pages_unlocked() write/force parameters with gup_flags
mm: remove write/force parameters from __get_user_pages_unlocked()
mm: remove write/force parameters from __get_user_pages_locked()
mm: remove gup_flags FOLL_WRITE games from __get_user_pages()
Diffstat (limited to 'mm')
-rw-r--r-- | mm/frame_vector.c | 9 | ||||
-rw-r--r-- | mm/gup.c | 64 | ||||
-rw-r--r-- | mm/memory.c | 16 | ||||
-rw-r--r-- | mm/mempolicy.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 38 | ||||
-rw-r--r-- | mm/process_vm_access.c | 7 | ||||
-rw-r--r-- | mm/util.c | 8 |
7 files changed, 73 insertions, 71 deletions
diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 381bb07ed14f..db77dcb38afd 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -11,10 +11,7 @@ * get_vaddr_frames() - map virtual addresses to pfns * @start: starting user address * @nr_frames: number of pages / pfns from start to map - * @write: whether pages will be written to by the caller - * @force: whether to force write access even if user mapping is - * readonly. See description of the same argument of - get_user_pages(). + * @gup_flags: flags modifying lookup behaviour * @vec: structure which receives pages / pfns of the addresses mapped. * It should have space for at least nr_frames entries. * @@ -34,7 +31,7 @@ * This function takes care of grabbing mmap_sem as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, - bool write, bool force, struct frame_vector *vec) + unsigned int gup_flags, struct frame_vector *vec) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->got_ref = true; vec->is_pfns = false; ret = get_user_pages_locked(start, nr_frames, - write, force, (struct page **)(vec->ptrs), &locked); + gup_flags, (struct page **)(vec->ptrs), &locked); goto out; } @@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, return -EEXIST; } +/* + * FOLL_FORCE can write to even unwritable pte's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) +{ + return pte_write(pte) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); +} + static struct page *follow_page_pte(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags) { @@ -95,7 +105,7 @@ retry: } if ((flags & FOLL_NUMA) && pte_protnone(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !pte_write(pte)) { + if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { pte_unmap_unlock(ptep, ptl); return NULL; } @@ -412,7 +422,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, * reCOWed by userspace write). */ if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags &= ~FOLL_WRITE; + *flags |= FOLL_COW; return 0; } @@ -729,7 +739,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, struct vm_area_struct **vmas, int *locked, bool notify_drop, @@ -747,10 +756,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, if (pages) flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; pages_done = 0; lock_dropped = false; @@ -843,12 +848,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, * up_read(&mm->mmap_sem); */ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, NULL, locked, true, - FOLL_TOUCH); + pages, NULL, locked, true, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); @@ -864,14 +869,14 @@ EXPORT_SYMBOL(get_user_pages_locked); */ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; int locked = 1; + down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, &locked, false, gup_flags); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, + &locked, false, gup_flags); if (locked) up_read(&mm->mmap_sem); return ret; @@ -896,10 +901,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); * "force" parameter). */ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - write, force, pages, FOLL_TOUCH); + pages, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -910,9 +915,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to by the caller - * @force: whether to force access even when user mapping is currently - * protected (but never forces write access to shared mapping). + * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. @@ -941,9 +944,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * or similar operation cannot guarantee anything stronger anyway because * locks can't be held over the syscall boundary. * - * If write=0, the page must not be written to. If the page is written to, - * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called - * after the page is finished with, and before put_page is called. + * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page + * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must + * be called after the page is finished with, and before put_page is called. * * get_user_pages is typically used for fewer-copy IO operations, to get a * handle on the memory by some means other than accesses via the user virtual @@ -960,12 +963,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked); */ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, - FOLL_TOUCH | FOLL_REMOTE); + return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + NULL, false, + gup_flags | FOLL_TOUCH | FOLL_REMOTE); } EXPORT_SYMBOL(get_user_pages_remote); @@ -976,12 +979,12 @@ EXPORT_SYMBOL(get_user_pages_remote); * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, vmas, NULL, false, - FOLL_TOUCH); + pages, vmas, NULL, false, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1505,7 +1508,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/memory.c b/mm/memory.c index fc1987dfd8cc..e18c57bdc75c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3869,10 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * given task for page fault accounting. */ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3882,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, - write, 1, &page, &vma); + gup_flags, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -3934,14 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* @@ -3950,7 +3951,7 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr, * Do not walk the page table directly, use get_user_pages */ int access_process_vm(struct task_struct *tsk, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; @@ -3959,7 +3960,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, if (!mm) return 0; - ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); return ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ad1c96ac313c..0b859af06b87 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -850,7 +850,7 @@ static int lookup_node(unsigned long addr) struct page *p; int err; - err = get_user_pages(addr & PAGE_MASK, 1, 0, 0, &p, NULL); + err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL); if (err >= 0) { err = page_to_nid(p); put_page(p); diff --git a/mm/nommu.c b/mm/nommu.c index 95daf81a4855..db5fd1795298 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -160,33 +160,25 @@ finish_or_fault: * - don't permit access to VMAs that don't support it, such as I/O mappings */ long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - int flags = 0; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - - return __get_user_pages(current, current->mm, start, nr_pages, flags, - pages, vmas, NULL); + return __get_user_pages(current, current->mm, start, nr_pages, + gup_flags, pages, vmas, NULL); } EXPORT_SYMBOL(get_user_pages); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - return get_user_pages(start, nr_pages, write, force, pages, NULL); + return get_user_pages(start, nr_pages, gup_flags, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; down_read(&mm->mmap_sem); @@ -198,10 +190,10 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - write, force, pages, 0); + pages, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -1817,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe, EXPORT_SYMBOL(filemap_map_pages); static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); @@ -1854,21 +1847,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* * Access another process' address space. * - source/target buffer must be kernel space */ -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, + unsigned int gup_flags) { struct mm_struct *mm; @@ -1879,7 +1873,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, write); + len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); mmput(mm); return len; diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 07514d41ebcc..be8dc8d1edb9 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr, ssize_t rc = 0; unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES / sizeof(struct pages *); + unsigned int flags = FOLL_REMOTE; /* Work out address and page range required */ if (len == 0) return 0; nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + if (vm_write) + flags |= FOLL_WRITE; + while (!rc && nr_pages && iov_iter_count(iter)) { int pages = min(nr_pages, max_pages_per_loop); size_t bytes; @@ -104,8 +108,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * current/current->mm */ pages = __get_user_pages_unlocked(task, mm, pa, pages, - vm_write, 0, process_pages, - FOLL_REMOTE); + process_pages, flags); if (pages <= 0) return -EFAULT; diff --git a/mm/util.c b/mm/util.c index 662cddf914af..952cbe7ad7b7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -283,7 +283,8 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast); int __weak get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - return get_user_pages_unlocked(start, nr_pages, write, 0, pages); + return get_user_pages_unlocked(start, nr_pages, pages, + write ? FOLL_WRITE : 0); } EXPORT_SYMBOL_GPL(get_user_pages_fast); @@ -623,7 +624,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen) if (len > buflen) len = buflen; - res = access_process_vm(task, arg_start, buffer, len, 0); + res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE); /* * If the nul at the end of args has been overwritten, then @@ -638,7 +639,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen) if (len > buflen - res) len = buflen - res; res += access_process_vm(task, env_start, - buffer+res, len, 0); + buffer+res, len, + FOLL_FORCE); res = strnlen(buffer, res); } } |