diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-03 13:50:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-03 13:50:22 -0700 |
commit | 5264406cdb66c7003eb3edf53c9773b1b20611b9 (patch) | |
tree | e94f76f64a0b3b45dcb9f9bec85cce2ba78e1221 /lib/iov_iter.c | |
parent | 200e340f2196d7fd427a5810d06e893b932f145a (diff) | |
parent | dd45ab9dd28c82fc495d98cd9788666fd8d76b99 (diff) |
Merge tag 'pull-work.iov_iter-base' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs iov_iter updates from Al Viro:
"Part 1 - isolated cleanups and optimizations.
One of the goals is to reduce the overhead of using ->read_iter() and
->write_iter() instead of ->read()/->write().
new_sync_{read,write}() has a surprising amount of overhead, in
particular inside iocb_flags(). That's the explanation for why the
beginning of the series is in this pile; it's not directly
iov_iter-related, but it's a part of the same work..."
* tag 'pull-work.iov_iter-base' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
first_iovec_segment(): just return address
iov_iter: massage calling conventions for first_{iovec,bvec}_segment()
iov_iter: first_{iovec,bvec}_segment() - simplify a bit
iov_iter: lift dealing with maxpages out of first_{iovec,bvec}_segment()
iov_iter_get_pages{,_alloc}(): cap the maxsize with MAX_RW_COUNT
iov_iter_bvec_advance(): don't bother with bvec_iter
copy_page_{to,from}_iter(): switch iovec variants to generic
keep iocb_flags() result cached in struct file
iocb: delay evaluation of IS_SYNC(...) until we want to check IOCB_DSYNC
struct file: use anonymous union member for rcuhead and llist
btrfs: use IOMAP_DIO_NOSYNC
teach iomap_dio_rw() to suppress dsync
No need of likely/unlikely on calls of check_copy_size()
Diffstat (limited to 'lib/iov_iter.c')
-rw-r--r-- | lib/iov_iter.c | 283 |
1 files changed, 52 insertions, 231 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 507e732ef7cf..58648fcd9a88 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -168,174 +168,6 @@ static int copyin(void *to, const void __user *from, size_t n) return n; } -static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *from; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { - kaddr = kmap_atomic(page); - from = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = from - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - from = kaddr + offset; - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; -} - -static size_t copy_page_from_iter_iovec(struct page 
*page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *to; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { - kaddr = kmap_atomic(page); - to = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = to - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - to = kaddr + offset; - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; -} - #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { @@ -848,24 +680,14 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (likely(iter_is_iovec(i))) - return copy_page_to_iter_iovec(page, offset, 
bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { + if (unlikely(iov_iter_is_pipe(i))) { + return copy_page_to_iter_pipe(page, offset, bytes, i); + } else { void *kaddr = kmap_local_page(page); size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); kunmap_local(kaddr); return wanted; } - if (iov_iter_is_pipe(i)) - return copy_page_to_iter_pipe(page, offset, bytes, i); - if (unlikely(iov_iter_is_discard(i))) { - if (unlikely(i->count < bytes)) - bytes = i->count; - i->count -= bytes; - return bytes; - } - WARN_ON(1); - return 0; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, @@ -896,17 +718,12 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (unlikely(!page_copy_sane(page, offset, bytes))) - return 0; - if (likely(iter_is_iovec(i))) - return copy_page_from_iter_iovec(page, offset, bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { + if (page_copy_sane(page, offset, bytes)) { void *kaddr = kmap_local_page(page); size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); kunmap_local(kaddr); return wanted; } - WARN_ON(1); return 0; } EXPORT_SYMBOL(copy_page_from_iter); @@ -1029,17 +846,22 @@ static void pipe_advance(struct iov_iter *i, size_t size) static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { - struct bvec_iter bi; + const struct bio_vec *bvec, *end; - bi.bi_size = i->count; - bi.bi_bvec_done = i->iov_offset; - bi.bi_idx = 0; - bvec_iter_advance(i->bvec, &bi, size); + if (!i->count) + return; + i->count -= size; + + size += i->iov_offset; - i->bvec += bi.bi_idx; - i->nr_segs -= bi.bi_idx; - i->count = bi.bi_size; - i->iov_offset = bi.bi_bvec_done; + for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { + if (likely(size < bvec->bv_len)) + break; + size -= bvec->bv_len; + } + i->iov_offset = size; + i->nr_segs -= bvec - i->bvec; + i->bvec = 
bvec; } static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) @@ -1557,47 +1379,36 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, } /* must be done on non-empty ITER_IOVEC one */ -static unsigned long first_iovec_segment(const struct iov_iter *i, - size_t *size, size_t *start, - size_t maxsize, unsigned maxpages) +static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { - unsigned long addr = (unsigned long)i->iov[k].iov_base + skip; size_t len = i->iov[k].iov_len - skip; if (unlikely(!len)) continue; - if (len > maxsize) - len = maxsize; - len += (*start = addr % PAGE_SIZE); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; - *size = len; - return addr & PAGE_MASK; + if (*size > len) + *size = len; + return (unsigned long)i->iov[k].iov_base + skip; } BUG(); // if it had been empty, we wouldn't get called } /* must be done on non-empty ITER_BVEC one */ static struct page *first_bvec_segment(const struct iov_iter *i, - size_t *size, size_t *start, - size_t maxsize, unsigned maxpages) + size_t *size, size_t *start) { struct page *page; size_t skip = i->iov_offset, len; len = i->bvec->bv_len - skip; - if (len > maxsize) - len = maxsize; + if (*size > len) + *size = len; skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; - len += (*start = skip % PAGE_SIZE); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; - *size = len; + *start = skip % PAGE_SIZE; return page; } @@ -1605,13 +1416,14 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - size_t len; int n, res; if (maxsize > i->count) maxsize = i->count; if (!maxsize) return 0; + if (maxsize > MAX_RW_COUNT) + maxsize = MAX_RW_COUNT; if (likely(iter_is_iovec(i))) { unsigned int gup_flags = 0; @@ -1622,21 +1434,27 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, 
if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, maxpages); - n = DIV_ROUND_UP(len, PAGE_SIZE); + addr = first_iovec_segment(i, &maxsize); + *start = addr % PAGE_SIZE; + addr &= PAGE_MASK; + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); + if (n > maxpages) + n = maxpages; res = get_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) return res; - return (res == n ? len : res * PAGE_SIZE) - *start; + return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, maxsize, maxpages); - n = DIV_ROUND_UP(len, PAGE_SIZE); - while (n--) + page = first_bvec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); + if (n > maxpages) + n = maxpages; + for (int k = 0; k < n; k++) get_page(*pages++ = page++); - return len - *start; + return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1725,13 +1543,14 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - size_t len; int n, res; if (maxsize > i->count) maxsize = i->count; if (!maxsize) return 0; + if (maxsize > MAX_RW_COUNT) + maxsize = MAX_RW_COUNT; if (likely(iter_is_iovec(i))) { unsigned int gup_flags = 0; @@ -1742,8 +1561,10 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, ~0U); - n = DIV_ROUND_UP(len, PAGE_SIZE); + addr = first_iovec_segment(i, &maxsize); + *start = addr % PAGE_SIZE; + addr &= PAGE_MASK; + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); p = get_pages_array(n); if (!p) return -ENOMEM; @@ -1754,19 +1575,19 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return res; } *pages = p; - return (res == n ? 
len : res * PAGE_SIZE) - *start; + return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, maxsize, ~0U); - n = DIV_ROUND_UP(len, PAGE_SIZE); + page = first_bvec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); *pages = p = get_pages_array(n); if (!p) return -ENOMEM; - while (n--) + for (int k = 0; k < n; k++) get_page(*p++ = page++); - return len - *start; + return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages_alloc(i, pages, maxsize, start); |