diff options
author | David Howells <dhowells@redhat.com> | 2024-06-19 00:20:42 +0100 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2024-09-12 12:20:21 +0200 |
commit | db0aa2e9566fda2d23dc8f6c102856ead95578a4 (patch) | |
tree | d39f09396b198bfe21a67cc96d419cda12c3fa7e /lib | |
parent | 22de489d1e9d6c30bf9554a9c58bf1dc8d951f7d (diff) |
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios
Define a data structure, struct folio_queue, to represent a sequence of
folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a
list of folio_queue structures to be used to provide a buffer to
iov_iter-taking functions, such as sendmsg and recvmsg.
The folio_queue structure looks like:
struct folio_queue {
struct folio_batch vec;
u8 orders[PAGEVEC_SIZE];
struct folio_queue *next;
struct folio_queue *prev;
unsigned long marks;
unsigned long marks2;
};
It does not use a list_head so that next and/or prev can be set to NULL at
the ends of the list, allowing iov_iter-handling routines to determine that
they *are* the ends without needing to store a head pointer in the iov_iter
struct.
A folio_batch struct is used to hold the folio pointers which allows the
batch to be passed to batch handling functions. Two mark bits are
available per slot. The intention is to use at least one of them to mark
folios that need putting, but that might not be ultimately necessary.
Accessor functions are used to access the slots to do the masking and an
additional accessor function is used to indicate the size of the array.
The order of each folio is also stored in the structure to avoid the need
for iov_iter_advance() and iov_iter_revert() to have to query each folio to
find its size.
With careful barriering, this can be used as an extending buffer with new
folios inserted and new folio_queue structs added without the need for a
lock. Further, provided we always keep at least one struct in the buffer,
we can also remove consumed folios and consumed structs from the head end
as we without the need for locks.
[Questions/thoughts]
(1) To manage this, I need a head pointer, a tail pointer, a tail slot
number (assuming insertion happens at the tail end and the next
pointers point from head to tail). Should I put these into a struct
of their own, say "folio_queue_head" or "rolling_buffer"?
I will end up with two of these in netfs_io_request eventually, one
keeping track of the pagecache I'm dealing with for buffered I/O and
the other to hold a bounce buffer when we need one.
(2) Should I make the slots {folio,off,len} or bio_vec?
(3) This is intended to replace ITER_XARRAY eventually. Using an xarray
in I/O iteration requires the taking of the RCU read lock, doing
copying under the RCU read lock, walking the xarray (which may change
under us), handling retries and dealing with special values.
The advantage of ITER_XARRAY is that when we're dealing with the
pagecache directly, we don't need any allocation - but if we're doing
encrypted comms, there's a good chance we'd be using a bounce buffer
anyway.
This will require afs, erofs, cifs, orangefs and fscache to be
converted to not use this. afs still uses it for dirs and symlinks;
some of erofs usages should be easy to change, but there's one which
won't be so easy; ceph's use via fscache can be fixed by porting ceph
to netfslib; cifs is using xarray as a bounce buffer - that can be
moved to use sheaves instead; and orangefs has a similar problem to
erofs - maybe orangefs could use netfslib?
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Jeff Layton <jlayton@kernel.org>
cc: Steve French <sfrench@samba.org>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Gao Xiang <xiang@kernel.org>
cc: Mike Marshall <hubcap@omnibond.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: linux-erofs@lists.ozlabs.org
cc: devel@lists.orangefs.org
Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/iov_iter.c | 240 | ||||
-rw-r--r-- | lib/kunit_iov_iter.c | 259 | ||||
-rw-r--r-- | lib/scatterlist.c | 69 |
3 files changed, 564 insertions, 4 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4a6a9f419bd7..97003155bfac 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -527,6 +527,39 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) i->__iov = iov; } +static void iov_iter_folioq_advance(struct iov_iter *i, size_t size) +{ + const struct folio_queue *folioq = i->folioq; + unsigned int slot = i->folioq_slot; + + if (!i->count) + return; + i->count -= size; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + } + + size += i->iov_offset; /* From beginning of current segment. */ + do { + size_t fsize = folioq_folio_size(folioq, slot); + + if (likely(size < fsize)) + break; + size -= fsize; + slot++; + if (slot >= folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } while (size); + + i->iov_offset = size; + i->folioq_slot = slot; + i->folioq = folioq; +} + void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) @@ -539,12 +572,40 @@ void iov_iter_advance(struct iov_iter *i, size_t size) iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); + } else if (iov_iter_is_folioq(i)) { + iov_iter_folioq_advance(i, size); } else if (iov_iter_is_discard(i)) { i->count -= size; } } EXPORT_SYMBOL(iov_iter_advance); +static void iov_iter_folioq_revert(struct iov_iter *i, size_t unroll) +{ + const struct folio_queue *folioq = i->folioq; + unsigned int slot = i->folioq_slot; + + for (;;) { + size_t fsize; + + if (slot == 0) { + folioq = folioq->prev; + slot = folioq_nr_slots(folioq); + } + slot--; + + fsize = folioq_folio_size(folioq, slot); + if (unroll <= fsize) { + i->iov_offset = fsize - unroll; + break; + } + unroll -= fsize; + } + + i->folioq_slot = slot; + i->folioq = folioq; +} + void iov_iter_revert(struct iov_iter *i, size_t unroll) { if (!unroll) @@ -576,6 +637,9 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) } unroll -= n; } + } else if (iov_iter_is_folioq(i)) { + i->iov_offset = 0; + iov_iter_folioq_revert(i, unroll); } else { /* same logics for iovec and kvec */ const struct iovec *iov = iter_iov(i); while (1) { @@ -603,6 +667,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); } + if (unlikely(iov_iter_is_folioq(i))) + return !i->count ? 0 : + umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count); return i->count; } EXPORT_SYMBOL(iov_iter_single_seg_count); @@ -640,6 +707,36 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, EXPORT_SYMBOL(iov_iter_bvec); /** + * iov_iter_folio_queue - Initialise an I/O iterator to use the folios in a folio queue + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @folioq: The starting point in the folio queue. + * @first_slot: The first slot in the folio queue to use + * @offset: The offset into the folio in the first slot to start at + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator to either draw data out of the pages attached to an + * inode or to inject data into those pages. The pages *must* be prevented + * from evaporation, either by taking a ref on them or locking them by the + * caller. + */ +void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, unsigned int first_slot, + unsigned int offset, size_t count) +{ + BUG_ON(direction & ~1); + *i = (struct iov_iter) { + .iter_type = ITER_FOLIOQ, + .data_source = direction, + .folioq = folioq, + .folioq_slot = first_slot, + .count = count, + .iov_offset = offset, + }; +} +EXPORT_SYMBOL(iov_iter_folio_queue); + +/** * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray * @i: The iterator to initialise. * @direction: The direction of the transfer. @@ -765,12 +862,19 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, if (iov_iter_is_bvec(i)) return iov_iter_aligned_bvec(i, addr_mask, len_mask); + /* With both xarray and folioq types, we're dealing with whole folios. */ if (iov_iter_is_xarray(i)) { if (i->count & len_mask) return false; if ((i->xarray_start + i->iov_offset) & addr_mask) return false; } + if (iov_iter_is_folioq(i)) { + if (i->count & len_mask) + return false; + if (i->iov_offset & addr_mask) + return false; + } return true; } @@ -835,6 +939,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) if (iov_iter_is_bvec(i)) return iov_iter_alignment_bvec(i); + /* With both xarray and folioq types, we're dealing with whole folios. */ + if (iov_iter_is_folioq(i)) + return i->iov_offset | i->count; if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; @@ -887,6 +994,62 @@ static int want_pages_array(struct page ***res, size_t size, return count; } +static ssize_t iter_folioq_get_pages(struct iov_iter *iter, + struct page ***ppages, size_t maxsize, + unsigned maxpages, size_t *_start_offset) +{ + const struct folio_queue *folioq = iter->folioq; + struct page **pages; + unsigned int slot = iter->folioq_slot; + size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + if (WARN_ON(iov_offset != 0)) + return -EIO; + } + + maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages); + if (!maxpages) + return -ENOMEM; + *_start_offset = iov_offset & ~PAGE_MASK; + pages = *ppages; + + for (;;) { + struct folio *folio = folioq_folio(folioq, slot); + size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot); + size_t part = PAGE_SIZE - offset % PAGE_SIZE; + + part = umin(part, umin(maxsize - extracted, fsize - offset)); + count -= part; + iov_offset += part; + extracted += part; + + *pages = folio_page(folio, offset / PAGE_SIZE); + get_page(*pages); + pages++; + maxpages--; + if (maxpages == 0 || extracted >= maxsize) + break; + + if (offset >= fsize) { + iov_offset = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + } + + iter->count = count; + iter->iov_offset = iov_offset; + iter->folioq = folioq; + iter->folioq_slot = slot; + return extracted; +} + static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, pgoff_t index, unsigned int nr_pages) { @@ -1034,6 +1197,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, } return maxsize; } + if (iov_iter_is_folioq(i)) + return iter_folioq_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; @@ -1118,6 +1283,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return iov_npages(i, maxpages); if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); + if (iov_iter_is_folioq(i)) { + unsigned offset = i->iov_offset % PAGE_SIZE; + int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); + return min(npages, maxpages); + } if (iov_iter_is_xarray(i)) { unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); @@ -1399,6 +1569,68 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) } /* + * Extract a list of contiguous pages from an ITER_FOLIOQ iterator. This does + * not get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_folioq_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + const struct folio_queue *folioq = i->folioq; + struct page **p; + unsigned int nr = 0; + size_t extracted = 0, offset, slot = i->folioq_slot; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + if (WARN_ON(i->iov_offset != 0)) + return -EIO; + } + + offset = i->iov_offset & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + for (;;) { + struct folio *folio = folioq_folio(folioq, slot); + size_t offset = i->iov_offset, fsize = folioq_folio_size(folioq, slot); + size_t part = PAGE_SIZE - offset % PAGE_SIZE; + + if (offset < fsize) { + part = umin(part, umin(maxsize - extracted, fsize - offset)); + i->count -= part; + i->iov_offset += part; + extracted += part; + + p[nr++] = folio_page(folio, offset / PAGE_SIZE); + } + + if (nr >= maxpages || extracted >= maxsize) + break; + + if (i->iov_offset >= fsize) { + i->iov_offset = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + } + + i->folioq = folioq; + i->folioq_slot = slot; + return extracted; +} + +/* * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not * get references on the pages, nor does it get a pin on them. */ @@ -1618,8 +1850,8 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, * added to the pages, but refs will not be taken. * iov_iter_extract_will_pin() will return true. * - * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are - * merely listed; no extra refs or pins are obtained. + * (*) If the iterator is ITER_KVEC, ITER_BVEC, ITER_FOLIOQ or ITER_XARRAY, the + * pages are merely listed; no extra refs or pins are obtained. * iov_iter_extract_will_pin() will return 0. * * Note also: @@ -1654,6 +1886,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, return iov_iter_extract_bvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); + if (iov_iter_is_folioq(i)) + return iov_iter_extract_folioq_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); if (iov_iter_is_xarray(i)) return iov_iter_extract_xarray_pages(i, pages, maxsize, maxpages, extraction_flags, diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c index 27e0c8ee71d8..13e15687675a 100644 --- a/lib/kunit_iov_iter.c +++ b/lib/kunit_iov_iter.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/uio.h> #include <linux/bvec.h> +#include <linux/folio_queue.h> #include <kunit/test.h> MODULE_DESCRIPTION("iov_iter testing"); @@ -62,6 +63,9 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, KUNIT_ASSERT_EQ(test, got, npages); } + for (int i = 0; i < npages; i++) + pages[i]->index = i; + buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer); @@ -362,6 +366,179 @@ stop: KUNIT_SUCCEED(test); } +static void iov_kunit_destroy_folioq(void *data) +{ + struct folio_queue *folioq, *next; + + for (folioq = data; folioq; folioq = next) { + next = folioq->next; + for (int i = 0; i < folioq_nr_slots(folioq); i++) + if (folioq_folio(folioq, i)) + folio_put(folioq_folio(folioq, i)); + kfree(folioq); + } +} + +static void __init iov_kunit_load_folioq(struct kunit *test, + struct iov_iter *iter, int dir, + struct folio_queue *folioq, + struct page **pages, size_t npages) +{ + struct folio_queue *p = folioq; + size_t size = 0; + int i; + + for (i = 0; i < npages; i++) { + if (folioq_full(p)) { + p->next = kzalloc(sizeof(struct folio_queue), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p->next); + folioq_init(p->next); + p->next->prev = p; + p = p->next; + } + folioq_append(p, page_folio(pages[i])); + size += PAGE_SIZE; + } + iov_iter_folio_queue(iter, dir, folioq, 0, 0, size); +} + +static struct folio_queue *iov_kunit_create_folioq(struct kunit *test) +{ + struct folio_queue *folioq; + + folioq = kzalloc(sizeof(struct folio_queue), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, folioq); + kunit_add_action_or_reset(test, iov_kunit_destroy_folioq, folioq); + folioq_init(folioq); + return folioq; +} + +/* + * Test copying to a ITER_FOLIOQ-type iterator. + */ +static void __init iov_kunit_copy_to_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct folio_queue *folioq; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, patt; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + for (i = 0; i < bufsize; i++) + scratch[i] = pattern(i); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + memset(buffer, 0, bufsize); + + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, READ, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, pr->from); + copied = copy_to_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE); + i += size; + if (test->status == KUNIT_FAILURE) + goto stop; + } + + /* Build the expected image in the scratch buffer. */ + patt = 0; + memset(scratch, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) + for (i = pr->from; i < pr->to; i++) + scratch[i] = pattern(patt++); + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i); + if (buffer[i] != scratch[i]) + return; + } + +stop: + KUNIT_SUCCEED(test); +} + +/* + * Test copying from a ITER_FOLIOQ-type iterator. + */ +static void __init iov_kunit_copy_from_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct folio_queue *folioq; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, j; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + for (i = 0; i < bufsize; i++) + buffer[i] = pattern(i); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + memset(scratch, 0, bufsize); + + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, pr->from); + copied = copy_from_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE); + i += size; + } + + /* Build the expected image in the main buffer. */ + i = 0; + memset(buffer, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + for (j = pr->from; j < pr->to; j++) { + buffer[i++] = pattern(j); + if (i >= bufsize) + goto stop; + } + } +stop: + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i); + if (scratch[i] != buffer[i]) + return; + } + + KUNIT_SUCCEED(test); +} + static void iov_kunit_destroy_xarray(void *data) { struct xarray *xarray = data; @@ -678,6 +855,85 @@ stop: } /* + * Test the extraction of ITER_FOLIOQ-type iterators. + */ +static void __init iov_kunit_extract_pages_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct folio_queue *folioq; + struct iov_iter iter; + struct page **bpages, *pagelist[8], **pages = pagelist; + ssize_t len; + size_t bufsize, size = 0, npages; + int i, from; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + iov_kunit_create_buffer(test, &bpages, npages); + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + from = pr->from; + size = pr->to - from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, from); + + do { + size_t offset0 = LONG_MAX; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) + pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL; + + len = iov_iter_extract_pages(&iter, &pages, 100 * 1024, + ARRAY_SIZE(pagelist), 0, &offset0); + KUNIT_EXPECT_GE(test, len, 0); + if (len < 0) + break; + KUNIT_EXPECT_LE(test, len, size); + KUNIT_EXPECT_EQ(test, iter.count, size - len); + if (len == 0) + break; + size -= len; + KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0); + KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE); + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) { + struct page *p; + ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); + int ix; + + KUNIT_ASSERT_GE(test, part, 0); + ix = from / PAGE_SIZE; + KUNIT_ASSERT_LT(test, ix, npages); + p = bpages[ix]; + KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p); + KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE); + from += part; + len -= part; + KUNIT_ASSERT_GE(test, len, 0); + if (len == 0) + break; + offset0 = 0; + } + + if (test->status == KUNIT_FAILURE) + goto stop; + } while (iov_iter_count(&iter) > 0); + + KUNIT_EXPECT_EQ(test, size, 0); + KUNIT_EXPECT_EQ(test, iter.count, 0); + } + +stop: + KUNIT_SUCCEED(test); +} + +/* * Test the extraction of ITER_XARRAY-type iterators. */ static void __init iov_kunit_extract_pages_xarray(struct kunit *test) @@ -761,10 +1017,13 @@ static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_copy_from_kvec), KUNIT_CASE(iov_kunit_copy_to_bvec), KUNIT_CASE(iov_kunit_copy_from_bvec), + KUNIT_CASE(iov_kunit_copy_to_folioq), + KUNIT_CASE(iov_kunit_copy_from_folioq), KUNIT_CASE(iov_kunit_copy_to_xarray), KUNIT_CASE(iov_kunit_copy_from_xarray), KUNIT_CASE(iov_kunit_extract_pages_kvec), KUNIT_CASE(iov_kunit_extract_pages_bvec), + KUNIT_CASE(iov_kunit_extract_pages_folioq), KUNIT_CASE(iov_kunit_extract_pages_xarray), {} }; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7bc2220fea80..473b2646f71c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -11,6 +11,7 @@ #include <linux/kmemleak.h> #include <linux/bvec.h> #include <linux/uio.h> +#include <linux/folio_queue.h> /** * sg_next - return the next scatterlist entry in a list @@ -1262,6 +1263,67 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, } /* + * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to + * the scatterlist. The pages are not pinned. + */ +static ssize_t extract_folioq_to_sg(struct iov_iter *iter, + ssize_t maxsize, + struct sg_table *sgtable, + unsigned int sg_max, + iov_iter_extraction_t extraction_flags) +{ + const struct folio_queue *folioq = iter->folioq; + struct scatterlist *sg = sgtable->sgl + sgtable->nents; + unsigned int slot = iter->folioq_slot; + ssize_t ret = 0; + size_t offset = iter->iov_offset; + + BUG_ON(!folioq); + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + if (WARN_ON_ONCE(!folioq)) + return 0; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t fsize = folioq_folio_size(folioq, slot); + + if (offset < fsize) { + size_t part = umin(maxsize - ret, fsize - offset); + + sg_set_page(sg, folio_page(folio, 0), part, offset); + sgtable->nents++; + sg++; + sg_max--; + offset += part; + ret += part; + } + + if (offset >= fsize) { + offset = 0; + slot++; + if (slot >= folioq_nr_slots(folioq)) { + if (!folioq->next) { + WARN_ON_ONCE(ret < iter->count); + break; + } + folioq = folioq->next; + slot = 0; + } + } + } while (sg_max > 0 && ret < maxsize); + + iter->folioq = folioq; + iter->folioq_slot = slot; + iter->iov_offset = offset; + iter->count -= ret; + return ret; +} + +/* * Extract up to sg_max folios from an XARRAY-type iterator and add them to * the scatterlist. The pages are not pinned. */ @@ -1323,8 +1385,8 @@ static ssize_t extract_xarray_to_sg(struct iov_iter *iter, * addition of @sg_max elements. * * The pages referred to by UBUF- and IOVEC-type iterators are extracted and - * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE- - * and DISCARD-type are not supported. + * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't + * pinned; DISCARD-type is not supported. * * No end mark is placed on the scatterlist; that's left to the caller. * @@ -1356,6 +1418,9 @@ ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, case ITER_KVEC: return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max, extraction_flags); + case ITER_FOLIOQ: + return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max, + extraction_flags); case ITER_XARRAY: return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max, extraction_flags); |