diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:14:04 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:14:04 -0800 |
commit | 4a6bff1187409f2c2ba1b17234541d314f0680fc (patch) | |
tree | 8dcc29cbfde92d6fdc04cb4ff2af7bb1a1dda9fc /fs | |
parent | ad0d9da164cb52e62637e427517b2060dc956a2d (diff) | |
parent | c505feba4c0d76084e56ec498ce819f02a7043ae (diff) |
Merge tag 'erofs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, large folios are now enabled in the iomap/fscache mode
for uncompressed files first. In order to do that, we've also cleaned
up better interfaces between erofs and fscache, which are acked by
fscache/netfs folks and included in this pull request.
Other than that, there are random fixes around erofs over fscache and
crafted images by syzbot, minor cleanups and documentation updates.
Summary:
- Enable large folios for iomap/fscache mode
- Avoid sysfs warning due to mounting twice with the same fsid and
domain_id in fscache mode
- Refine fscache interface among erofs, fscache, and cachefiles
- Use kmap_local_page() only for metabuf
- Fixes around crafted images found by syzbot
- Minor cleanups and documentation updates"
* tag 'erofs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: validate the extent length for uncompressed pclusters
erofs: fix missing unmap if z_erofs_get_extent_compressedlen() fails
erofs: Fix pcluster memleak when its block address is zero
erofs: use kmap_local_page() only for erofs_bread()
erofs: enable large folios for fscache mode
erofs: support large folios for fscache mode
erofs: switch to prepare_ondemand_read() in fscache mode
fscache,cachefiles: add prepare_ondemand_read() callback
erofs: clean up cached I/O strategies
erofs: update documentation
erofs: check the uniqueness of fsid in shared domain in advance
erofs: enable large folios for iomap mode
Diffstat (limited to 'fs')
-rw-r--r-- | fs/cachefiles/io.c | 77 | ||||
-rw-r--r-- | fs/erofs/data.c | 10 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 408 | ||||
-rw-r--r-- | fs/erofs/inode.c | 2 | ||||
-rw-r--r-- | fs/erofs/internal.h | 13 | ||||
-rw-r--r-- | fs/erofs/super.c | 2 | ||||
-rw-r--r-- | fs/erofs/xattr.c | 8 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 80 | ||||
-rw-r--r-- | fs/erofs/zmap.c | 15 |
9 files changed, 297 insertions, 318 deletions
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 000a28f46e59..175a25fcade8 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -385,38 +385,35 @@ static int cachefiles_write(struct netfs_cache_resources *cres, term_func, term_func_priv); } -/* - * Prepare a read operation, shortening it to a cached/uncached - * boundary as appropriate. - */ -static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, - loff_t i_size) +static inline enum netfs_io_source +cachefiles_do_prepare_read(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, loff_t i_size, + unsigned long *_flags, ino_t netfs_ino) { enum cachefiles_prepare_read_trace why; - struct netfs_io_request *rreq = subreq->rreq; - struct netfs_cache_resources *cres = &rreq->cache_resources; - struct cachefiles_object *object; + struct cachefiles_object *object = NULL; struct cachefiles_cache *cache; struct fscache_cookie *cookie = fscache_cres_cookie(cres); const struct cred *saved_cred; struct file *file = cachefiles_cres_file(cres); enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER; + size_t len = *_len; loff_t off, to; ino_t ino = file ? file_inode(file)->i_ino : 0; int rc; - _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); + _enter("%zx @%llx/%llx", len, start, i_size); - if (subreq->start >= i_size) { + if (start >= i_size) { ret = NETFS_FILL_WITH_ZEROES; why = cachefiles_trace_read_after_eof; goto out_no_object; } if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { - __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); + __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags); why = cachefiles_trace_read_no_data; - if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) + if (!test_bit(NETFS_SREQ_ONDEMAND, _flags)) goto out_no_object; } @@ -437,7 +434,7 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest * retry: off = cachefiles_inject_read_error(); if (off == 0) - off = vfs_llseek(file, subreq->start, SEEK_DATA); + off = vfs_llseek(file, start, SEEK_DATA); if (off < 0 && off >= (loff_t)-MAX_ERRNO) { if (off == (loff_t)-ENXIO) { why = cachefiles_trace_read_seek_nxio; @@ -449,21 +446,22 @@ retry: goto out; } - if (off >= subreq->start + subreq->len) { + if (off >= start + len) { why = cachefiles_trace_read_found_hole; goto download_and_store; } - if (off > subreq->start) { + if (off > start) { off = round_up(off, cache->bsize); - subreq->len = off - subreq->start; + len = off - start; + *_len = len; why = cachefiles_trace_read_found_part; goto download_and_store; } to = cachefiles_inject_read_error(); if (to == 0) - to = vfs_llseek(file, subreq->start, SEEK_HOLE); + to = vfs_llseek(file, start, SEEK_HOLE); if (to < 0 && to >= (loff_t)-MAX_ERRNO) { trace_cachefiles_io_error(object, file_inode(file), to, cachefiles_trace_seek_error); @@ -471,12 +469,13 @@ retry: goto out; } - if (to < subreq->start + subreq->len) { - if (subreq->start + subreq->len >= i_size) + if (to < start + len) { + if (start + len >= i_size) to = round_up(to, cache->bsize); else to = round_down(to, cache->bsize); - subreq->len = to - subreq->start; + len = to - start; + *_len = len; } why = cachefiles_trace_read_have_data; @@ -484,12 +483,11 @@ retry: goto out; download_and_store: - __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); - if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) { - rc = cachefiles_ondemand_read(object, subreq->start, - subreq->len); + __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags); + if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) { + rc = cachefiles_ondemand_read(object, start, len); if (!rc) { - __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags); + __clear_bit(NETFS_SREQ_ONDEMAND, _flags); goto retry; } ret = NETFS_INVALID_READ; @@ -497,11 +495,35 @@ download_and_store: out: cachefiles_end_secure(cache, saved_cred); out_no_object: - trace_cachefiles_prep_read(subreq, ret, why, ino); + trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino); return ret; } /* + * Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ +static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, + loff_t i_size) +{ + return cachefiles_do_prepare_read(&subreq->rreq->cache_resources, + subreq->start, &subreq->len, i_size, + &subreq->flags, subreq->rreq->inode->i_ino); +} + +/* + * Prepare an on-demand read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ +static enum netfs_io_source +cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, loff_t i_size, + unsigned long *_flags, ino_t ino) +{ + return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino); +} + +/* * Prepare for a write to occur. */ int __cachefiles_prepare_write(struct cachefiles_object *object, @@ -621,6 +643,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { .write = cachefiles_write, .prepare_read = cachefiles_prepare_read, .prepare_write = cachefiles_prepare_write, + .prepare_ondemand_read = cachefiles_prepare_ondemand_read, .query_occupancy = cachefiles_query_occupancy, }; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index fe8ac0e163f7..f57f921683d7 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -13,9 +13,7 @@ void erofs_unmap_metabuf(struct erofs_buf *buf) { if (buf->kmap_type == EROFS_KMAP) - kunmap(buf->page); - else if (buf->kmap_type == EROFS_KMAP_ATOMIC) - kunmap_atomic(buf->base); + kunmap_local(buf->base); buf->base = NULL; buf->kmap_type = EROFS_NO_KMAP; } @@ -54,9 +52,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode, } if (buf->kmap_type == EROFS_NO_KMAP) { if (type == EROFS_KMAP) - buf->base = kmap(page); - else if (type == EROFS_KMAP_ATOMIC) - buf->base = kmap_atomic(page); + buf->base = kmap_local_page(page); buf->kmap_type = type; } else if (buf->kmap_type != type) { DBG_BUGON(1); @@ -403,6 +399,8 @@ const struct address_space_operations erofs_raw_access_aops = { .readahead = erofs_readahead, .bmap = erofs_bmap, .direct_IO = noop_direct_IO, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, }; #ifdef CONFIG_FS_DAX diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 4c837be3b6e3..014e20962376 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -11,265 +11,201 @@ static DEFINE_MUTEX(erofs_domain_cookies_lock); static LIST_HEAD(erofs_domain_list); static struct vfsmount *erofs_pseudo_mnt; -static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping, +struct erofs_fscache_request { + struct erofs_fscache_request *primary; + struct netfs_cache_resources cache_resources; + struct address_space *mapping; /* The mapping being accessed */ + loff_t start; /* Start position */ + size_t len; /* Length of the request */ + size_t submitted; /* Length of submitted */ + short error; /* 0 or error that occurred */ + refcount_t ref; +}; + +static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping, loff_t start, size_t len) { - struct netfs_io_request *rreq; + struct erofs_fscache_request *req; - rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL); - if (!rreq) + req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL); + if (!req) return ERR_PTR(-ENOMEM); - rreq->start = start; - rreq->len = len; - rreq->mapping = mapping; - rreq->inode = mapping->host; - INIT_LIST_HEAD(&rreq->subrequests); - refcount_set(&rreq->ref, 1); - return rreq; -} + req->mapping = mapping; + req->start = start; + req->len = len; + refcount_set(&req->ref, 1); -static void erofs_fscache_put_request(struct netfs_io_request *rreq) -{ - if (!refcount_dec_and_test(&rreq->ref)) - return; - if (rreq->cache_resources.ops) - rreq->cache_resources.ops->end_operation(&rreq->cache_resources); - kfree(rreq); + return req; } -static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq) +static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary, + size_t len) { - if (!refcount_dec_and_test(&subreq->ref)) - return; - erofs_fscache_put_request(subreq->rreq); - kfree(subreq); -} + struct erofs_fscache_request *req; -static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; + /* use primary request for the first submission */ + if (!primary->submitted) { + refcount_inc(&primary->ref); + return primary; + } - while (!list_empty(&rreq->subrequests)) { - subreq = list_first_entry(&rreq->subrequests, - struct netfs_io_subrequest, rreq_link); - list_del(&subreq->rreq_link); - erofs_fscache_put_subrequest(subreq); + req = erofs_fscache_req_alloc(primary->mapping, + primary->start + primary->submitted, len); + if (!IS_ERR(req)) { + req->primary = primary; + refcount_inc(&primary->ref); } + return req; } -static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq) +static void erofs_fscache_req_complete(struct erofs_fscache_request *req) { - struct netfs_io_subrequest *subreq; struct folio *folio; - unsigned int iopos = 0; - pgoff_t start_page = rreq->start / PAGE_SIZE; - pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; - bool subreq_failed = false; + bool failed = req->error; + pgoff_t start_page = req->start / PAGE_SIZE; + pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1; - XA_STATE(xas, &rreq->mapping->i_pages, start_page); - - subreq = list_first_entry(&rreq->subrequests, - struct netfs_io_subrequest, rreq_link); - subreq_failed = (subreq->error < 0); + XA_STATE(xas, &req->mapping->i_pages, start_page); rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos, pgend; - bool pg_failed = false; - if (xas_retry(&xas, folio)) continue; - - pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; - pgend = pgpos + folio_size(folio); - - for (;;) { - if (!subreq) { - pg_failed = true; - break; - } - - pg_failed |= subreq_failed; - if (pgend < iopos + subreq->len) - break; - - iopos += subreq->len; - if (!list_is_last(&subreq->rreq_link, - &rreq->subrequests)) { - subreq = list_next_entry(subreq, rreq_link); - subreq_failed = (subreq->error < 0); - } else { - subreq = NULL; - subreq_failed = false; - } - if (pgend == iopos) - break; - } - - if (!pg_failed) + if (!failed) folio_mark_uptodate(folio); - folio_unlock(folio); } rcu_read_unlock(); } -static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq) +static void erofs_fscache_req_put(struct erofs_fscache_request *req) { - erofs_fscache_rreq_unlock_folios(rreq); - erofs_fscache_clear_subrequests(rreq); - erofs_fscache_put_request(rreq); + if (refcount_dec_and_test(&req->ref)) { + if (req->cache_resources.ops) + req->cache_resources.ops->end_operation(&req->cache_resources); + if (!req->primary) + erofs_fscache_req_complete(req); + else + erofs_fscache_req_put(req->primary); + kfree(req); + } } -static void erofc_fscache_subreq_complete(void *priv, +static void erofs_fscache_subreq_complete(void *priv, ssize_t transferred_or_error, bool was_async) { - struct netfs_io_subrequest *subreq = priv; - struct netfs_io_request *rreq = subreq->rreq; - - if (IS_ERR_VALUE(transferred_or_error)) - subreq->error = transferred_or_error; + struct erofs_fscache_request *req = priv; - if (atomic_dec_and_test(&rreq->nr_outstanding)) - erofs_fscache_rreq_complete(rreq); - - erofs_fscache_put_subrequest(subreq); + if (IS_ERR_VALUE(transferred_or_error)) { + if (req->primary) + req->primary->error = transferred_or_error; + else + req->error = transferred_or_error; + } + erofs_fscache_req_put(req); } /* - * Read data from fscache and fill the read data into page cache described by - * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes - * the start physical address in the cache file. + * Read data from fscache (cookie, pstart, len), and fill the read data into + * page cache described by (req->mapping, lstart, len). @pstart describeis the + * start physical address in the cache file. */ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, - struct netfs_io_request *rreq, loff_t pstart) + struct erofs_fscache_request *req, loff_t pstart, size_t len) { enum netfs_io_source source; - struct super_block *sb = rreq->mapping->host->i_sb; - struct netfs_io_subrequest *subreq; - struct netfs_cache_resources *cres = &rreq->cache_resources; + struct super_block *sb = req->mapping->host->i_sb; + struct netfs_cache_resources *cres = &req->cache_resources; struct iov_iter iter; - loff_t start = rreq->start; - size_t len = rreq->len; + loff_t lstart = req->start + req->submitted; size_t done = 0; int ret; - atomic_set(&rreq->nr_outstanding, 1); + DBG_BUGON(len > req->len - req->submitted); ret = fscache_begin_read_operation(cres, cookie); if (ret) - goto out; + return ret; while (done < len) { - subreq = kzalloc(sizeof(struct netfs_io_subrequest), - GFP_KERNEL); - if (subreq) { - INIT_LIST_HEAD(&subreq->rreq_link); - refcount_set(&subreq->ref, 2); - subreq->rreq = rreq; - refcount_inc(&rreq->ref); - } else { - ret = -ENOMEM; - goto out; - } - - subreq->start = pstart + done; - subreq->len = len - done; - subreq->flags = 1 << NETFS_SREQ_ONDEMAND; + loff_t sstart = pstart + done; + size_t slen = len - done; + unsigned long flags = 1 << NETFS_SREQ_ONDEMAND; - list_add_tail(&subreq->rreq_link, &rreq->subrequests); - - source = cres->ops->prepare_read(subreq, LLONG_MAX); - if (WARN_ON(subreq->len == 0)) + source = cres->ops->prepare_ondemand_read(cres, + sstart, &slen, LLONG_MAX, &flags, 0); + if (WARN_ON(slen == 0)) source = NETFS_INVALID_READ; if (source != NETFS_READ_FROM_CACHE) { - erofs_err(sb, "failed to fscache prepare_read (source %d)", - source); - ret = -EIO; - subreq->error = ret; - erofs_fscache_put_subrequest(subreq); - goto out; + erofs_err(sb, "failed to fscache prepare_read (source %d)", source); + return -EIO; } - atomic_inc(&rreq->nr_outstanding); + refcount_inc(&req->ref); + iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages, + lstart + done, slen); - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, - start + done, subreq->len); - - ret = fscache_read(cres, subreq->start, &iter, - NETFS_READ_HOLE_FAIL, - erofc_fscache_subreq_complete, subreq); + ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL, + erofs_fscache_subreq_complete, req); if (ret == -EIOCBQUEUED) ret = 0; if (ret) { erofs_err(sb, "failed to fscache_read (ret %d)", ret); - goto out; + return ret; } - done += subreq->len; + done += slen; } -out: - if (atomic_dec_and_test(&rreq->nr_outstanding)) - erofs_fscache_rreq_complete(rreq); - - return ret; + DBG_BUGON(done != len); + return 0; } static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) { int ret; struct super_block *sb = folio_mapping(folio)->host->i_sb; - struct netfs_io_request *rreq; + struct erofs_fscache_request *req; struct erofs_map_dev mdev = { .m_deviceid = 0, .m_pa = folio_pos(folio), }; ret = erofs_map_dev(sb, &mdev); - if (ret) - goto out; + if (ret) { + folio_unlock(folio); + return ret; + } - rreq = erofs_fscache_alloc_request(folio_mapping(folio), + req = erofs_fscache_req_alloc(folio_mapping(folio), folio_pos(folio), folio_size(folio)); - if (IS_ERR(rreq)) { - ret = PTR_ERR(rreq); - goto out; + if (IS_ERR(req)) { + folio_unlock(folio); + return PTR_ERR(req); } - return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - rreq, mdev.m_pa); -out: - folio_unlock(folio); + ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + req, mdev.m_pa, folio_size(folio)); + if (ret) + req->error = ret; + + erofs_fscache_req_put(req); return ret; } -/* - * Read into page cache in the range described by (@pos, @len). - * - * On return, the caller is responsible for page unlocking if the output @unlock - * is true, or the callee will take this responsibility through netfs_io_request - * interface. - * - * The return value is the number of bytes successfully handled, or negative - * error code on failure. The only exception is that, the length of the range - * instead of the error code is returned on failure after netfs_io_request is - * allocated, so that .readahead() could advance rac accordingly. - */ -static int erofs_fscache_data_read(struct address_space *mapping, - loff_t pos, size_t len, bool *unlock) +static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) { + struct address_space *mapping = primary->mapping; struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; - struct netfs_io_request *rreq; + struct erofs_fscache_request *req; struct erofs_map_blocks map; struct erofs_map_dev mdev; struct iov_iter iter; + loff_t pos = primary->start + primary->submitted; size_t count; int ret; - *unlock = true; - map.m_la = pos; ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); if (ret) @@ -297,17 +233,19 @@ static int erofs_fscache_data_read(struct address_space *mapping, } iov_iter_zero(PAGE_SIZE - size, &iter); erofs_put_metabuf(&buf); - return PAGE_SIZE; + primary->submitted += PAGE_SIZE; + return 0; } + count = primary->len - primary->submitted; if (!(map.m_flags & EROFS_MAP_MAPPED)) { - count = len; iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count); iov_iter_zero(count, &iter); - return count; + primary->submitted += count; + return 0; } - count = min_t(size_t, map.m_llen - (pos - map.m_la), len); + count = min_t(size_t, map.m_llen - (pos - map.m_la), count); DBG_BUGON(!count || count % PAGE_SIZE); mdev = (struct erofs_map_dev) { @@ -318,64 +256,65 @@ static int erofs_fscache_data_read(struct address_space *mapping, if (ret) return ret; - rreq = erofs_fscache_alloc_request(mapping, pos, count); - if (IS_ERR(rreq)) - return PTR_ERR(rreq); + req = erofs_fscache_req_chain(primary, count); + if (IS_ERR(req)) + return PTR_ERR(req); - *unlock = false; - erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - rreq, mdev.m_pa + (pos - map.m_la)); - return count; + ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + req, mdev.m_pa + (pos - map.m_la), count); + erofs_fscache_req_put(req); + primary->submitted += count; + return ret; } -static int erofs_fscache_read_folio(struct file *file, struct folio *folio) +static int erofs_fscache_data_read(struct erofs_fscache_request *req) { - bool unlock; int ret; - DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); + do { + ret = erofs_fscache_data_read_slice(req); + if (ret) + req->error = ret; + } while (!ret && req->submitted < req->len); - ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio), - folio_size(folio), &unlock); - if (unlock) { - if (ret > 0) - folio_mark_uptodate(folio); + return ret; +} + +static int erofs_fscache_read_folio(struct file *file, struct folio *folio) +{ + struct erofs_fscache_request *req; + int ret; + + req = erofs_fscache_req_alloc(folio_mapping(folio), + folio_pos(folio), folio_size(folio)); + if (IS_ERR(req)) { folio_unlock(folio); + return PTR_ERR(req); } - return ret < 0 ? ret : 0; + + ret = erofs_fscache_data_read(req); + erofs_fscache_req_put(req); + return ret; } static void erofs_fscache_readahead(struct readahead_control *rac) { - struct folio *folio; - size_t len, done = 0; - loff_t start, pos; - bool unlock; - int ret, size; + struct erofs_fscache_request *req; if (!readahead_count(rac)) return; - start = readahead_pos(rac); - len = readahead_length(rac); + req = erofs_fscache_req_alloc(rac->mapping, + readahead_pos(rac), readahead_length(rac)); + if (IS_ERR(req)) + return; - do { - pos = start + done; - ret = erofs_fscache_data_read(rac->mapping, pos, - len - done, &unlock); - if (ret <= 0) - return; + /* The request completion will drop refs on the folios. */ + while (readahead_folio(rac)) + ; - size = ret; - while (size) { - folio = readahead_folio(rac); - size -= folio_size(folio); - if (unlock) { - folio_mark_uptodate(folio); - folio_unlock(folio); - } - } - } while ((done += ret) < len); + erofs_fscache_data_read(req); + erofs_fscache_req_put(req); } static const struct address_space_operations erofs_fscache_meta_aops = { @@ -494,7 +433,8 @@ static int erofs_fscache_register_domain(struct super_block *sb) static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, - char *name, bool need_inode) + char *name, + unsigned int flags) { struct fscache_volume *volume = EROFS_SB(sb)->volume; struct erofs_fscache *ctx; @@ -516,7 +456,7 @@ struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, fscache_use_cookie(cookie, false); ctx->cookie = cookie; - if (need_inode) { + if (flags & EROFS_REG_COOKIE_NEED_INODE) { struct inode *const inode = new_inode(sb); if (!inode) { @@ -554,14 +494,15 @@ static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx) static struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb, - char *name, bool need_inode) + char *name, + unsigned int flags) { int err; struct inode *inode; struct erofs_fscache *ctx; struct erofs_domain *domain = EROFS_SB(sb)->domain; - ctx = erofs_fscache_acquire_cookie(sb, name, need_inode); + ctx = erofs_fscache_acquire_cookie(sb, name, flags); if (IS_ERR(ctx)) return ctx; @@ -589,7 +530,8 @@ out: static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, - char *name, bool need_inode) + char *name, + unsigned int flags) { struct inode *inode; struct erofs_fscache *ctx; @@ -602,23 +544,30 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, ctx = inode->i_private; if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) continue; - igrab(inode); + if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) { + igrab(inode); + } else { + erofs_err(sb, "%s already exists in domain %s", name, + domain->domain_id); + ctx = ERR_PTR(-EEXIST); + } spin_unlock(&psb->s_inode_list_lock); mutex_unlock(&erofs_domain_cookies_lock); return ctx; } spin_unlock(&psb->s_inode_list_lock); - ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); + ctx = erofs_fscache_domain_init_cookie(sb, name, flags); mutex_unlock(&erofs_domain_cookies_lock); return ctx; } struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, - char *name, bool need_inode) + char *name, + unsigned int flags) { if (EROFS_SB(sb)->domain_id) - return erofs_domain_register_cookie(sb, name, need_inode); - return erofs_fscache_acquire_cookie(sb, name, need_inode); + return erofs_domain_register_cookie(sb, name, flags); + return erofs_fscache_acquire_cookie(sb, name, flags); } void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx) @@ -647,6 +596,7 @@ int erofs_fscache_register_fs(struct super_block *sb) int ret; struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_fscache *fscache; + unsigned int flags; if (sbi->domain_id) ret = erofs_fscache_register_domain(sb); @@ -655,8 +605,20 @@ int erofs_fscache_register_fs(struct super_block *sb) if (ret) return ret; - /* acquired domain/volume will be relinquished in kill_sb() on error */ - fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true); + /* + * When shared domain is enabled, using NEED_NOEXIST to guarantee + * the primary data blob (aka fsid) is unique in the shared domain. + * + * For non-shared-domain case, fscache_acquire_volume() invoked by + * erofs_fscache_register_volume() has already guaranteed + * the uniqueness of primary data blob. + * + * Acquired domain/volume will be relinquished in kill_sb() on error. + */ + flags = EROFS_REG_COOKIE_NEED_INODE; + if (sbi->domain_id) + flags |= EROFS_REG_COOKIE_NEED_NOEXIST; + fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags); if (IS_ERR(fscache)) return PTR_ERR(fscache); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 2d571343deec..d3b8736fa124 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -268,6 +268,7 @@ static int erofs_fill_inode(struct inode *inode) case S_IFDIR: inode->i_op = &erofs_dir_iops; inode->i_fop = &erofs_dir_fops; + inode_nohighmem(inode); break; case S_IFLNK: err = erofs_fill_symlink(inode, kaddr, ofs); @@ -295,6 +296,7 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } inode->i_mapping->a_ops = &erofs_raw_access_aops; + mapping_set_large_folios(inode->i_mapping); #ifdef CONFIG_EROFS_FS_ONDEMAND if (erofs_is_fscache_mode(inode->i_sb)) inode->i_mapping->a_ops = &erofs_fscache_access_aops; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 05dc68627722..bb8501c0ff5b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -255,8 +255,7 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) enum erofs_kmap_type { EROFS_NO_KMAP, /* don't map the buffer */ - EROFS_KMAP, /* use kmap() to map the buffer */ - EROFS_KMAP_ATOMIC, /* use kmap_atomic() to map the buffer */ + EROFS_KMAP, /* use kmap_local_page() to map the buffer */ }; struct erofs_buf { @@ -604,13 +603,18 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb, } #endif /* !CONFIG_EROFS_FS_ZIP */ +/* flags for erofs_fscache_register_cookie() */ +#define EROFS_REG_COOKIE_NEED_INODE 1 +#define EROFS_REG_COOKIE_NEED_NOEXIST 2 + /* fscache.c */ #ifdef CONFIG_EROFS_FS_ONDEMAND int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, - char *name, bool need_inode); + char *name, + unsigned int flags); void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); extern const struct address_space_operations erofs_fscache_access_aops; @@ -623,7 +627,8 @@ static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} static inline struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, - char *name, bool need_inode) + char *name, + unsigned int flags) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 1c7dcca702b3..481788c24a68 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -245,7 +245,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, } if (erofs_is_fscache_mode(sb)) { - fscache = erofs_fscache_register_cookie(sb, dif->path, false); + fscache = erofs_fscache_register_cookie(sb, dif->path, 0); if (IS_ERR(fscache)) return PTR_ERR(fscache); dif->fscache = fscache; diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 8106bcb5a38d..a62fb8a3318a 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -148,7 +148,7 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) it->blkaddr += erofs_blknr(it->ofs); it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr, - EROFS_KMAP_ATOMIC); + EROFS_KMAP); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); it->ofs = erofs_blkoff(it->ofs); @@ -174,7 +174,7 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr, - EROFS_KMAP_ATOMIC); + EROFS_KMAP); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); return vi->xattr_isize - xattr_header_sz; @@ -368,7 +368,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, - EROFS_KMAP_ATOMIC); + EROFS_KMAP); if (IS_ERR(it->it.kaddr)) return PTR_ERR(it->it.kaddr); it->it.blkaddr = blkaddr; @@ -580,7 +580,7 @@ static int shared_listxattr(struct listxattr_iter *it) it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, - EROFS_KMAP_ATOMIC); + EROFS_KMAP); if (IS_ERR(it->it.kaddr)) return PTR_ERR(it->it.kaddr); it->it.blkaddr = blkaddr; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index b792d424d774..ccf7c55d477f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -175,16 +175,6 @@ static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) DBG_BUGON(1); } -/* how to allocate cached pages for a pcluster */ -enum z_erofs_cache_alloctype { - DONTALLOC, /* don't allocate any cached pages */ - /* - * try to use cached I/O if page allocation succeeds or fallback - * to in-place I/O instead to avoid any direct reclaim. - */ - TRYALLOC, -}; - /* * tagged pointer with 1-bit tag for all compressed pages * tag 0 - the page is just found with an extra page reference @@ -292,12 +282,29 @@ struct z_erofs_decompress_frontend { .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true } +static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) +{ + unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy; + + if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) + return false; + + if (fe->backmost) + return true; + + if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND && + fe->map.m_la < fe->headoffset) + return true; + + return false; +} + static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, - enum z_erofs_cache_alloctype type, struct page **pagepool) { struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); struct z_erofs_pcluster *pcl = fe->pcl; + bool shouldalloc = z_erofs_should_alloc_cache(fe); bool standalone = true; /* * optimistic allocation without direct reclaim since inplace I/O @@ -326,18 +333,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, } else { /* I/O is needed, no possible to decompress directly */ standalone = false; - switch (type) { - case TRYALLOC: - newpage = erofs_allocpage(pagepool, gfp); - if (!newpage) - continue; - set_page_private(newpage, - Z_EROFS_PREALLOCATED_PAGE); - t = tag_compressed_page_justfound(newpage); - break; - default: /* DONTALLOC */ + if (!shouldalloc) continue; - } + + /* + * try to use cached I/O if page allocation + * succeeds or fallback to in-place I/O instead + * to avoid any direct reclaim. + */ + newpage = erofs_allocpage(pagepool, gfp); + if (!newpage) + continue; + set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); + t = tag_compressed_page_justfound(newpage); } if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, @@ -488,7 +496,8 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) struct erofs_workgroup *grp; int err; - if (!(map->m_flags & EROFS_MAP_ENCODED)) { + if (!(map->m_flags & EROFS_MAP_ENCODED) || + (!ztailpacking && !(map->m_pa >> PAGE_SHIFT))) { DBG_BUGON(1); return -EFSCORRUPTED; } @@ -637,20 +646,6 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) return true; } -static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, - unsigned int cachestrategy, - erofs_off_t la) -{ - if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) - return false; - - if (fe->backmost) - return true; - - return cachestrategy >= EROFS_ZIP_CACHE_READAROUND && - la < fe->headoffset; -} - static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, struct page *page, unsigned int pageofs, unsigned int len) @@ -687,12 +682,9 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct page *page, struct page **pagepool) { struct inode *const inode = fe->inode; - struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct erofs_map_blocks *const map = &fe->map; const loff_t offset = page_offset(page); bool tight = true, exclusive; - - enum z_erofs_cache_alloctype cache_strategy; unsigned int cur, end, spiltted; int err = 0; @@ -746,13 +738,7 @@ repeat: fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } else { /* bind cache first when cached decompression is preferred */ - if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, - map->m_la)) - cache_strategy = TRYALLOC; - else - cache_strategy = DONTALLOC; - - z_erofs_bind_cache(fe, cache_strategy, pagepool); + z_erofs_bind_cache(fe, pagepool); } hitted: /* diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0bb66927e3d0..0150570c33aa 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -178,7 +178,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned int advise, type; m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, - erofs_blknr(pos), EROFS_KMAP_ATOMIC); + erofs_blknr(pos), EROFS_KMAP); if (IS_ERR(m->kaddr)) return PTR_ERR(m->kaddr); @@ -416,7 +416,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, out: pos += lcn * (1 << amortizedshift); m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, - erofs_blknr(pos), EROFS_KMAP_ATOMIC); + erofs_blknr(pos), EROFS_KMAP); if (IS_ERR(m->kaddr)) return PTR_ERR(m->kaddr); return unpack_compacted_index(m, amortizedshift, pos, lookahead); @@ -694,10 +694,15 @@ static int z_erofs_do_map_blocks(struct inode *inode, map->m_pa = blknr_to_addr(m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) - goto out; + goto unmap_out; } if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) { + if (map->m_llen > map->m_plen) { + DBG_BUGON(1); + err = -EFSCORRUPTED; + goto unmap_out; + } if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; @@ -718,14 +723,12 @@ static int z_erofs_do_map_blocks(struct inode *inode, if (!err) map->m_flags |= EROFS_MAP_FULL_MAPPED; } + unmap_out: erofs_unmap_metabuf(&m.map->buf); - -out: erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", __func__, map->m_la, map->m_pa, map->m_llen, map->m_plen, map->m_flags); - return err; } |