summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt8
-rw-r--r--fs/aio.c8
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs/init.c1
-rw-r--r--fs/autofs/inode.c16
-rw-r--r--fs/bcachefs/alloc_background.c48
-rw-r--r--fs/bcachefs/alloc_foreground.c2
-rw-r--r--fs/bcachefs/backpointers.c70
-rw-r--r--fs/bcachefs/bkey.c5
-rw-r--r--fs/bcachefs/bkey.h7
-rw-r--r--fs/bcachefs/btree_gc.c54
-rw-r--r--fs/bcachefs/btree_iter.c7
-rw-r--r--fs/bcachefs/btree_locking.c10
-rw-r--r--fs/bcachefs/btree_locking.h22
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/btree_write_buffer.c37
-rw-r--r--fs/bcachefs/btree_write_buffer.h3
-rw-r--r--fs/bcachefs/buckets.c2
-rw-r--r--fs/bcachefs/buckets.h8
-rw-r--r--fs/bcachefs/clock.c7
-rw-r--r--fs/bcachefs/data_update.c44
-rw-r--r--fs/bcachefs/data_update.h5
-rw-r--r--fs/bcachefs/debug.c12
-rw-r--r--fs/bcachefs/eytzinger.h6
-rw-r--r--fs/bcachefs/fs.c9
-rw-r--r--fs/bcachefs/io_misc.c2
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/journal.c18
-rw-r--r--fs/bcachefs/journal.h2
-rw-r--r--fs/bcachefs/journal_io.c12
-rw-r--r--fs/bcachefs/lru.c39
-rw-r--r--fs/bcachefs/lru.h3
-rw-r--r--fs/bcachefs/move.c25
-rw-r--r--fs/bcachefs/sb-errors_format.h3
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/bcachefs/util.c25
-rw-r--r--fs/bcachefs/util.h1
-rw-r--r--fs/befs/linuxvfs.c10
-rw-r--r--fs/binfmt_elf.c101
-rw-r--r--fs/binfmt_elf_fdpic.c5
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/binfmt_script.c1
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_map.c123
-rw-r--r--fs/btrfs/fs.h1
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/cachefiles/cache.c45
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/ondemand.c52
-rw-r--r--fs/cachefiles/volume.c1
-rw-r--r--fs/cachefiles/xattr.c5
-rw-r--r--fs/coda/symlink.c10
-rw-r--r--fs/configfs/dir.c10
-rw-r--r--fs/cramfs/inode.c26
-rw-r--r--fs/dcache.c59
-rw-r--r--fs/debugfs/inode.c16
-rw-r--r--fs/efivarfs/super.c12
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/efs/symlink.c14
-rw-r--r--fs/exec.c63
-rw-r--r--fs/exec_test.c141
-rw-r--r--fs/exfat/super.c8
-rw-r--r--fs/exportfs/expfs.c9
-rw-r--r--fs/ext4/crypto.c10
-rw-r--r--fs/ext4/ext4.h35
-rw-r--r--fs/ext4/namei.c122
-rw-r--r--fs/ext4/super.c26
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/dir.c105
-rw-r--r--fs/f2fs/f2fs.h16
-rw-r--r--fs/f2fs/file.c6
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/recovery.c9
-rw-r--r--fs/f2fs/super.c8
-rw-r--r--fs/fat/fat.h18
-rw-r--r--fs/fat/fat_test.c1
-rw-r--r--fs/fat/inode.c675
-rw-r--r--fs/fat/namei_msdos.c38
-rw-r--r--fs/fat/namei_vfat.c38
-rw-r--r--fs/fhandle.c178
-rw-r--r--fs/fs_parser.c34
-rw-r--r--fs/fsopen.c7
-rw-r--r--fs/fuse/acl.c4
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/hfs/inode.c3
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/bfind.c15
-rw-r--r--fs/hfsplus/extents.c9
-rw-r--r--fs/hfsplus/hfsplus_fs.h21
-rw-r--r--fs/hfsplus/ioctl.c4
-rw-r--r--fs/hfsplus/xattr.c2
-rw-r--r--fs/hostfs/hostfs_kern.c106
-rw-r--r--fs/hpfs/namei.c15
-rw-r--r--fs/hpfs/super.c1
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c109
-rw-r--r--fs/internal.h16
-rw-r--r--fs/iomap/buffered-io.c81
-rw-r--r--fs/isofs/inode.c16
-rw-r--r--fs/isofs/rock.c17
-rw-r--r--fs/jffs2/file.c14
-rw-r--r--fs/libfs.c74
-rw-r--r--fs/locks.c2
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/minix/namei.c3
-rw-r--r--fs/mount.h3
-rw-r--r--fs/mpage.c13
-rw-r--r--fs/namei.c239
-rw-r--r--fs/namespace.c524
-rw-r--r--fs/netfs/buffered_read.c14
-rw-r--r--fs/netfs/buffered_write.c12
-rw-r--r--fs/netfs/direct_read.c2
-rw-r--r--fs/netfs/direct_write.c8
-rw-r--r--fs/netfs/fscache_cache.c4
-rw-r--r--fs/netfs/fscache_cookie.c28
-rw-r--r--fs/netfs/fscache_io.c12
-rw-r--r--fs/netfs/fscache_main.c2
-rw-r--r--fs/netfs/fscache_volume.c18
-rw-r--r--fs/netfs/internal.h35
-rw-r--r--fs/netfs/io.c12
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c4
-rw-r--r--fs/netfs/write_collect.c16
-rw-r--r--fs/netfs/write_issue.c36
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/symlink.c10
-rw-r--r--fs/nfs/write.c1
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nilfs2/dir.c32
-rw-r--r--fs/nls/mac-celtic.c1
-rw-r--r--fs/nls/mac-centeuro.c1
-rw-r--r--fs/nls/mac-croatian.c1
-rw-r--r--fs/nls/mac-cyrillic.c1
-rw-r--r--fs/nls/mac-gaelic.c1
-rw-r--r--fs/nls/mac-greek.c1
-rw-r--r--fs/nls/mac-iceland.c1
-rw-r--r--fs/nls/mac-inuit.c1
-rw-r--r--fs/nls/mac-roman.c1
-rw-r--r--fs/nls/mac-romanian.c1
-rw-r--r--fs/nls/mac-turkish.c1
-rw-r--r--fs/nls/nls_ascii.c1
-rw-r--r--fs/nls/nls_base.c1
-rw-r--r--fs/nls/nls_cp1250.c1
-rw-r--r--fs/nls/nls_cp1251.c1
-rw-r--r--fs/nls/nls_cp1255.c1
-rw-r--r--fs/nls/nls_cp437.c1
-rw-r--r--fs/nls/nls_cp737.c1
-rw-r--r--fs/nls/nls_cp775.c1
-rw-r--r--fs/nls/nls_cp850.c1
-rw-r--r--fs/nls/nls_cp852.c1
-rw-r--r--fs/nls/nls_cp855.c1
-rw-r--r--fs/nls/nls_cp857.c1
-rw-r--r--fs/nls/nls_cp860.c1
-rw-r--r--fs/nls/nls_cp861.c1
-rw-r--r--fs/nls/nls_cp862.c1
-rw-r--r--fs/nls/nls_cp863.c1
-rw-r--r--fs/nls/nls_cp864.c1
-rw-r--r--fs/nls/nls_cp865.c1
-rw-r--r--fs/nls/nls_cp866.c1
-rw-r--r--fs/nls/nls_cp869.c1
-rw-r--r--fs/nls/nls_cp874.c1
-rw-r--r--fs/nls/nls_cp932.c1
-rw-r--r--fs/nls/nls_cp936.c1
-rw-r--r--fs/nls/nls_cp949.c1
-rw-r--r--fs/nls/nls_cp950.c1
-rw-r--r--fs/nls/nls_euc-jp.c1
-rw-r--r--fs/nls/nls_iso8859-1.c1
-rw-r--r--fs/nls/nls_iso8859-13.c1
-rw-r--r--fs/nls/nls_iso8859-14.c1
-rw-r--r--fs/nls/nls_iso8859-15.c1
-rw-r--r--fs/nls/nls_iso8859-2.c1
-rw-r--r--fs/nls/nls_iso8859-3.c1
-rw-r--r--fs/nls/nls_iso8859-4.c1
-rw-r--r--fs/nls/nls_iso8859-5.c1
-rw-r--r--fs/nls/nls_iso8859-6.c1
-rw-r--r--fs/nls/nls_iso8859-7.c1
-rw-r--r--fs/nls/nls_iso8859-9.c1
-rw-r--r--fs/nls/nls_koi8-r.c1
-rw-r--r--fs/nls/nls_koi8-ru.c1
-rw-r--r--fs/nls/nls_koi8-u.c1
-rw-r--r--fs/nls/nls_ucs2_utils.c1
-rw-r--r--fs/nls/nls_utf8.c1
-rw-r--r--fs/nsfs.c122
-rw-r--r--fs/ntfs3/super.c12
-rw-r--r--fs/open.c17
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/orangefs/inode.c13
-rw-r--r--fs/orangefs/orangefs-bufmap.c4
-rw-r--r--fs/pidfs.c90
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/proc_sysctl.c70
-rw-r--r--fs/proc_namespace.c6
-rw-r--r--fs/pstore/blk.c2
-rw-r--r--fs/pstore/platform.c1
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/quota/dquot.c8
-rw-r--r--fs/read_write.c18
-rw-r--r--fs/readdir.c4
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/romfs/super.c22
-rw-r--r--fs/smb/client/cifsglob.h4
-rw-r--r--fs/smb/client/fs_context.c39
-rw-r--r--fs/smb/common/smb2pdu.h34
-rw-r--r--fs/smb/server/smb2pdu.c22
-rw-r--r--fs/stat.c206
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/tracefs/inode.c16
-rw-r--r--fs/ufs/dir.c1
-rw-r--r--fs/userfaultfd.c7
-rw-r--r--fs/vboxsf/file.c18
-rw-r--r--fs/vboxsf/super.c16
-rw-r--r--fs/xfs/xfs_icache.c5
-rw-r--r--fs/xfs/xfs_iops.c15
217 files changed, 3275 insertions, 1763 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f5693164ca9a..bd2f530e5740 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -176,4 +176,12 @@ config COREDUMP
certainly want to say Y here. Not necessary on systems that never
need debugging or only ever run flawless code.
+config EXEC_KUNIT_TEST
+ bool "Build execve tests" if !KUNIT_ALL_TESTS
+ depends on KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the exec KUnit tests, which tests boundary conditions
+ of various aspects of the exec internals.
+
endmenu
diff --git a/fs/aio.c b/fs/aio.c
index 57c9f7c077e6..93ef59d358b3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1516,7 +1516,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
iocb_put(iocb);
}
-static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb, int rw_type)
{
int ret;
@@ -1542,7 +1542,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
} else
req->ki_ioprio = get_current_ioprio();
- ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+ ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags, rw_type);
if (unlikely(ret))
return ret;
@@ -1594,7 +1594,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, READ);
if (ret)
return ret;
file = req->ki_filp;
@@ -1621,7 +1621,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, WRITE);
if (ret)
return ret;
file = req->ki_filp;
diff --git a/fs/attr.c b/fs/attr.c
index 960a310581eb..825007d5cda4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -17,8 +17,6 @@
#include <linux/filelock.h>
#include <linux/security.h>
-#include "internal.h"
-
/**
* setattr_should_drop_sgid - determine whether the setgid bit needs to be
* removed
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index b5e4dfa04ed0..1d644a35ffa0 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -38,4 +38,5 @@ static void __exit exit_autofs_fs(void)
module_init(init_autofs_fs)
module_exit(exit_autofs_fs)
+MODULE_DESCRIPTION("Kernel automounter support");
MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 1f5db6863663..cf792d4de4f1 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -126,7 +126,7 @@ enum {
const struct fs_parameter_spec autofs_param_specs[] = {
fsparam_flag ("direct", Opt_direct),
fsparam_fd ("fd", Opt_fd),
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_flag ("ignore", Opt_ignore),
fsparam_flag ("indirect", Opt_indirect),
fsparam_u32 ("maxproto", Opt_maxproto),
@@ -134,7 +134,7 @@ const struct fs_parameter_spec autofs_param_specs[] = {
fsparam_flag ("offset", Opt_offset),
fsparam_u32 ("pgrp", Opt_pgrp),
fsparam_flag ("strictexpire", Opt_strictexpire),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -193,8 +193,6 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct autofs_fs_context *ctx = fc->fs_private;
struct autofs_sb_info *sbi = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, autofs_param_specs, param, &result);
@@ -205,16 +203,10 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_fd:
return autofs_parse_fd(fc, sbi, param, &result);
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalfc(fc, "Invalid uid");
- ctx->uid = uid;
+ ctx->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalfc(fc, "Invalid gid");
- ctx->gid = gid;
+ ctx->gid = result.gid;
break;
case Opt_pgrp:
ctx->pgrp = result.uint_32;
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 1de9fac3bcf4..658f11aebda1 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -1553,13 +1554,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
- struct btree_iter *alloc_iter)
+ struct btree_iter *alloc_iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
- struct bkey_s_c alloc_k, lru_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
+ if (a->fragmentation_lru) {
+ ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+ a->fragmentation_lru,
+ alloc_k, last_flushed);
+ if (ret)
+ return ret;
+ }
+
if (a->data_type != BCH_DATA_cached)
return 0;
@@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
- lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
- lru_pos(alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]), 0);
- ret = bkey_err(lru_k);
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+ alloc_k, last_flushed);
if (ret)
- return ret;
-
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
- alloc_key_to_missing_lru_entry,
- "missing lru entry\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- ret = bch2_lru_set(trans,
- alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]);
- if (ret)
- goto err;
- }
+ goto err;
err:
fsck_err:
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 9d3d64746a5b..27d97c22ae27 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
@@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 4321f9fb73bd..6d8b1bc90be0 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
return ret;
}
-static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
-{
- return bpos_eq(l.k->p, r.k->p) &&
- bkey_bytes(l.k) == bkey_bytes(r.k) &&
- !memcmp(l.v, r.v, bkey_val_bytes(l.k));
-}
-
struct extents_to_bp_state {
struct bpos bucket_start;
struct bpos bucket_end;
@@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
- struct bkey_buf tmp;
int ret = 0;
- bch2_bkey_buf_init(&tmp);
-
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
if (!ca) {
prt_str(&buf, "extent for nonexistent device:bucket ");
@@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
- bch2_bkey_buf_reassemble(&tmp, c, orig_k);
-
- if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
- if (bp.level) {
- bch2_trans_unlock(trans);
- bch2_btree_interior_updates_flush(c);
- }
-
- ret = bch2_btree_write_buffer_flush_sync(trans);
- if (ret)
- goto err;
-
- bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
- ret = -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
+ if (ret)
+ goto err;
goto check_existing_bp;
}
@@ -589,7 +566,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
- bch2_bkey_buf_exit(&tmp, c);
bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
@@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
!((1U << btree) & btree_interior_mask))
continue;
+ bch2_trans_begin(trans);
+
__for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN,
0, depth, BTREE_ITER_prefetch, b, ret) {
@@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c_backpointer bp,
- struct bpos *last_flushed_pos)
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
- if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
- *last_flushed_pos = bp.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
- -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ if (!k.k) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
+ if (ret)
+ goto out;
- if (fsck_err_on(!k.k, c,
- backpointer_to_missing_ptr,
- "backpointer for missing %s\n %s",
- bp.v->level ? "btree node" : "extent",
- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
- goto out;
+ if (fsck_err(c, backpointer_to_missing_ptr,
+ "backpointer for missing %s\n %s",
+ bp.v->level ? "btree node" : "extent",
+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
+ goto out;
+ }
}
out:
fsck_err:
@@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
- struct bpos last_flushed_pos = SPOS_MAX;
+ struct bkey_buf last_flushed;
- return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k),
- &last_flushed_pos));
+ &last_flushed));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ return ret;
}
int bch2_check_backpointers_to_extents(struct bch_fs *c)
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 94a1d1982fa8..587d7318a2e8 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
- u64 packed_max = f->bits_per_field[i]
- ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ unsigned packed_bits = min(64, f->bits_per_field[i]);
+ u64 packed_max = packed_bits
+ ? ~((~0ULL << 1) << (packed_bits - 1))
: 0;
prt_printf(err, "field %u too large: %llu + %llu > %llu",
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index fcd43915df07..936357149cf0 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
+static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
+{
+ return bpos_eq(l.k->p, r.k->p) &&
+ bkey_bytes(l.k) == bkey_bytes(r.k) &&
+ !memcmp(l.v, r.v, bkey_val_bytes(l.k));
+}
+
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0e477a926579..a0deb8266011 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -641,16 +641,30 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
target_depth = 0;
/* root */
- mutex_lock(&c->btree_root_lock);
- struct btree *b = bch2_btree_id_root(c, btree)->b;
- if (!btree_node_fake(b)) {
+ do {
+retry_root:
+ bch2_trans_begin(trans);
+
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
+ 0, bch2_btree_id_root(c, btree)->b->c.level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err_root;
+
+ if (b != btree_node_root(c, b)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto retry_root;
+ }
+
gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX));
- ret = lockrestart_do(trans,
- bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
- NULL, NULL, bkey_i_to_s_c(&b->key), initial));
+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+ ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial);
level = b->c.level;
- }
- mutex_unlock(&c->btree_root_lock);
+err_root:
+ bch2_trans_iter_exit(trans, &iter);
+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
if (ret)
return ret;
@@ -903,6 +917,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
percpu_up_read(&c->mark_lock);
+ gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
+
if (fsck_err_on(new.data_type != gc.data_type, c,
alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@@ -916,23 +932,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
#define copy_bucket_field(_errtype, _f) \
if (fsck_err_on(new._f != gc._f, c, _errtype, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
- ": got %u, should be %u", \
+ ": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
- new._f, gc._f)) \
+ (u64) new._f, (u64) gc._f)) \
new._f = gc._f; \
- copy_bucket_field(alloc_key_gen_wrong,
- gen);
- copy_bucket_field(alloc_key_dirty_sectors_wrong,
- dirty_sectors);
- copy_bucket_field(alloc_key_cached_sectors_wrong,
- cached_sectors);
- copy_bucket_field(alloc_key_stripe_wrong,
- stripe);
- copy_bucket_field(alloc_key_stripe_redundancy_wrong,
- stripe_redundancy);
+ copy_bucket_field(alloc_key_gen_wrong, gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong, stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
+ copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
@@ -946,7 +958,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
a->v = new;
/*
- * The trigger normally makes sure this is set, but we're not running
+ * The trigger normally makes sure these are set, but we're not running
* triggers:
*/
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 0ed9e6574fcd..19352a08ea20 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -996,7 +996,7 @@ retry_all:
bch2_trans_unlock(trans);
cond_resched();
- trans->locked = true;
+ trans_set_locked(trans);
if (unlikely(trans->memory_allocation_failure)) {
struct closure cl;
@@ -3089,7 +3089,8 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_;
- trans->locked = true;
+
+ trans_set_locked(trans);
if (trans->restarted) {
bch2_btree_path_traverse_all(trans);
@@ -3159,7 +3160,6 @@ got_trans:
trans->last_begin_time = local_clock();
trans->fn_idx = fn_idx;
trans->locking_wait.task = current;
- trans->locked = true;
trans->journal_replay_not_finished =
unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) &&
atomic_inc_not_zero(&c->journal_keys.ref);
@@ -3193,6 +3193,7 @@ got_trans:
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
+ trans_set_locked(trans);
closure_init_stack_release(&trans->ref);
return trans;
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index d66fff22109a..c51826fd557f 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
prt_newline(&buf);
}
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
printbuf_exit(&buf);
BUG();
}
@@ -792,7 +792,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
return bch2_trans_relock_fail(trans, path, &f, trace);
}
- trans->locked = true;
+ trans_set_locked(trans);
out:
bch2_trans_verify_locks(trans);
return 0;
@@ -812,16 +812,14 @@ void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
__bch2_trans_unlock(trans);
- trans->locked = false;
- trans->last_unlock_ip = _RET_IP_;
+ trans_set_unlocked(trans);
}
void bch2_trans_unlock(struct btree_trans *trans)
{
__bch2_trans_unlock(trans);
- trans->locked = false;
- trans->last_unlock_ip = _RET_IP_;
+ trans_set_unlocked(trans);
}
void bch2_trans_unlock_long(struct btree_trans *trans)
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 7f41545b9147..75a6274c7d27 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -193,6 +193,28 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
/* lock: */
+static inline void trans_set_locked(struct btree_trans *trans)
+{
+ if (!trans->locked) {
+ trans->locked = true;
+ trans->last_unlock_ip = 0;
+
+ trans->pf_memalloc_nofs = (current->flags & PF_MEMALLOC_NOFS) != 0;
+ current->flags |= PF_MEMALLOC_NOFS;
+ }
+}
+
+static inline void trans_set_unlocked(struct btree_trans *trans)
+{
+ if (trans->locked) {
+ trans->locked = false;
+ trans->last_unlock_ip = _RET_IP_;
+
+ if (!trans->pf_memalloc_nofs)
+ current->flags &= ~PF_MEMALLOC_NOFS;
+ }
+}
+
static inline int __btree_node_lock_nopath(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type,
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 87f485e9c552..48cb1a7d31c5 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -484,6 +484,7 @@ struct btree_trans {
bool lock_may_not_fail:1;
bool srcu_held:1;
bool locked:1;
+ bool pf_memalloc_nofs:1;
bool write_locked:1;
bool used_mempool:1;
bool in_traverse_all:1;
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 75c8a196b3f6..d0e92d948002 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
+#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
+/**
+ * In check and repair code, when checking references to write buffer btrees we
+ * need to issue a flush before we have a definitive error: this issues a flush
+ * if this is a key we haven't yet checked.
+ */
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf tmp;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
+ bch2_bkey_buf_reassemble(&tmp, c, referring_k);
+
+ if (bkey_is_btree_ptr(referring_k.k)) {
+ bch2_trans_unlock(trans);
+ bch2_btree_interior_updates_flush(c);
+ }
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+ ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ }
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index eebcd2b15249..dd5e64218b50 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
+struct bkey_buf;
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
+
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 743d57eba760..314ee3e0187f 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -805,7 +805,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
- *bucket_gen(ca, bucket_nr),
+ bucket_gen_get(ca, bucket_nr),
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen,
(printbuf_reset(&buf),
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 80ee0be9793e..8ad4be73860c 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
return gens->b + b;
}
+static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b)
+{
+ rcu_read_lock();
+ u8 gen = *bucket_gen(ca, b);
+ rcu_read_unlock();
+ return gen;
+}
+
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
const struct bch_extent_ptr *ptr)
{
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 363644451106..0f40b585ce2b 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
{
struct io_timer *ret = NULL;
- spin_lock(&clock->timer_lock);
-
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
-
- spin_unlock(&clock->timer_lock);
-
return ret;
}
@@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
struct io_timer *timer;
unsigned long now = atomic64_add_return(sectors, &clock->now);
+ spin_lock(&clock->timer_lock);
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
+ spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 1a0072eef109..0087b8555ead 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -5,7 +5,9 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "compress.h"
#include "data_update.h"
+#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ printbuf_tabstop_push(out, 20);
+ prt_str(out, "rewrite ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "kill ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->kill_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "target:\t");
+ bch2_target_to_text(out, c, data_opts->target);
+ prt_newline(out);
+
+ prt_str(out, "compression:\t");
+ bch2_compression_opt_to_text(out, background_compression(*io_opts));
+ prt_newline(out);
+
+ prt_str(out, "extra replicas:\t");
+ prt_u64(out, data_opts->extra_replicas);
+}
+
+void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
+{
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+ prt_newline(out);
+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
+}
+
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+ if (!m->op.nr_replicas) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_data_update_to_text(&buf, m);
+ WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_data_update_done;
+ goto done;
+ }
+
m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 991095bbd469..8d36365bdea8 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -17,6 +17,9 @@ struct data_update_opts {
unsigned write_flags;
};
+void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_opts *, struct data_update_opts *);
+
struct data_update {
/* extent being updated: */
enum btree_id btree_id;
@@ -27,6 +30,8 @@ struct data_update {
struct bch_write_op op;
};
+void bch2_data_update_to_text(struct printbuf *, struct data_update *);
+
int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index f0d4727c4dc2..ebabab171fe5 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -610,7 +610,7 @@ restart:
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
list_for_each_entry(trans, &c->btree_trans_list, list) {
- if ((ulong) trans < i->iter)
+ if ((ulong) trans <= i->iter)
continue;
i->iter = (ulong) trans;
@@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = {
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans *trans;
- pid_t iter = 0;
+ ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= iter)
continue;
- iter = task->pid;
+ iter = (ulong) trans;
if (!closure_get_not_zero(&trans->ref))
continue;
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index 24840aee335c..795f4fc0bab1 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
static inline unsigned eytzinger1_first(unsigned size)
{
- return rounddown_pow_of_two(size);
+ return size ? rounddown_pow_of_two(size) : 0;
}
static inline unsigned eytzinger1_last(unsigned size)
@@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
- return (size + 1 - rounddown_pow_of_two(size)) << 1;
+ return size
+ ? (size + 1 - rounddown_pow_of_two(size)) << 1
+ : 0;
}
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index f9c9a95d7d4c..0c7d1bc0548a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
+ /*
+ * We don't want bch2_evict_inode() to delete the inode on disk,
+ * we just raced and had another inode in cache. Normally new
+ * inodes don't have nlink == 0 - except tmpfiles do...
+ */
+ set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
inode = old;
} else {
@@ -238,7 +244,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
inode->ei_flags = 0;
mutex_init(&inode->ei_quota_lock);
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
- inode->v.i_state = 0;
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
kmem_cache_free(bch2_inode_cache, inode);
@@ -2026,6 +2031,8 @@ err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
err:
+ if (ret)
+ pr_err("error: %s", bch2_err_str(ret));
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 4ec979b4b23e..4583c9386e8c 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -125,7 +125,7 @@ err_noprint:
bch2_bkey_buf_exit(&old, c);
if (closure_nr_remaining(&cl) != 1) {
- bch2_trans_unlock(trans);
+ bch2_trans_unlock_long(trans);
closure_sync(&cl);
}
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index c97fa7002b06..ebf39ef72fb2 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
@@ -1004,6 +1003,9 @@ get_bio:
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
+ if (flags & BCH_READ_NODECODE)
+ orig->pick = pick;
+
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 13669dd0e375..10b19791ec98 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1095,7 +1095,7 @@ unlock:
return ret;
}
-int bch2_dev_journal_alloc(struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
unsigned nr;
int ret;
@@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+ ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
err:
bch_err_fn(ca, ret);
return ret;
@@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
if (ca->journal.nr)
continue;
- int ret = bch2_dev_journal_alloc(ca);
+ int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
@@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
- BUG_ON(!bch2_journal_error(j) &&
- test_bit(JOURNAL_replay_done, &j->flags) &&
- j->last_empty_seq != journal_cur_seq(j));
+ WARN(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_replay_done, &j->flags) &&
+ j->last_empty_seq != journal_cur_seq(j),
+ "journal shutdown error: cur seq %llu but last empty seq %llu",
+ journal_cur_seq(j), j->last_empty_seq);
if (!bch2_journal_error(j))
clear_bit(JOURNAL_running, &j->flags);
@@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
unsigned long now = jiffies;
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
- if (!out->nr_tabstops)
- printbuf_tabstop_push(out, 28);
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 28);
out->atomic++;
rcu_read_lock();
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index fd1f7cdaa8bc..bc6b9c39dcb4 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
-int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index db24ce21b2ac..2326e2cb9cd2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
flags|BCH_VALIDATE_journal);
if (ret == FSCK_DELETED_KEY)
continue;
+ else if (ret)
+ return ret;
k = bkey_next(k);
}
@@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
spin_lock(&j->lock);
- closure_wait(&j->async_wait, cl);
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
spin_unlock(&j->lock);
-
- continue_at(cl, journal_write_preflush, j->wq);
- return;
}
if (w->separate_flush) {
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index a40d116224ed..b12894ef44f3 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
NULL
};
+int bch2_lru_check_set(struct btree_trans *trans,
+ u16 lru_id, u64 time,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter lru_iter;
+ struct bkey_s_c lru_k =
+ bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
+ lru_pos(lru_id,
+ bucket_to_u64(referring_k.k->p),
+ time), 0);
+ int ret = bkey_err(lru_k);
+ if (ret)
+ return ret;
+
+ if (lru_k.k->type != KEY_TYPE_set) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
+ if (ret)
+ goto err;
+
+ if (fsck_err(c, alloc_key_to_missing_lru_entry,
+ "missing %s lru entry\n"
+ " %s",
+ bch2_lru_types[lru_type(lru_k)],
+ (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
+ if (ret)
+ goto err;
+ }
+ }
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &lru_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index bd71ba77de07..ed75bcf59d47 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+struct bkey_buf;
+int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
+
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 6e477fadaa2a..e714e3bd5bbb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
-static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
-{
- printbuf_tabstop_push(out, 20);
- prt_str(out, "rewrite ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
- prt_newline(out);
-
- prt_str(out, "kill ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->kill_ptrs);
- prt_newline(out);
-
- prt_str(out, "target:\t");
- bch2_target_to_text(out, c, data_opts->target);
- prt_newline(out);
-
- prt_str(out, "compression:\t");
- bch2_compression_opt_to_text(out, background_compression(*io_opts));
- prt_newline(out);
-
- prt_str(out, "extra replicas:\t");
- prt_u64(out, data_opts->extra_replicas);
-}
-
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d6f35a99c429..d54121ec093f 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -286,7 +286,8 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, 0) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
- x(alloc_key_io_time_bad, 275, 0)
+ x(alloc_key_io_time_bad, 275, 0) \
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index fb906467201e..da735608d47c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
- free_percpu(c->online_reserved);
+ if (c->online_reserved) {
+ u64 v = percpu_u64_get(c->online_reserved);
+ WARN(v, "online_reserved not 0 at shutdown: %lli", v);
+ free_percpu(c->online_reserved);
+ }
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index de331dec2a99..4ec7e44d6e36 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}
-void bch2_print_string_as_lines(const char *prefix, const char *lines)
+static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
+ bool nonblocking)
{
+ bool locked = false;
const char *p;
if (!lines) {
@@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
return;
}
- console_lock();
+ if (!nonblocking) {
+ console_lock();
+ locked = true;
+ } else {
+ locked = console_trylock();
+ }
+
while (1) {
p = strchrnul(lines, '\n');
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
@@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
break;
lines = p + 1;
}
- console_unlock();
+ if (locked)
+ console_unlock();
+}
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines)
+{
+ return __bch2_print_string_as_lines(prefix, lines, false);
+}
+
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines)
+{
+ return __bch2_print_string_as_lines(prefix, lines, true);
}
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 5d2c470a49ac..5b0533ec4c7e 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);
void bch2_print_string_as_lines(const char *prefix, const char *lines);
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines);
typedef DARRAY(unsigned long) bch_stacktrace;
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index d76f406d3b2e..f92f108840f5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -475,6 +475,7 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
char *link = folio_address(folio);
+ int err = -EIO;
if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
@@ -487,13 +488,10 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
goto fail;
}
link[len - 1] = '\0';
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
+ err = 0;
fail:
- folio_set_error(folio);
- folio_unlock(folio);
- return -EIO;
+ folio_end_read(folio, err == 0);
+ return err;
}
/*
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a43897b03ce9..5ae8045f4df4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1003,7 +1003,8 @@ out_free_interp:
if (elf_read_implies_exec(*elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
@@ -1061,10 +1062,40 @@ out_free_interp:
* Header for ET_DYN binaries to calculate the
* randomization (load_bias) for all the LOAD
* Program Headers.
+ */
+
+ /*
+ * Calculate the entire size of the ELF mapping
+ * (total_size), used for the initial mapping,
+ * due to load_addr_set which is set to true later
+ * once the initial mapping is performed.
+ *
+ * Note that this is only sensible when the LOAD
+ * segments are contiguous (or overlapping). If
+ * used for LOADs that are far apart, this would
+ * cause the holes between LOADs to be mapped,
+ * running the risk of having the mapping fail,
+ * as it would be larger than the ELF file itself.
*
+ * As a result, only ET_DYN does this, since
+ * some ET_EXEC (e.g. ia64) may have large virtual
+ * memory holes between LOADs.
+ *
+ */
+ total_size = total_mapping_size(elf_phdata,
+ elf_ex->e_phnum);
+ if (!total_size) {
+ retval = -EINVAL;
+ goto out_free_dentry;
+ }
+
+ /* Calculate any requested alignment. */
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+
+ /*
* There are effectively two types of ET_DYN
- * binaries: programs (i.e. PIE: ET_DYN with INTERP)
- * and loaders (ET_DYN without INTERP, since they
+ * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP)
+ * and loaders (ET_DYN without PT_INTERP, since they
* _are_ the ELF interpreter). The loaders must
* be loaded away from programs since the program
* may otherwise collide with the loader (especially
@@ -1084,15 +1115,44 @@ out_free_interp:
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
if (interpreter) {
+ /* On ET_DYN with PT_INTERP, we do the ASLR. */
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ /* Adjust alignment as requested. */
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
- } else
- load_bias = 0;
+ } else {
+ /*
+ * For ET_DYN without PT_INTERP, we rely on
+ * the architectures's (potentially ASLR) mmap
+ * base address (via a load_bias of 0).
+ *
+ * When a large alignment is requested, we
+ * must do the allocation at address "0" right
+ * now to discover where things will load so
+ * that we can adjust the resulting alignment.
+ * In this case (load_bias != 0), we can use
+ * MAP_FIXED_NOREPLACE to make sure the mapping
+ * doesn't collide with anything.
+ */
+ if (alignment > ELF_MIN_ALIGN) {
+ load_bias = elf_load(bprm->file, 0, elf_ppnt,
+ elf_prot, elf_flags, total_size);
+ if (BAD_ADDR(load_bias)) {
+ retval = IS_ERR_VALUE(load_bias) ?
+ PTR_ERR((void*)load_bias) : -EINVAL;
+ goto out_free_dentry;
+ }
+ vm_munmap(load_bias, total_size);
+ /* Adjust alignment as requested. */
+ if (alignment)
+ load_bias &= ~(alignment - 1);
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else
+ load_bias = 0;
+ }
/*
* Since load_bias is used for all subsequent loading
@@ -1102,31 +1162,6 @@ out_free_interp:
* is then page aligned.
*/
load_bias = ELF_PAGESTART(load_bias - vaddr);
-
- /*
- * Calculate the entire size of the ELF mapping
- * (total_size), used for the initial mapping,
- * due to load_addr_set which is set to true later
- * once the initial mapping is performed.
- *
- * Note that this is only sensible when the LOAD
- * segments are contiguous (or overlapping). If
- * used for LOADs that are far apart, this would
- * cause the holes between LOADs to be mapped,
- * running the risk of having the mapping fail,
- * as it would be larger than the ELF file itself.
- *
- * As a result, only ET_DYN does this, since
- * some ET_EXEC (e.g. ia64) may have large virtual
- * memory holes between LOADs.
- *
- */
- total_size = total_mapping_size(elf_phdata,
- elf_ex->e_phnum);
- if (!total_size) {
- retval = -EINVAL;
- goto out_free_dentry;
- }
}
error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
@@ -1216,7 +1251,6 @@ out_free_interp:
}
reloc_func_desc = interp_load_addr;
- allow_write_access(interpreter);
fput(interpreter);
kfree(interp_elf_ex);
@@ -1251,7 +1285,7 @@ out_free_interp:
mm->end_data = end_data;
mm->start_stack = bprm->p;
- if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
+ if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) {
/*
* For architectures with ELF randomization, when executing
* a loader directly (i.e. no interpreter listed in ELF
@@ -1308,7 +1342,6 @@ out_free_dentry:
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
out_free_file:
- allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_ph:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index b799701454a9..28a3439f163a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -394,7 +394,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
goto error;
}
- allow_write_access(interpreter);
fput(interpreter);
interpreter = NULL;
}
@@ -466,10 +465,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
retval = 0;
error:
- if (interpreter) {
- allow_write_access(interpreter);
+ if (interpreter)
fput(interpreter);
- }
kfree(interpreter_name);
kfree(exec_params.phdrs);
kfree(exec_params.loadmap);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 68fa225f89e5..31660d8cc2c6 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -247,13 +247,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (retval < 0)
goto ret;
- if (fmt->flags & MISC_FMT_OPEN_FILE) {
+ if (fmt->flags & MISC_FMT_OPEN_FILE)
interp_file = file_clone_open(fmt->interp_file);
- if (!IS_ERR(interp_file))
- deny_write_access(interp_file);
- } else {
+ else
interp_file = open_exec(fmt->interpreter);
- }
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
goto ret;
@@ -1086,4 +1083,5 @@ static void __exit exit_misc_binfmt(void)
core_initcall(init_misc_binfmt);
module_exit(exit_misc_binfmt);
+MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
MODULE_LICENSE("GPL");
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1b6625e95958..637daf6e4d45 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -155,4 +155,5 @@ static void __exit exit_script_binfmt(void)
core_initcall(init_script_binfmt);
module_exit(exit_script_binfmt);
+MODULE_DESCRIPTION("Kernel support for scripts starting with #!");
MODULE_LICENSE("GPL");
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 38cdb8875e8e..cabb558dbdaa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2856,6 +2856,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
+ spin_lock_init(&fs_info->extent_map_shrinker_lock);
+
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 744e8952abb0..b4c9a6aa118c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1028,7 +1028,14 @@ out_free_pre:
return ret;
}
-static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
+struct btrfs_em_shrink_ctx {
+ long nr_to_scan;
+ long scanned;
+ u64 last_ino;
+ u64 last_root;
+};
+
+static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
{
const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
struct extent_map_tree *tree = &inode->extent_tree;
@@ -1057,14 +1064,25 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
if (!down_read_trylock(&inode->i_mmap_lock))
return 0;
- write_lock(&tree->lock);
+ /*
+ * We want to be fast because we can be called from any path trying to
+ * allocate memory, so if the lock is busy we don't want to spend time
+ * waiting for it - either some task is about to do IO for the inode or
+ * we may have another task shrinking extent maps, here in this code, so
+ * skip this inode.
+ */
+ if (!write_trylock(&tree->lock)) {
+ up_read(&inode->i_mmap_lock);
+ return 0;
+ }
+
node = rb_first_cached(&tree->map);
while (node) {
struct extent_map *em;
em = rb_entry(node, struct extent_map, rb_node);
node = rb_next(node);
- (*scanned)++;
+ ctx->scanned++;
if (em->flags & EXTENT_FLAG_PINNED)
goto next;
@@ -1085,16 +1103,18 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
free_extent_map(em);
nr_dropped++;
next:
- if (*scanned >= nr_to_scan)
+ if (ctx->scanned >= ctx->nr_to_scan)
break;
/*
- * Restart if we had to reschedule, and any extent maps that were
- * pinned before may have become unpinned after we released the
- * lock and took it again.
+ * Stop if we need to reschedule or there's contention on the
+ * lock. This is to avoid slowing other tasks trying to take the
+ * lock and because the shrinker might be called during a memory
+ * allocation path and we want to avoid taking a very long time
+ * and slowing down all sorts of tasks.
*/
- if (cond_resched_rwlock_write(&tree->lock))
- node = rb_first_cached(&tree->map);
+ if (need_resched() || rwlock_needbreak(&tree->lock))
+ break;
}
write_unlock(&tree->lock);
up_read(&inode->i_mmap_lock);
@@ -1102,25 +1122,30 @@ next:
return nr_dropped;
}
-static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan)
+static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_inode *inode;
long nr_dropped = 0;
- u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1;
+ u64 min_ino = ctx->last_ino + 1;
inode = btrfs_find_first_inode(root, min_ino);
while (inode) {
- nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan);
+ nr_dropped += btrfs_scan_inode(inode, ctx);
min_ino = btrfs_ino(inode) + 1;
- fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode);
- iput(&inode->vfs_inode);
+ ctx->last_ino = btrfs_ino(inode);
+ btrfs_add_delayed_iput(inode);
- if (*scanned >= nr_to_scan)
+ if (ctx->scanned >= ctx->nr_to_scan)
+ break;
+
+ /*
+ * We may be called from memory allocation paths, so we don't
+ * want to take too much time and slowdown tasks.
+ */
+ if (need_resched())
break;
- cond_resched();
inode = btrfs_find_first_inode(root, min_ino);
}
@@ -1132,14 +1157,14 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
* inode if there is one or we will find out this was the last
* one and move to the next root.
*/
- fs_info->extent_map_shrinker_last_root = btrfs_root_id(root);
+ ctx->last_root = btrfs_root_id(root);
} else {
/*
* No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
* that when processing the next root we start from its first inode.
*/
- fs_info->extent_map_shrinker_last_ino = 0;
- fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1;
+ ctx->last_ino = 0;
+ ctx->last_root = btrfs_root_id(root) + 1;
}
return nr_dropped;
@@ -1147,19 +1172,41 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
{
- const u64 start_root_id = fs_info->extent_map_shrinker_last_root;
- u64 next_root_id = start_root_id;
+ struct btrfs_em_shrink_ctx ctx;
+ u64 start_root_id;
+ u64 next_root_id;
bool cycled = false;
long nr_dropped = 0;
- long scanned = 0;
+
+ ctx.scanned = 0;
+ ctx.nr_to_scan = nr_to_scan;
+
+ /*
+ * In case we have multiple tasks running this shrinker, make the next
+ * one start from the next inode in case it starts before we finish.
+ */
+ spin_lock(&fs_info->extent_map_shrinker_lock);
+ ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
+ fs_info->extent_map_shrinker_last_ino++;
+ ctx.last_root = fs_info->extent_map_shrinker_last_root;
+ spin_unlock(&fs_info->extent_map_shrinker_lock);
+
+ start_root_id = ctx.last_root;
+ next_root_id = ctx.last_root;
if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
- trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr);
+ trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
+ nr, ctx.last_root,
+ ctx.last_ino);
}
- while (scanned < nr_to_scan) {
+ /*
+ * We may be called from memory allocation paths, so we don't want to
+ * take too much time and slowdown tasks, so stop if we need reschedule.
+ */
+ while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
struct btrfs_root *root;
unsigned long count;
@@ -1171,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
spin_unlock(&fs_info->fs_roots_radix_lock);
if (start_root_id > 0 && !cycled) {
next_root_id = 0;
- fs_info->extent_map_shrinker_last_root = 0;
- fs_info->extent_map_shrinker_last_ino = 0;
+ ctx.last_root = 0;
+ ctx.last_ino = 0;
cycled = true;
continue;
}
@@ -1186,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
continue;
if (is_fstree(btrfs_root_id(root)))
- nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan);
+ nr_dropped += btrfs_scan_root(root, &ctx);
btrfs_put_root(root);
}
+ /*
+ * In case of multiple tasks running this extent map shrinking code this
+ * isn't perfect but it's simple and silences things like KCSAN. It's
+ * not possible to know which task made more progress because we can
+ * cycle back to the first root and first inode if it's not the first
+ * time the shrinker ran, see the above logic. Also a task that started
+ * later may finish ealier than another task and made less progress. So
+ * make this simple and update to the progress of the last task that
+ * finished, with the occasional possiblity of having two consecutive
+ * runs of the shrinker process the same inodes.
+ */
+ spin_lock(&fs_info->extent_map_shrinker_lock);
+ fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
+ fs_info->extent_map_shrinker_last_root = ctx.last_root;
+ spin_unlock(&fs_info->extent_map_shrinker_lock);
+
if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
- trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr);
+ trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
+ nr, ctx.last_root,
+ ctx.last_ino);
}
return nr_dropped;
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 89f0650631cd..833dc3fe0a38 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -630,6 +630,7 @@ struct btrfs_fs_info {
s32 delalloc_batch;
struct percpu_counter evictable_extent_maps;
+ spinlock_t extent_map_shrinker_lock;
u64 extent_map_shrinker_last_root;
u64 extent_map_shrinker_last_ino;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3a2b902b2d1f..d62c96f00ff8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5587,7 +5587,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
args.ino = ino;
args.root = root;
- inode = iget5_locked(s, hashval, btrfs_find_actor,
+ inode = iget5_locked_rcu(s, hashval, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index efd5d6e9589e..6ad524b894fc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4627,7 +4627,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_iov;
init_sync_kiocb(&kiocb, file);
- ret = kiocb_set_rw_flags(&kiocb, 0);
+ ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
if (ret)
goto out_iov;
kiocb.ki_pos = pos;
diff --git a/fs/buffer.c b/fs/buffer.c
index 8c19e705b9c3..dbe8f411ce52 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -258,7 +258,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
} else {
clear_buffer_uptodate(bh);
buffer_io_error(bh, ", async page read");
- folio_set_error(folio);
}
/*
@@ -391,7 +390,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
buffer_io_error(bh, ", lost async page write");
mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
- folio_set_error(folio);
}
first = folio_buffers(folio);
@@ -1960,7 +1958,6 @@ recover:
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
- folio_set_error(folio);
BUG_ON(folio_test_writeback(folio));
mapping_set_error(folio->mapping, err);
folio_start_writeback(folio);
@@ -2405,10 +2402,8 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
- if (err) {
- folio_set_error(folio);
+ if (err)
page_error = true;
- }
}
if (!buffer_mapped(bh)) {
folio_zero_range(folio, i * blocksize,
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index f449f7340aad..9fb06dc16520 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/namei.h>
+#include <trace/events/fscache.h>
#include "internal.h"
/*
@@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
}
/*
- * Withdraw volumes.
+ * Withdraw fscache volumes.
+ */
+static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache)
+{
+ struct list_head *cur;
+ struct cachefiles_volume *volume;
+ struct fscache_volume *vcookie;
+
+ _enter("");
+retry:
+ spin_lock(&cache->object_list_lock);
+ list_for_each(cur, &cache->volumes) {
+ volume = list_entry(cur, struct cachefiles_volume, cache_link);
+
+ if (atomic_read(&volume->vcookie->n_accesses) == 0)
+ continue;
+
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ fscache_withdraw_volume(vcookie);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
+ goto retry;
+ }
+ }
+ spin_unlock(&cache->object_list_lock);
+
+ _leave("");
+}
+
+/*
+ * Withdraw cachefiles volumes.
*/
static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
{
_enter("");
for (;;) {
+ struct fscache_volume *vcookie = NULL;
struct cachefiles_volume *volume = NULL;
spin_lock(&cache->object_list_lock);
if (!list_empty(&cache->volumes)) {
volume = list_first_entry(&cache->volumes,
struct cachefiles_volume, cache_link);
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (!vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ cpu_relax();
+ continue;
+ }
list_del_init(&volume->cache_link);
}
spin_unlock(&cache->object_list_lock);
@@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
break;
cachefiles_withdraw_volume(volume);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
}
_leave("");
@@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
pr_info("File cache on %s unregistering\n", fscache->name);
fscache_withdraw_cache(fscache);
+ cachefiles_withdraw_fscache_volumes(cache);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 06cdf1a8a16f..89b11336a836 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
if (cachefiles_in_ondemand_mode(cache)) {
if (!xa_empty(&cache->reqs)) {
- rcu_read_lock();
+ xas_lock(&xas);
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (!cachefiles_ondemand_is_reopening_read(req)) {
mask |= EPOLLIN;
break;
}
}
- rcu_read_unlock();
+ xas_unlock(&xas);
}
} else {
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6845a90cdfcc..7b99bd98de75 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -48,6 +48,7 @@ enum cachefiles_object_state {
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
+ CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */
};
struct cachefiles_ondemand_info {
@@ -128,6 +129,7 @@ struct cachefiles_cache {
unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
+ u32 msg_id_next;
};
static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
@@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
+CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING);
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index bce005f2b456..470c96658385 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
*/
xas_lock(&xas);
- if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ if (test_bit(CACHEFILES_DEAD, &cache->flags) ||
+ cachefiles_ondemand_object_is_dropping(object)) {
xas_unlock(&xas);
ret = -EIO;
goto out;
@@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
smp_mb();
if (opcode == CACHEFILES_OP_CLOSE &&
- !cachefiles_ondemand_object_is_open(object)) {
+ !cachefiles_ondemand_object_is_open(object)) {
WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
xas_unlock(&xas);
ret = -EIO;
goto out;
}
- xas.xa_index = 0;
+ /*
+ * Cyclically find a free xas to avoid msg_id reuse that would
+ * cause the daemon to successfully copen a stale msg_id.
+ */
+ xas.xa_index = cache->msg_id_next;
xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART) {
+ xas.xa_index = 0;
+ xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK);
+ }
if (xas.xa_node == XAS_RESTART)
xas_set_err(&xas, -EBUSY);
+
xas_store(&xas, req);
- xas_clear_mark(&xas, XA_FREE_MARK);
- xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ if (xas_valid(&xas)) {
+ cache->msg_id_next = xas.xa_index + 1;
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ }
xas_unlock(&xas);
} while (xas_nomem(&xas, GFP_KERNEL));
@@ -568,7 +581,8 @@ out:
* If error occurs after creating the anonymous fd,
* cachefiles_ondemand_fd_release() will set object to close.
*/
- if (opcode == CACHEFILES_OP_OPEN)
+ if (opcode == CACHEFILES_OP_OPEN &&
+ !cachefiles_ondemand_object_is_dropping(object))
cachefiles_ondemand_set_object_close(object);
kfree(req);
return ret;
@@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
{
+ unsigned long index;
+ struct cachefiles_req *req;
+ struct cachefiles_cache *cache;
+
+ if (!object->ondemand)
+ return;
+
cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
cachefiles_ondemand_init_close_req, NULL);
+
+ if (!object->ondemand->ondemand_id)
+ return;
+
+ /* Cancel all requests for the object that is being dropped. */
+ cache = object->volume->cache;
+ xa_lock(&cache->reqs);
+ cachefiles_ondemand_set_object_dropping(object);
+ xa_for_each(&cache->reqs, index, req) {
+ if (req->object == object) {
+ req->error = -EIO;
+ complete(&req->done);
+ __xa_erase(&cache->reqs, index);
+ }
+ }
+ xa_unlock(&cache->reqs);
+
+ /* Wait for ondemand_object_worker() to finish to avoid UAF. */
+ cancel_work_sync(&object->ondemand->ondemand_work);
}
int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 89df0ba8ba5e..781aac4ef274 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie)
void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
{
- fscache_withdraw_volume(volume->vcookie);
cachefiles_set_volume_xattr(volume);
__cachefiles_free_volume(volume);
}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index bcb6173943ee..4dd8a993c60a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
if (xlen == 0)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
- if (xlen < 0)
+ if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
cachefiles_trace_getxattr_error);
+ }
if (xlen == -EIO)
cachefiles_io_error_obj(
object,
@@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
cachefiles_trace_getxattr_error);
if (xlen == -EIO)
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index ccdbec388091..40f84d014524 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -31,15 +31,7 @@ static int coda_symlink_filler(struct file *file, struct folio *folio)
cii = ITOC(inode);
error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len);
- if (error)
- goto fail;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-fail:
- folio_set_error(folio);
- folio_unlock(folio);
+ folio_end_read(folio, error == 0);
return error;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 18677cd4e62f..43d6bde1adcc 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -580,6 +580,7 @@ static void detach_attrs(struct config_item * item)
static int populate_attrs(struct config_item *item)
{
const struct config_item_type *t = item->ci_type;
+ struct configfs_group_operations *ops;
struct configfs_attribute *attr;
struct configfs_bin_attribute *bin_attr;
int error = 0;
@@ -587,14 +588,23 @@ static int populate_attrs(struct config_item *item)
if (!t)
return -EINVAL;
+
+ ops = t->ct_group_ops;
+
if (t->ct_attrs) {
for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) {
+ if (ops && ops->is_visible && !ops->is_visible(item, attr, i))
+ continue;
+
if ((error = configfs_create_file(item, attr)))
break;
}
}
if (t->ct_bin_attrs) {
for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) {
+ if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i))
+ continue;
+
error = configfs_create_bin_file(item, bin_attr);
if (error)
break;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 460690ca0174..b84d1747a020 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -811,19 +811,19 @@ out:
static int cramfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
u32 maxblock;
int bytes_filled;
void *pgdata;
+ bool success = false;
maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
bytes_filled = 0;
- pgdata = kmap_local_page(page);
+ pgdata = kmap_local_folio(folio, 0);
- if (page->index < maxblock) {
+ if (folio->index < maxblock) {
struct super_block *sb = inode->i_sb;
- u32 blkptr_offset = OFFSET(inode) + page->index * 4;
+ u32 blkptr_offset = OFFSET(inode) + folio->index * 4;
u32 block_ptr, block_start, block_len;
bool uncompressed, direct;
@@ -844,7 +844,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
if (uncompressed) {
block_len = PAGE_SIZE;
/* if last block: cap to file length */
- if (page->index == maxblock - 1)
+ if (folio->index == maxblock - 1)
block_len =
offset_in_page(inode->i_size);
} else {
@@ -861,7 +861,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
* from the previous block's pointer.
*/
block_start = OFFSET(inode) + maxblock * 4;
- if (page->index)
+ if (folio->index)
block_start = *(u32 *)
cramfs_read(sb, blkptr_offset - 4, 4);
/* Beware... previous ptr might be a direct ptr */
@@ -906,17 +906,12 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
}
memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
- flush_dcache_page(page);
- kunmap_local(pgdata);
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ flush_dcache_folio(folio);
+ success = true;
err:
kunmap_local(pgdata);
- ClearPageUptodate(page);
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, success);
return 0;
}
@@ -1003,4 +998,5 @@ static void __exit exit_cramfs_fs(void)
module_init(init_cramfs_fs)
module_exit(exit_cramfs_fs)
+MODULE_DESCRIPTION("Compressed ROM file system support");
MODULE_LICENSE("GPL");
diff --git a/fs/dcache.c b/fs/dcache.c
index d58dc9e58f3b..8bdc278a0205 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,8 @@
#include "internal.h"
#include "mount.h"
+#include <asm/runtime-const.h>
+
/*
* Usage:
* dcache->d_inode->i_lock protects:
@@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
static struct hlist_bl_head *dentry_hashtable __ro_after_init;
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
- return dentry_hashtable + (hash >> d_hash_shift);
+ return runtime_const_ptr(dentry_hashtable) +
+ runtime_const_shift_right_32(hashlen, d_hash_shift);
}
#define IN_LOOKUP_SHIFT 10
@@ -355,7 +358,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
- if (flags & DCACHE_LRU_LIST)
+ /*
+ * The negative counter only tracks dentries on the LRU. Don't inc if
+ * d_lru is on another list.
+ */
+ if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}
@@ -1548,7 +1555,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
{
struct dentry *dentry;
- WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
+ rwsem_assert_held_write(&sb->s_umount);
dentry = sb->s_root;
sb->s_root = NULL;
@@ -1844,9 +1851,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
/*
- * Decrement negative dentry count if it was in the LRU list.
+ * The negative counter only tracks dentries on the LRU. Don't dec if
+ * d_lru is on another list.
*/
- if (dentry->d_flags & DCACHE_LRU_LIST)
+ if ((dentry->d_flags &
+ (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_dec(nr_dentry_negative);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
raw_write_seqcount_begin(&dentry->d_seq);
@@ -2104,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
unsigned *seqp)
{
u64 hashlen = name->hash_len;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -2171,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
{
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -3097,6 +3106,34 @@ void d_tmpfile(struct file *file, struct inode *inode)
}
EXPORT_SYMBOL(d_tmpfile);
+/*
+ * Obtain inode number of the parent dentry.
+ */
+ino_t d_parent_ino(struct dentry *dentry)
+{
+ struct dentry *parent;
+ struct inode *iparent;
+ unsigned seq;
+ ino_t ret;
+
+ scoped_guard(rcu) {
+ seq = raw_seqcount_begin(&dentry->d_seq);
+ parent = READ_ONCE(dentry->d_parent);
+ iparent = d_inode_rcu(parent);
+ if (likely(iparent)) {
+ ret = iparent->i_ino;
+ if (!read_seqcount_retry(&dentry->d_seq, seq))
+ return ret;
+ }
+ }
+
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_parent->d_inode->i_ino;
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+EXPORT_SYMBOL(d_parent_ino);
+
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
@@ -3126,6 +3163,9 @@ static void __init dcache_init_early(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
static void __init dcache_init(void)
@@ -3154,6 +3194,9 @@ static void __init dcache_init(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
/* SLAB cache for __getname() consumers */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8fd928899a59..91521576f500 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -92,9 +92,9 @@ enum {
};
static const struct fs_parameter_spec debugfs_param_specs[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -102,8 +102,6 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
{
struct debugfs_fs_info *opts = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, debugfs_param_specs, param, &result);
@@ -120,16 +118,10 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
switch (opt) {
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalf(fc, "Unknown uid");
- opts->uid = uid;
+ opts->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalf(fc, "Unknown gid");
- opts->gid = gid;
+ opts->gid = result.gid;
break;
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index bb14462f6d99..a929f1b613be 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -275,8 +275,8 @@ enum {
};
static const struct fs_parameter_spec efivarfs_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
{},
};
@@ -293,14 +293,10 @@ static int efivarfs_parse_param(struct fs_context *fc, struct fs_parameter *para
switch (opt) {
case Opt_uid:
- opts->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(opts->uid))
- return -EINVAL;
+ opts->uid = result.uid;
break;
case Opt_gid:
- opts->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(opts->gid))
- return -EINVAL;
+ opts->gid = result.gid;
break;
default:
return -EINVAL;
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 7844ab24b813..462619e59766 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -311,4 +311,5 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) {
return 0;
}
+MODULE_DESCRIPTION("Extent File System (efs)");
MODULE_LICENSE("GPL");
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 3b03a573cb1a..7749feded722 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -14,10 +14,9 @@
static int efs_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- char *link = page_address(page);
- struct buffer_head * bh;
- struct inode * inode = page->mapping->host;
+ char *link = folio_address(folio);
+ struct buffer_head *bh;
+ struct inode *inode = folio->mapping->host;
efs_block_t size = inode->i_size;
int err;
@@ -40,12 +39,9 @@ static int efs_symlink_read_folio(struct file *file, struct folio *folio)
brelse(bh);
}
link[size] = '\0';
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ err = 0;
fail:
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, err == 0);
return err;
}
diff --git a/fs/exec.c b/fs/exec.c
index 40073142288f..a47d0e4c54f6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,6 +486,35 @@ static int count_strings_kernel(const char *const *argv)
return i;
}
+static inline int bprm_set_stack_limit(struct linux_binprm *bprm,
+ unsigned long limit)
+{
+#ifdef CONFIG_MMU
+ /* Avoid a pathological bprm->p. */
+ if (bprm->p < limit)
+ return -E2BIG;
+ bprm->argmin = bprm->p - limit;
+#endif
+ return 0;
+}
+static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm)
+{
+#ifdef CONFIG_MMU
+ return bprm->p < bprm->argmin;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Calculate bprm->argmin from:
+ * - _STK_LIM
+ * - ARG_MAX
+ * - bprm->rlim_stack.rlim_cur
+ * - bprm->argc
+ * - bprm->envc
+ * - bprm->p
+ */
static int bprm_stack_limits(struct linux_binprm *bprm)
{
unsigned long limit, ptr_size;
@@ -505,6 +534,9 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* of argument strings even with small stacks
*/
limit = max_t(unsigned long, limit, ARG_MAX);
+ /* Reject totally pathological counts. */
+ if (bprm->argc < 0 || bprm->envc < 0)
+ return -E2BIG;
/*
* We must account for the size of all the argv and envp pointers to
* the argv and envp strings, since they will also take up space in
@@ -518,13 +550,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* argc can never be 0, to keep them from walking envp by accident.
* See do_execveat_common().
*/
- ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
+ if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) ||
+ check_mul_overflow(ptr_size, sizeof(void *), &ptr_size))
+ return -E2BIG;
if (limit <= ptr_size)
return -E2BIG;
limit -= ptr_size;
- bprm->argmin = bprm->p - limit;
- return 0;
+ return bprm_set_stack_limit(bprm, limit);
}
/*
@@ -562,10 +595,8 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
pos = bprm->p;
str += len;
bprm->p -= len;
-#ifdef CONFIG_MMU
- if (bprm->p < bprm->argmin)
+ if (bprm_hit_stack_limit(bprm))
goto out;
-#endif
while (len > 0) {
int offset, bytes_to_copy;
@@ -640,7 +671,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
/* We're going to work our way backwards. */
arg += len;
bprm->p -= len;
- if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
+ if (bprm_hit_stack_limit(bprm))
return -E2BIG;
while (len > 0) {
@@ -952,10 +983,6 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
path_noexec(&file->f_path)))
goto exit;
- err = deny_write_access(file);
- if (err)
- goto exit;
-
out:
return file;
@@ -971,8 +998,7 @@ exit:
*
* Returns ERR_PTR on failure or allocated struct file on success.
*
- * As this is a wrapper for the internal do_open_execat(), callers
- * must call allow_write_access() before fput() on release. Also see
+ * As this is a wrapper for the internal do_open_execat(). Also see
* do_close_execat().
*/
struct file *open_exec(const char *name)
@@ -1524,10 +1550,8 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
/* Matches do_open_execat() */
static void do_close_execat(struct file *file)
{
- if (!file)
- return;
- allow_write_access(file);
- fput(file);
+ if (file)
+ fput(file);
}
static void free_bprm(struct linux_binprm *bprm)
@@ -1846,7 +1870,6 @@ static int exec_binprm(struct linux_binprm *bprm)
bprm->file = bprm->interpreter;
bprm->interpreter = NULL;
- allow_write_access(exec);
if (unlikely(bprm->have_execfd)) {
if (bprm->executable) {
fput(exec);
@@ -2211,3 +2234,7 @@ static int __init init_fs_exec_sysctls(void)
fs_initcall(init_fs_exec_sysctls);
#endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_EXEC_KUNIT_TEST
+#include "exec_test.c"
+#endif
diff --git a/fs/exec_test.c b/fs/exec_test.c
new file mode 100644
index 000000000000..7c77d039680b
--- /dev/null
+++ b/fs/exec_test.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+
+struct bprm_stack_limits_result {
+ struct linux_binprm bprm;
+ int expected_rc;
+ unsigned long expected_argmin;
+};
+
+static const struct bprm_stack_limits_result bprm_stack_limits_results[] = {
+ /* Negative argc/envc counts produce -E2BIG */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = INT_MIN, .envc = INT_MIN }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 5, .envc = -1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = -1, .envc = 10 }, .expected_rc = -E2BIG },
+ /* The max value of argc or envc is MAX_ARG_STRINGS. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = INT_MAX, .envc = INT_MAX }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = MAX_ARG_STRINGS, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 0, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = MAX_ARG_STRINGS, .envc = 0 }, .expected_rc = -E2BIG },
+ /*
+ * On 32-bit system these argc and envc counts, while likely impossible
+ * to represent within the associated TASK_SIZE, could overflow the
+ * limit calculation, and bypass the ptr_size <= limit check.
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 0x20000001, .envc = 0x20000001 }, .expected_rc = -E2BIG },
+#ifdef CONFIG_MMU
+ /* Make sure a pathological bprm->p doesn't cause an overflow. */
+ { { .p = sizeof(void *), .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 10, .envc = 10 }, .expected_rc = -E2BIG },
+#endif
+ /*
+ * 0 rlim_stack will get raised to ARG_MAX. With 1 string pointer,
+ * we should see p - ARG_MAX + sizeof(void *).
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 1, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)},
+ /* Validate that argc is always raised to a minimum of 1. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)},
+ /*
+ * 0 rlim_stack will get raised to ARG_MAX. With pointers filling ARG_MAX,
+ * we should see -E2BIG. (Note argc is always raised to at least 1.)
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG },
+ /* And with one less, we see space for exactly 1 pointer. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* If we raise rlim_stack / 4 to exactly ARG_MAX, nothing changes. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* But raising it another pointer * 4 will provide space for 1 more pointer. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* Raising rlim_stack / 4 to _STK_LIM / 4 * 3 will see more space. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ /* But raising it any further will see no increase. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * + sizeof(void *)),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+};
+
+static void exec_test_bprm_stack_limits(struct kunit *test)
+{
+ /* Double-check the constants. */
+ KUNIT_EXPECT_EQ(test, _STK_LIM, SZ_8M);
+ KUNIT_EXPECT_EQ(test, ARG_MAX, 32 * SZ_4K);
+ KUNIT_EXPECT_EQ(test, MAX_ARG_STRINGS, 0x7FFFFFFF);
+
+ for (int i = 0; i < ARRAY_SIZE(bprm_stack_limits_results); i++) {
+ const struct bprm_stack_limits_result *result = &bprm_stack_limits_results[i];
+ struct linux_binprm bprm = result->bprm;
+ int rc;
+
+ rc = bprm_stack_limits(&bprm);
+ KUNIT_EXPECT_EQ_MSG(test, rc, result->expected_rc, "on loop %d", i);
+#ifdef CONFIG_MMU
+ KUNIT_EXPECT_EQ_MSG(test, bprm.argmin, result->expected_argmin, "on loop %d", i);
+#endif
+ }
+}
+
+static struct kunit_case exec_test_cases[] = {
+ KUNIT_CASE(exec_test_bprm_stack_limits),
+ {},
+};
+
+static struct kunit_suite exec_test_suite = {
+ .name = "exec",
+ .test_cases = exec_test_cases,
+};
+
+kunit_test_suite(exec_test_suite);
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3d5ea2cfad66..a3c7173ef693 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -225,8 +225,8 @@ static const struct constant_table exfat_param_enums[] = {
};
static const struct fs_parameter_spec exfat_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
fsparam_u32oct("umask", Opt_umask),
fsparam_u32oct("dmask", Opt_dmask),
fsparam_u32oct("fmask", Opt_fmask),
@@ -262,10 +262,10 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (opt) {
case Opt_uid:
- opts->fs_uid = make_kuid(current_user_ns(), result.uint_32);
+ opts->fs_uid = result.uid;
break;
case Opt_gid:
- opts->fs_gid = make_kgid(current_user_ns(), result.uint_32);
+ opts->fs_gid = result.gid;
break;
case Opt_umask:
opts->fs_fmask = result.uint_32;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 07ea3d62b298..4f2dd4ab4486 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(exportfs_encode_fh);
struct dentry *
exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
- int fileid_type,
+ int fileid_type, unsigned int flags,
int (*acceptable)(void *, struct dentry *),
void *context)
{
@@ -445,6 +445,11 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
if (IS_ERR_OR_NULL(result))
return result;
+ if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) {
+ err = -ENOTDIR;
+ goto err_result;
+ }
+
/*
* If no acceptance criteria was specified by caller, a disconnected
* dentry is also accepatable. Callers may use this mode to query if
@@ -581,7 +586,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
{
struct dentry *ret;
- ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
+ ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0,
acceptable, context);
if (IS_ERR_OR_NULL(ret)) {
if (ret == ERR_PTR(-ENOMEM))
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 7ae0b61258a7..0a056d97e640 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -31,11 +31,10 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
ext4_fname_from_fscrypt_name(fname, &name);
-#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
if (err)
ext4_fname_free_filename(fname);
-#endif
+
return err;
}
@@ -51,11 +50,9 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
ext4_fname_from_fscrypt_name(fname, &name);
-#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
if (err)
ext4_fname_free_filename(fname);
-#endif
return err;
}
@@ -70,10 +67,7 @@ void ext4_fname_free_filename(struct ext4_filename *fname)
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
-#if IS_ENABLED(CONFIG_UNICODE)
- kfree(fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ ext4_fname_free_ci_filename(fname);
}
static bool uuid_is_zero(__u8 u[16])
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 983dad8c07ec..8007abd4972d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2511,7 +2511,7 @@ struct ext4_filename {
struct fscrypt_str crypto_buf;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};
@@ -2745,8 +2745,25 @@ ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
#if IS_ENABLED(CONFIG_UNICODE)
extern int ext4_fname_setup_ci_filename(struct inode *dir,
- const struct qstr *iname,
- struct ext4_filename *fname);
+ const struct qstr *iname,
+ struct ext4_filename *fname);
+
+static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname)
+{
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+}
+#else
+static inline int ext4_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct ext4_filename *fname)
+{
+ return 0;
+}
+
+static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname)
+{
+}
#endif
/* ext4 encryption related stuff goes here crypto.c */
@@ -2769,16 +2786,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
int lookup,
struct ext4_filename *fname)
{
- int err = 0;
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
-#if IS_ENABLED(CONFIG_UNICODE)
- err = ext4_fname_setup_ci_filename(dir, iname, fname);
-#endif
-
- return err;
+ return ext4_fname_setup_ci_filename(dir, iname, fname);
}
static inline int ext4_fname_prepare_lookup(struct inode *dir,
@@ -2790,10 +2802,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
static inline void ext4_fname_free_filename(struct ext4_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
- kfree(fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ ext4_fname_free_ci_filename(fname);
}
static inline int ext4_ioctl_get_encryption_pwsalt(struct file *filp,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a630b27a4cc6..e6769b97a970 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1390,62 +1390,11 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
}
#if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Test whether a case-insensitive directory entry matches the filename
- * being searched for. If quick is set, assume the name being looked up
- * is already in the casefolded form.
- *
- * Returns: 0 if the directory entry matches, more than 0 if it
- * doesn't match or less than zero on error.
- */
-static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- u8 *de_name, size_t de_name_len, bool quick)
-{
- const struct super_block *sb = parent->i_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
- struct qstr entry = QSTR_INIT(de_name, de_name_len);
- int ret;
-
- if (IS_ENCRYPTED(parent)) {
- const struct fscrypt_str encrypted_name =
- FSTR_INIT(de_name, de_name_len);
-
- decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
- if (!decrypted_name.name)
- return -ENOMEM;
- ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
- &decrypted_name);
- if (ret < 0)
- goto out;
- entry.name = decrypted_name.name;
- entry.len = decrypted_name.len;
- }
-
- if (quick)
- ret = utf8_strncasecmp_folded(um, name, &entry);
- else
- ret = utf8_strncasecmp(um, name, &entry);
- if (ret < 0) {
- /* Handle invalid character sequence as either an error
- * or as an opaque byte sequence.
- */
- if (sb_has_strict_encoding(sb))
- ret = -EINVAL;
- else if (name->len != entry.len)
- ret = 1;
- else
- ret = !!memcmp(name->name, entry.name, entry.len);
- }
-out:
- kfree(decrypted_name.name);
- return ret;
-}
-
int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct ext4_filename *name)
{
- struct fscrypt_str *cf_name = &name->cf_name;
+ struct qstr *cf_name = &name->cf_name;
+ unsigned char *buf;
struct dx_hash_info *hinfo = &name->hinfo;
int len;
@@ -1455,18 +1404,18 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
- cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
- if (!cf_name->name)
+ buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!buf)
return -ENOMEM;
- len = utf8_casefold(dir->i_sb->s_encoding,
- iname, cf_name->name,
- EXT4_NAME_LEN);
+ len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN);
if (len <= 0) {
- kfree(cf_name->name);
- cf_name->name = NULL;
+ kfree(buf);
+ buf = NULL;
}
+ cf_name->name = buf;
cf_name->len = (unsigned) len;
+
if (!IS_ENCRYPTED(dir))
return 0;
@@ -1502,22 +1451,29 @@ static bool ext4_match(struct inode *parent,
#if IS_ENABLED(CONFIG_UNICODE)
if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
- if (fname->cf_name.name) {
- struct qstr cf = {.name = fname->cf_name.name,
- .len = fname->cf_name.len};
- if (IS_ENCRYPTED(parent)) {
- if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
- fname->hinfo.minor_hash !=
- EXT4_DIRENT_MINOR_HASH(de)) {
-
- return false;
- }
- }
- return !ext4_ci_compare(parent, &cf, de->name,
- de->name_len, true);
- }
- return !ext4_ci_compare(parent, fname->usr_fname, de->name,
- de->name_len, false);
+ /*
+ * Just checking IS_ENCRYPTED(parent) below is not
+ * sufficient to decide whether one can use the hash for
+ * skipping the string comparison, because the key might
+ * have been added right after
+ * ext4_fname_setup_ci_filename(). In this case, a hash
+ * mismatch will be a false negative. Therefore, make
+ * sure cf_name was properly initialized before
+ * considering the calculated hash.
+ */
+ if (IS_ENCRYPTED(parent) && fname->cf_name.name &&
+ (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
+ fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de)))
+ return false;
+ /*
+ * Treat comparison errors as not a match. The
+ * only case where it happens is on a disk
+ * corruption or ENOMEM.
+ */
+
+ return generic_ci_match(parent, fname->usr_fname,
+ &fname->cf_name, de->name,
+ de->name_len) > 0;
}
#endif
@@ -1869,8 +1825,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
}
}
-#if IS_ENABLED(CONFIG_UNICODE)
- if (!inode && IS_CASEFOLDED(dir)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
@@ -1878,7 +1833,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
*/
return NULL;
}
-#endif
+
return d_splice_alias(inode, dentry);
}
@@ -3208,16 +3163,14 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
-#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
end_rmdir:
brelse(bh);
@@ -3319,16 +3272,15 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
goto out_trace;
retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
-#if IS_ENABLED(CONFIG_UNICODE)
+
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
out_trace:
trace_ext4_unlink_exit(dentry, retval);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c682fb927b64..eb899628e121 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,8 +1721,8 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
fsparam_flag ("bsdgroups", Opt_grpid),
fsparam_flag ("nogrpid", Opt_nogrpid),
fsparam_flag ("sysvgroups", Opt_nogrpid),
- fsparam_u32 ("resgid", Opt_resgid),
- fsparam_u32 ("resuid", Opt_resuid),
+ fsparam_gid ("resgid", Opt_resgid),
+ fsparam_uid ("resuid", Opt_resuid),
fsparam_u32 ("sb", Opt_sb),
fsparam_enum ("errors", Opt_errors, ext4_param_errors),
fsparam_flag ("nouid32", Opt_nouid32),
@@ -2127,8 +2127,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct fs_parse_result result;
const struct mount_opts *m;
int is_remount;
- kuid_t uid;
- kgid_t gid;
int token;
token = fs_parse(fc, ext4_param_specs, param, &result);
@@ -2270,23 +2268,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->spec |= EXT4_SPEC_s_stripe;
return 0;
case Opt_resuid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid)) {
- ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
- result.uint_32);
- return -EINVAL;
- }
- ctx->s_resuid = uid;
+ ctx->s_resuid = result.uid;
ctx->spec |= EXT4_SPEC_s_resuid;
return 0;
case Opt_resgid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid)) {
- ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
- result.uint_32);
- return -EINVAL;
- }
- ctx->s_resgid = gid;
+ ctx->s_resgid = result.gid;
ctx->spec |= EXT4_SPEC_s_resgid;
return 0;
case Opt_journal_dev:
@@ -3586,14 +3572,12 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (ext4_has_feature_casefold(sb)) {
+ if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR,
"Filesystem with casefold feature cannot be "
"mounted without CONFIG_UNICODE");
return 0;
}
-#endif
if (readonly)
return 1;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index ec2aeccb69a3..8bffdeccdbc3 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -219,8 +219,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 02c9355176d3..cbd7a5e96a37 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
+#if IS_ENABLED(CONFIG_UNICODE)
/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
+ unsigned char *buf;
+ int len;
if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
- fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
- GFP_NOFS, false, F2FS_SB(sb));
- if (!fname->cf_name.name)
+ buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+ GFP_NOFS, false, F2FS_SB(sb));
+ if (!buf)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sb->s_encoding,
- fname->usr_fname,
- fname->cf_name.name,
- F2FS_NAME_LEN);
- if ((int)fname->cf_name.len <= 0) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
+
+ len = utf8_casefold(sb->s_encoding, fname->usr_fname,
+ buf, F2FS_NAME_LEN);
+ if (len <= 0) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
+ return 0;
}
+ fname->cf_name.name = buf;
+ fname->cf_name.len = len;
}
-#endif
+
return 0;
}
+void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+ unsigned char *buf = (unsigned char *)fname->cf_name.name;
+
+ if (buf) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
+ fname->cf_name.name = NULL;
+ }
+}
+#endif /* CONFIG_UNICODE */
+
static int __f2fs_setup_filename(const struct inode *dir,
const struct fscrypt_name *crypt_name,
struct f2fs_filename *fname)
@@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname)
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
- }
-#endif
+ f2fs_free_casefolded_name(fname);
}
static unsigned long dir_block_index(unsigned int level,
@@ -176,58 +185,6 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
return f2fs_find_target_dentry(&d, fname, max_slots);
}
-#if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
- *
- * Returns 1 for a match, 0 for no match, and -errno on an error.
- */
-static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
- const u8 *de_name, u32 de_name_len)
-{
- const struct super_block *sb = dir->i_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
- struct qstr entry = QSTR_INIT(de_name, de_name_len);
- int res;
-
- if (IS_ENCRYPTED(dir)) {
- const struct fscrypt_str encrypted_name =
- FSTR_INIT((u8 *)de_name, de_name_len);
-
- if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
- return -EINVAL;
-
- decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
- if (!decrypted_name.name)
- return -ENOMEM;
- res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
- &decrypted_name);
- if (res < 0)
- goto out;
- entry.name = decrypted_name.name;
- entry.len = decrypted_name.len;
- }
-
- res = utf8_strncasecmp_folded(um, name, &entry);
- /*
- * In strict mode, ignore invalid names. In non-strict mode,
- * fall back to treating them as opaque byte sequences.
- */
- if (res < 0 && !sb_has_strict_encoding(sb)) {
- res = name->len == entry.len &&
- memcmp(name->name, entry.name, name->len) == 0;
- } else {
- /* utf8_strncasecmp_folded returns 0 on match */
- res = (res == 0);
- }
-out:
- kfree(decrypted_name.name);
- return res;
-}
-#endif /* CONFIG_UNICODE */
-
static inline int f2fs_match_name(const struct inode *dir,
const struct f2fs_filename *fname,
const u8 *de_name, u32 de_name_len)
@@ -235,11 +192,11 @@ static inline int f2fs_match_name(const struct inode *dir,
struct fscrypt_name f;
#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
+ if (fname->cf_name.name)
+ return generic_ci_match(dir, fname->usr_fname,
+ &fname->cf_name,
+ de_name, de_name_len);
- return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
- }
#endif
f.usr_fname = fname->usr_fname;
f.disk_name = fname->disk_name;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1974b6aff397..8a9d910aa552 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -531,7 +531,7 @@ struct f2fs_filename {
* internal operation where usr_fname is also NULL. In all these cases
* we fall back to treating the name as an opaque byte sequence.
*/
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};
@@ -3533,8 +3533,22 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
/*
* dir.c
*/
+#if IS_ENABLED(CONFIG_UNICODE)
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname);
+void f2fs_free_casefolded_name(struct f2fs_filename *fname);
+#else
+static inline int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+ return 0;
+}
+
+static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+}
+#endif /* CONFIG_UNICODE */
+
int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct f2fs_filename *fname);
int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5c0b281a70f3..c1ad9b278c47 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -185,7 +185,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
- *pino = parent_ino(dentry);
+ *pino = d_parent_ino(dentry);
dput(dentry);
return 1;
}
@@ -923,10 +923,8 @@ static void __setattr_copy(struct mnt_idmap *idmap,
inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
- if (!vfsgid_in_group_p(vfsgid) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e54f8c08bda8..1ecde2b45e99 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -576,8 +576,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out_iput;
}
out_splice:
-#if IS_ENABLED(CONFIG_UNICODE)
- if (!inode && IS_CASEFOLDED(dir)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
@@ -586,7 +585,7 @@ out_splice:
trace_f2fs_lookup_end(dir, dentry, ino, err);
return NULL;
}
-#endif
+
new = d_splice_alias(inode, dentry);
trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry,
ino, IS_ERR(new) ? PTR_ERR(new) : err);
@@ -639,16 +638,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
-#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
* negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
+
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
fail:
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 496aee53c38a..8712e264071f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -46,10 +46,6 @@
static struct kmem_cache *fsync_entry_slab;
-#if IS_ENABLED(CONFIG_UNICODE)
-extern struct kmem_cache *f2fs_cf_name_slab;
-#endif
-
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
@@ -153,11 +149,8 @@ static int init_recovered_filename(const struct inode *dir,
if (err)
return err;
f2fs_hash_filename(dir, fname);
-#if IS_ENABLED(CONFIG_UNICODE)
/* Case-sensitive match is fine for recovery */
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ f2fs_free_casefolded_name(fname);
} else {
f2fs_hash_filename(dir, fname);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1f1b3647a998..df4cf31f93df 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -321,7 +321,7 @@ struct kmem_cache *f2fs_cf_name_slab;
static int __init f2fs_create_casefold_cache(void)
{
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
- F2FS_NAME_LEN);
+ F2FS_NAME_LEN);
return f2fs_cf_name_slab ? 0 : -ENOMEM;
}
@@ -1326,13 +1326,13 @@ default_check:
return -EINVAL;
}
#endif
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (f2fs_sb_has_casefold(sbi)) {
+
+ if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
-#endif
+
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 66cf4778cf3b..d3e426de5f01 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -7,6 +7,8 @@
#include <linux/hash.h>
#include <linux/ratelimit.h>
#include <linux/msdos_fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
/*
* vfat shortname flags
@@ -51,7 +53,8 @@ struct fat_mount_options {
tz_set:1, /* Filesystem timestamps' offset set */
rodir:1, /* allow ATTR_RO for directory */
discard:1, /* Issue discard requests on deletions */
- dos1xfloppy:1; /* Assume default BPB for DOS 1.x floppies */
+ dos1xfloppy:1, /* Assume default BPB for DOS 1.x floppies */
+ debug:1; /* Not currently used */
};
#define FAT_HASH_BITS 8
@@ -415,12 +418,21 @@ extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
extern struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos);
extern int fat_sync_inode(struct inode *inode);
-extern int fat_fill_super(struct super_block *sb, void *data, int silent,
- int isvfat, void (*setup)(struct super_block *));
+extern int fat_fill_super(struct super_block *sb, struct fs_context *fc,
+ void (*setup)(struct super_block *));
extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de);
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
+
+extern const struct fs_parameter_spec fat_param_spec[];
+int fat_init_fs_context(struct fs_context *fc, bool is_vfat);
+void fat_free_fc(struct fs_context *fc);
+
+int fat_parse_param(struct fs_context *fc, struct fs_parameter *param,
+ bool is_vfat);
+int fat_reconfigure(struct fs_context *fc);
+
static inline unsigned long fat_dir_hash(int logstart)
{
return hash_32(logstart, FAT_HASH_BITS);
diff --git a/fs/fat/fat_test.c b/fs/fat/fat_test.c
index 2dab4ca1d0d8..1f0062659067 100644
--- a/fs/fat/fat_test.c
+++ b/fs/fat/fat_test.c
@@ -193,4 +193,5 @@ static struct kunit_suite fat_test_suite = {
kunit_test_suites(&fat_test_suite);
+MODULE_DESCRIPTION("KUnit tests for FAT filesystems");
MODULE_LICENSE("GPL v2");
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d9e6fbb6f246..19115fd2d2a4 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -16,7 +16,6 @@
#include <linux/mpage.h>
#include <linux/vfs.h>
#include <linux/seq_file.h>
-#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -804,16 +803,17 @@ static void __exit fat_destroy_inodecache(void)
kmem_cache_destroy(fat_inode_cachep);
}
-static int fat_remount(struct super_block *sb, int *flags, char *data)
+int fat_reconfigure(struct fs_context *fc)
{
bool new_rdonly;
+ struct super_block *sb = fc->root->d_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
+ fc->sb_flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
sync_filesystem(sb);
/* make sure we update state on remount. */
- new_rdonly = *flags & SB_RDONLY;
+ new_rdonly = fc->sb_flags & SB_RDONLY;
if (new_rdonly != sb_rdonly(sb)) {
if (new_rdonly)
fat_set_state(sb, 0, 0);
@@ -822,6 +822,7 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
}
return 0;
}
+EXPORT_SYMBOL_GPL(fat_reconfigure);
static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -939,8 +940,6 @@ static const struct super_operations fat_sops = {
.evict_inode = fat_evict_inode,
.put_super = fat_put_super,
.statfs = fat_statfs,
- .remount_fs = fat_remount,
-
.show_options = fat_show_options,
};
@@ -1037,355 +1036,282 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
}
enum {
- Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
- Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
- Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
- Opt_immutable, Opt_dots, Opt_nodots,
- Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
- Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
- Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
- Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
- Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy,
+ Opt_check, Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask,
+ Opt_allow_utime, Opt_codepage, Opt_usefree, Opt_nocase, Opt_quiet,
+ Opt_showexec, Opt_debug, Opt_immutable, Opt_dots, Opt_dotsOK,
+ Opt_charset, Opt_shortname, Opt_utf8, Opt_utf8_bool,
+ Opt_uni_xl, Opt_uni_xl_bool, Opt_nonumtail, Opt_nonumtail_bool,
+ Opt_obsolete, Opt_flush, Opt_tz, Opt_rodir, Opt_errors, Opt_discard,
+ Opt_nfs, Opt_nfs_enum, Opt_time_offset, Opt_dos1xfloppy,
};
-static const match_table_t fat_tokens = {
- {Opt_check_r, "check=relaxed"},
- {Opt_check_s, "check=strict"},
- {Opt_check_n, "check=normal"},
- {Opt_check_r, "check=r"},
- {Opt_check_s, "check=s"},
- {Opt_check_n, "check=n"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_umask, "umask=%o"},
- {Opt_dmask, "dmask=%o"},
- {Opt_fmask, "fmask=%o"},
- {Opt_allow_utime, "allow_utime=%o"},
- {Opt_codepage, "codepage=%u"},
- {Opt_usefree, "usefree"},
- {Opt_nocase, "nocase"},
- {Opt_quiet, "quiet"},
- {Opt_showexec, "showexec"},
- {Opt_debug, "debug"},
- {Opt_immutable, "sys_immutable"},
- {Opt_flush, "flush"},
- {Opt_tz_utc, "tz=UTC"},
- {Opt_time_offset, "time_offset=%d"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_discard, "discard"},
- {Opt_nfs_stale_rw, "nfs"},
- {Opt_nfs_stale_rw, "nfs=stale_rw"},
- {Opt_nfs_nostale_ro, "nfs=nostale_ro"},
- {Opt_dos1xfloppy, "dos1xfloppy"},
- {Opt_obsolete, "conv=binary"},
- {Opt_obsolete, "conv=text"},
- {Opt_obsolete, "conv=auto"},
- {Opt_obsolete, "conv=b"},
- {Opt_obsolete, "conv=t"},
- {Opt_obsolete, "conv=a"},
- {Opt_obsolete, "fat=%u"},
- {Opt_obsolete, "blocksize=%u"},
- {Opt_obsolete, "cvf_format=%20s"},
- {Opt_obsolete, "cvf_options=%100s"},
- {Opt_obsolete, "posix"},
- {Opt_err, NULL},
-};
-static const match_table_t msdos_tokens = {
- {Opt_nodots, "nodots"},
- {Opt_nodots, "dotsOK=no"},
- {Opt_dots, "dots"},
- {Opt_dots, "dotsOK=yes"},
- {Opt_err, NULL}
+static const struct constant_table fat_param_check[] = {
+ {"relaxed", 'r'},
+ {"r", 'r'},
+ {"strict", 's'},
+ {"s", 's'},
+ {"normal", 'n'},
+ {"n", 'n'},
+ {}
};
-static const match_table_t vfat_tokens = {
- {Opt_charset, "iocharset=%s"},
- {Opt_shortname_lower, "shortname=lower"},
- {Opt_shortname_win95, "shortname=win95"},
- {Opt_shortname_winnt, "shortname=winnt"},
- {Opt_shortname_mixed, "shortname=mixed"},
- {Opt_utf8_no, "utf8=0"}, /* 0 or no or false */
- {Opt_utf8_no, "utf8=no"},
- {Opt_utf8_no, "utf8=false"},
- {Opt_utf8_yes, "utf8=1"}, /* empty or 1 or yes or true */
- {Opt_utf8_yes, "utf8=yes"},
- {Opt_utf8_yes, "utf8=true"},
- {Opt_utf8_yes, "utf8"},
- {Opt_uni_xl_no, "uni_xlate=0"}, /* 0 or no or false */
- {Opt_uni_xl_no, "uni_xlate=no"},
- {Opt_uni_xl_no, "uni_xlate=false"},
- {Opt_uni_xl_yes, "uni_xlate=1"}, /* empty or 1 or yes or true */
- {Opt_uni_xl_yes, "uni_xlate=yes"},
- {Opt_uni_xl_yes, "uni_xlate=true"},
- {Opt_uni_xl_yes, "uni_xlate"},
- {Opt_nonumtail_no, "nonumtail=0"}, /* 0 or no or false */
- {Opt_nonumtail_no, "nonumtail=no"},
- {Opt_nonumtail_no, "nonumtail=false"},
- {Opt_nonumtail_yes, "nonumtail=1"}, /* empty or 1 or yes or true */
- {Opt_nonumtail_yes, "nonumtail=yes"},
- {Opt_nonumtail_yes, "nonumtail=true"},
- {Opt_nonumtail_yes, "nonumtail"},
- {Opt_rodir, "rodir"},
- {Opt_err, NULL}
+
+static const struct constant_table fat_param_tz[] = {
+ {"UTC", 0},
+ {}
};
-static int parse_options(struct super_block *sb, char *options, int is_vfat,
- int silent, int *debug, struct fat_mount_options *opts)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
- char *iocharset;
+static const struct constant_table fat_param_errors[] = {
+ {"continue", FAT_ERRORS_CONT},
+ {"panic", FAT_ERRORS_PANIC},
+ {"remount-ro", FAT_ERRORS_RO},
+ {}
+};
- opts->isvfat = is_vfat;
- opts->fs_uid = current_uid();
- opts->fs_gid = current_gid();
- opts->fs_fmask = opts->fs_dmask = current_umask();
- opts->allow_utime = -1;
- opts->codepage = fat_default_codepage;
- fat_reset_iocharset(opts);
- if (is_vfat) {
- opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
- opts->rodir = 0;
- } else {
- opts->shortname = 0;
- opts->rodir = 1;
- }
- opts->name_check = 'n';
- opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
- opts->unicode_xlate = 0;
- opts->numtail = 1;
- opts->usefree = opts->nocase = 0;
- opts->tz_set = 0;
- opts->nfs = 0;
- opts->errors = FAT_ERRORS_RO;
- *debug = 0;
+static const struct constant_table fat_param_nfs[] = {
+ {"stale_rw", FAT_NFS_STALE_RW},
+ {"nostale_ro", FAT_NFS_NOSTALE_RO},
+ {}
+};
- opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+/*
+ * These are all obsolete but we still reject invalid options.
+ * The corresponding values are therefore meaningless.
+ */
+static const struct constant_table fat_param_conv[] = {
+ {"binary", 0},
+ {"text", 0},
+ {"auto", 0},
+ {"b", 0},
+ {"t", 0},
+ {"a", 0},
+ {}
+};
- if (!options)
- goto out;
+/* Core options. See below for vfat and msdos extras */
+const struct fs_parameter_spec fat_param_spec[] = {
+ fsparam_enum ("check", Opt_check, fat_param_check),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("umask", Opt_umask),
+ fsparam_u32oct ("dmask", Opt_dmask),
+ fsparam_u32oct ("fmask", Opt_fmask),
+ fsparam_u32oct ("allow_utime", Opt_allow_utime),
+ fsparam_u32 ("codepage", Opt_codepage),
+ fsparam_flag ("usefree", Opt_usefree),
+ fsparam_flag ("nocase", Opt_nocase),
+ fsparam_flag ("quiet", Opt_quiet),
+ fsparam_flag ("showexec", Opt_showexec),
+ fsparam_flag ("debug", Opt_debug),
+ fsparam_flag ("sys_immutable", Opt_immutable),
+ fsparam_flag ("flush", Opt_flush),
+ fsparam_enum ("tz", Opt_tz, fat_param_tz),
+ fsparam_s32 ("time_offset", Opt_time_offset),
+ fsparam_enum ("errors", Opt_errors, fat_param_errors),
+ fsparam_flag ("discard", Opt_discard),
+ fsparam_flag ("nfs", Opt_nfs),
+ fsparam_enum ("nfs", Opt_nfs_enum, fat_param_nfs),
+ fsparam_flag ("dos1xfloppy", Opt_dos1xfloppy),
+ __fsparam(fs_param_is_enum, "conv",
+ Opt_obsolete, fs_param_deprecated, fat_param_conv),
+ __fsparam(fs_param_is_u32, "fat",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_u32, "blocksize",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_string, "cvf_format",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_string, "cvf_options",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(NULL, "posix",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ {}
+};
+EXPORT_SYMBOL_GPL(fat_param_spec);
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
+static const struct fs_parameter_spec msdos_param_spec[] = {
+ fsparam_flag_no ("dots", Opt_dots),
+ fsparam_bool ("dotsOK", Opt_dotsOK),
+ {}
+};
- token = match_token(p, fat_tokens, args);
- if (token == Opt_err) {
- if (is_vfat)
- token = match_token(p, vfat_tokens, args);
- else
- token = match_token(p, msdos_tokens, args);
- }
- switch (token) {
- case Opt_check_s:
- opts->name_check = 's';
- break;
- case Opt_check_r:
- opts->name_check = 'r';
- break;
- case Opt_check_n:
- opts->name_check = 'n';
- break;
- case Opt_usefree:
- opts->usefree = 1;
- break;
- case Opt_nocase:
- if (!is_vfat)
- opts->nocase = 1;
- else {
- /* for backward compatibility */
- opts->shortname = VFAT_SFN_DISPLAY_WIN95
- | VFAT_SFN_CREATE_WIN95;
- }
- break;
- case Opt_quiet:
- opts->quiet = 1;
- break;
- case Opt_showexec:
- opts->showexec = 1;
- break;
- case Opt_debug:
- *debug = 1;
- break;
- case Opt_immutable:
- opts->sys_immutable = 1;
- break;
- case Opt_uid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->fs_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(opts->fs_uid))
- return -EINVAL;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->fs_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(opts->fs_gid))
- return -EINVAL;
- break;
- case Opt_umask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_fmask = opts->fs_dmask = option;
- break;
- case Opt_dmask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_dmask = option;
- break;
- case Opt_fmask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_fmask = option;
- break;
- case Opt_allow_utime:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->allow_utime = option & (S_IWGRP | S_IWOTH);
- break;
- case Opt_codepage:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->codepage = option;
- break;
- case Opt_flush:
- opts->flush = 1;
- break;
- case Opt_time_offset:
- if (match_int(&args[0], &option))
- return -EINVAL;
- /*
- * GMT+-12 zones may have DST corrections so at least
- * 13 hours difference is needed. Make the limit 24
- * just in case someone invents something unusual.
- */
- if (option < -24 * 60 || option > 24 * 60)
- return -EINVAL;
- opts->tz_set = 1;
- opts->time_offset = option;
- break;
- case Opt_tz_utc:
- opts->tz_set = 1;
- opts->time_offset = 0;
- break;
- case Opt_err_cont:
- opts->errors = FAT_ERRORS_CONT;
- break;
- case Opt_err_panic:
- opts->errors = FAT_ERRORS_PANIC;
- break;
- case Opt_err_ro:
- opts->errors = FAT_ERRORS_RO;
- break;
- case Opt_nfs_stale_rw:
- opts->nfs = FAT_NFS_STALE_RW;
- break;
- case Opt_nfs_nostale_ro:
- opts->nfs = FAT_NFS_NOSTALE_RO;
- break;
- case Opt_dos1xfloppy:
- opts->dos1xfloppy = 1;
- break;
+static const struct constant_table fat_param_shortname[] = {
+ {"lower", VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95},
+ {"win95", VFAT_SFN_DISPLAY_WIN95 | VFAT_SFN_CREATE_WIN95},
+ {"winnt", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WINNT},
+ {"mixed", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WIN95},
+ {}
+};
- /* msdos specific */
- case Opt_dots:
- opts->dotsOK = 1;
- break;
- case Opt_nodots:
- opts->dotsOK = 0;
- break;
+static const struct fs_parameter_spec vfat_param_spec[] = {
+ fsparam_string ("iocharset", Opt_charset),
+ fsparam_enum ("shortname", Opt_shortname, fat_param_shortname),
+ fsparam_flag ("utf8", Opt_utf8),
+ fsparam_bool ("utf8", Opt_utf8_bool),
+ fsparam_flag ("uni_xlate", Opt_uni_xl),
+ fsparam_bool ("uni_xlate", Opt_uni_xl_bool),
+ fsparam_flag ("nonumtail", Opt_nonumtail),
+ fsparam_bool ("nonumtail", Opt_nonumtail_bool),
+ fsparam_flag ("rodir", Opt_rodir),
+ {}
+};
- /* vfat specific */
- case Opt_charset:
- fat_reset_iocharset(opts);
- iocharset = match_strdup(&args[0]);
- if (!iocharset)
- return -ENOMEM;
- opts->iocharset = iocharset;
- break;
- case Opt_shortname_lower:
- opts->shortname = VFAT_SFN_DISPLAY_LOWER
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_shortname_win95:
- opts->shortname = VFAT_SFN_DISPLAY_WIN95
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_shortname_winnt:
- opts->shortname = VFAT_SFN_DISPLAY_WINNT
- | VFAT_SFN_CREATE_WINNT;
- break;
- case Opt_shortname_mixed:
- opts->shortname = VFAT_SFN_DISPLAY_WINNT
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_utf8_no: /* 0 or no or false */
- opts->utf8 = 0;
- break;
- case Opt_utf8_yes: /* empty or 1 or yes or true */
- opts->utf8 = 1;
- break;
- case Opt_uni_xl_no: /* 0 or no or false */
- opts->unicode_xlate = 0;
- break;
- case Opt_uni_xl_yes: /* empty or 1 or yes or true */
- opts->unicode_xlate = 1;
- break;
- case Opt_nonumtail_no: /* 0 or no or false */
- opts->numtail = 1; /* negated option */
- break;
- case Opt_nonumtail_yes: /* empty or 1 or yes or true */
- opts->numtail = 0; /* negated option */
- break;
- case Opt_rodir:
- opts->rodir = 1;
- break;
- case Opt_discard:
- opts->discard = 1;
- break;
+int fat_parse_param(struct fs_context *fc, struct fs_parameter *param,
+ bool is_vfat)
+{
+ struct fat_mount_options *opts = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- /* obsolete mount options */
- case Opt_obsolete:
- fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
- "not supported now", p);
- break;
- /* unknown option */
- default:
- if (!silent) {
- fat_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" "
- "or missing value", p);
- }
- return -EINVAL;
- }
- }
+ /* remount options have traditionally been ignored */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
-out:
- /* UTF-8 doesn't provide FAT semantics */
- if (!strcmp(opts->iocharset, "utf8")) {
- fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
- " for FAT filesystems, filesystem will be "
- "case sensitive!");
+ opt = fs_parse(fc, fat_param_spec, param, &result);
+ /* If option not found in fat_param_spec, try vfat/msdos options */
+ if (opt == -ENOPARAM) {
+ if (is_vfat)
+ opt = fs_parse(fc, vfat_param_spec, param, &result);
+ else
+ opt = fs_parse(fc, msdos_param_spec, param, &result);
}
- /* If user doesn't specify allow_utime, it's initialized from dmask. */
- if (opts->allow_utime == (unsigned short)-1)
- opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
- if (opts->unicode_xlate)
- opts->utf8 = 0;
- if (opts->nfs == FAT_NFS_NOSTALE_RO) {
- sb->s_flags |= SB_RDONLY;
- sb->s_export_op = &fat_export_ops_nostale;
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_check:
+ opts->name_check = result.uint_32;
+ break;
+ case Opt_usefree:
+ opts->usefree = 1;
+ break;
+ case Opt_nocase:
+ if (!is_vfat)
+ opts->nocase = 1;
+ else {
+ /* for backward compatibility */
+ opts->shortname = VFAT_SFN_DISPLAY_WIN95
+ | VFAT_SFN_CREATE_WIN95;
+ }
+ break;
+ case Opt_quiet:
+ opts->quiet = 1;
+ break;
+ case Opt_showexec:
+ opts->showexec = 1;
+ break;
+ case Opt_debug:
+ opts->debug = 1;
+ break;
+ case Opt_immutable:
+ opts->sys_immutable = 1;
+ break;
+ case Opt_uid:
+ opts->fs_uid = result.uid;
+ break;
+ case Opt_gid:
+ opts->fs_gid = result.gid;
+ break;
+ case Opt_umask:
+ opts->fs_fmask = opts->fs_dmask = result.uint_32;
+ break;
+ case Opt_dmask:
+ opts->fs_dmask = result.uint_32;
+ break;
+ case Opt_fmask:
+ opts->fs_fmask = result.uint_32;
+ break;
+ case Opt_allow_utime:
+ opts->allow_utime = result.uint_32 & (S_IWGRP | S_IWOTH);
+ break;
+ case Opt_codepage:
+ opts->codepage = result.uint_32;
+ break;
+ case Opt_flush:
+ opts->flush = 1;
+ break;
+ case Opt_time_offset:
+ /*
+ * GMT+-12 zones may have DST corrections so at least
+ * 13 hours difference is needed. Make the limit 24
+ * just in case someone invents something unusual.
+ */
+ if (result.int_32 < -24 * 60 || result.int_32 > 24 * 60)
+ return -EINVAL;
+ opts->tz_set = 1;
+ opts->time_offset = result.int_32;
+ break;
+ case Opt_tz:
+ opts->tz_set = 1;
+ opts->time_offset = result.uint_32;
+ break;
+ case Opt_errors:
+ opts->errors = result.uint_32;
+ break;
+ case Opt_nfs:
+ opts->nfs = FAT_NFS_STALE_RW;
+ break;
+ case Opt_nfs_enum:
+ opts->nfs = result.uint_32;
+ break;
+ case Opt_dos1xfloppy:
+ opts->dos1xfloppy = 1;
+ break;
+
+ /* msdos specific */
+ case Opt_dots: /* dots / nodots */
+ opts->dotsOK = !result.negated;
+ break;
+ case Opt_dotsOK: /* dotsOK = yes/no */
+ opts->dotsOK = result.boolean;
+ break;
+
+ /* vfat specific */
+ case Opt_charset:
+ fat_reset_iocharset(opts);
+ opts->iocharset = param->string;
+ param->string = NULL; /* Steal string */
+ break;
+ case Opt_shortname:
+ opts->shortname = result.uint_32;
+ break;
+ case Opt_utf8:
+ opts->utf8 = 1;
+ break;
+ case Opt_utf8_bool:
+ opts->utf8 = result.boolean;
+ break;
+ case Opt_uni_xl:
+ opts->unicode_xlate = 1;
+ break;
+ case Opt_uni_xl_bool:
+ opts->unicode_xlate = result.boolean;
+ break;
+ case Opt_nonumtail:
+ opts->numtail = 0; /* negated option */
+ break;
+ case Opt_nonumtail_bool:
+ opts->numtail = !result.boolean; /* negated option */
+ break;
+ case Opt_rodir:
+ opts->rodir = 1;
+ break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
+
+ /* obsolete mount options */
+ case Opt_obsolete:
+ printk(KERN_INFO "FAT-fs: \"%s\" option is obsolete, "
+ "not supported now", param->key);
+ break;
+ default:
+ return -EINVAL;
}
return 0;
}
+EXPORT_SYMBOL_GPL(fat_parse_param);
static int fat_read_root(struct inode *inode)
{
@@ -1604,9 +1530,11 @@ out:
/*
* Read the super block of an MS-DOS FS.
*/
-int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
+int fat_fill_super(struct super_block *sb, struct fs_context *fc,
void (*setup)(struct super_block *))
{
+ struct fat_mount_options *opts = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct inode *root_inode = NULL, *fat_inode = NULL;
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
@@ -1614,7 +1542,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
- int debug;
long error;
char buf[50];
struct timespec64 ts;
@@ -1643,9 +1570,27 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
- if (error)
- goto out_fail;
+ /* UTF-8 doesn't provide FAT semantics */
+ if (!strcmp(opts->iocharset, "utf8")) {
+ fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
+ " for FAT filesystems, filesystem will be"
+ " case sensitive!");
+ }
+
+ /* If user doesn't specify allow_utime, it's initialized from dmask. */
+ if (opts->allow_utime == (unsigned short)-1)
+ opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
+ if (opts->unicode_xlate)
+ opts->utf8 = 0;
+ if (opts->nfs == FAT_NFS_NOSTALE_RO) {
+ sb->s_flags |= SB_RDONLY;
+ sb->s_export_op = &fat_export_ops_nostale;
+ }
+
+ /* Apply parsed options to sbi (structure copy) */
+ sbi->options = *opts;
+ /* Transfer ownership of iocharset to sbi->options */
+ opts->iocharset = NULL;
setup(sb); /* flavour-specific stuff that needs options */
@@ -1950,6 +1895,57 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
}
EXPORT_SYMBOL_GPL(fat_flush_inodes);
+int fat_init_fs_context(struct fs_context *fc, bool is_vfat)
+{
+ struct fat_mount_options *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ opts->isvfat = is_vfat;
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
+ opts->fs_fmask = opts->fs_dmask = current_umask();
+ opts->allow_utime = -1;
+ opts->codepage = fat_default_codepage;
+ fat_reset_iocharset(opts);
+ if (is_vfat) {
+ opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
+ opts->rodir = 0;
+ } else {
+ opts->shortname = 0;
+ opts->rodir = 1;
+ }
+ opts->name_check = 'n';
+ opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
+ opts->unicode_xlate = 0;
+ opts->numtail = 1;
+ opts->usefree = opts->nocase = 0;
+ opts->tz_set = 0;
+ opts->nfs = 0;
+ opts->errors = FAT_ERRORS_RO;
+ opts->debug = 0;
+
+ opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+
+ fc->fs_private = opts;
+ /* fc->ops assigned by caller */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fat_init_fs_context);
+
+void fat_free_fc(struct fs_context *fc)
+{
+ struct fat_mount_options *opts = fc->fs_private;
+
+ if (opts->iocharset != fat_default_iocharset)
+ kfree(opts->iocharset);
+ kfree(fc->fs_private);
+}
+EXPORT_SYMBOL_GPL(fat_free_fc);
+
static int __init init_fat_fs(void)
{
int err;
@@ -1978,4 +1974,5 @@ static void __exit exit_fat_fs(void)
module_init(init_fat_fs)
module_exit(exit_fat_fs)
+MODULE_DESCRIPTION("Core FAT filesystem support");
MODULE_LICENSE("GPL");
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 2116c486843b..f06f6ba643cc 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -650,24 +650,48 @@ static void setup(struct super_block *sb)
sb->s_flags |= SB_NOATIME;
}
-static int msdos_fill_super(struct super_block *sb, void *data, int silent)
+static int msdos_fill_super(struct super_block *sb, struct fs_context *fc)
{
- return fat_fill_super(sb, data, silent, 0, setup);
+ return fat_fill_super(sb, fc, setup);
}
-static struct dentry *msdos_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+static int msdos_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
+ return get_tree_bdev(fc, msdos_fill_super);
+}
+
+static int msdos_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ return fat_parse_param(fc, param, false);
+}
+
+static const struct fs_context_operations msdos_context_ops = {
+ .parse_param = msdos_parse_param,
+ .get_tree = msdos_get_tree,
+ .reconfigure = fat_reconfigure,
+ .free = fat_free_fc,
+};
+
+static int msdos_init_fs_context(struct fs_context *fc)
+{
+ int err;
+
+ /* Initialize with is_vfat == false */
+ err = fat_init_fs_context(fc, false);
+ if (err)
+ return err;
+
+ fc->ops = &msdos_context_ops;
+ return 0;
}
static struct file_system_type msdos_fs_type = {
.owner = THIS_MODULE,
.name = "msdos",
- .mount = msdos_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .init_fs_context = msdos_init_fs_context,
+ .parameters = fat_param_spec,
};
MODULE_ALIAS_FS("msdos");
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c4d00999a433..6423e1dedf14 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1195,24 +1195,48 @@ static void setup(struct super_block *sb)
sb->s_d_op = &vfat_dentry_ops;
}
-static int vfat_fill_super(struct super_block *sb, void *data, int silent)
+static int vfat_fill_super(struct super_block *sb, struct fs_context *fc)
{
- return fat_fill_super(sb, data, silent, 1, setup);
+ return fat_fill_super(sb, fc, setup);
}
-static struct dentry *vfat_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+static int vfat_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
+ return get_tree_bdev(fc, vfat_fill_super);
+}
+
+static int vfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ return fat_parse_param(fc, param, true);
+}
+
+static const struct fs_context_operations vfat_context_ops = {
+ .parse_param = vfat_parse_param,
+ .get_tree = vfat_get_tree,
+ .reconfigure = fat_reconfigure,
+ .free = fat_free_fc,
+};
+
+static int vfat_init_fs_context(struct fs_context *fc)
+{
+ int err;
+
+ /* Initialize with is_vfat == true */
+ err = fat_init_fs_context(fc, true);
+ if (err)
+ return err;
+
+ fc->ops = &vfat_context_ops;
+ return 0;
}
static struct file_system_type vfat_fs_type = {
.owner = THIS_MODULE,
.name = "vfat",
- .mount = vfat_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .init_fs_context = vfat_init_fs_context,
+ .parameters = fat_param_spec,
};
MODULE_ALIAS_FS("vfat");
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..6e8cea16790e 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -115,88 +115,188 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
return err;
}
-static struct vfsmount *get_vfsmount_from_fd(int fd)
+static int get_path_from_fd(int fd, struct path *root)
{
- struct vfsmount *mnt;
-
if (fd == AT_FDCWD) {
struct fs_struct *fs = current->fs;
spin_lock(&fs->lock);
- mnt = mntget(fs->pwd.mnt);
+ *root = fs->pwd;
+ path_get(root);
spin_unlock(&fs->lock);
} else {
struct fd f = fdget(fd);
if (!f.file)
- return ERR_PTR(-EBADF);
- mnt = mntget(f.file->f_path.mnt);
+ return -EBADF;
+ *root = f.file->f_path;
+ path_get(root);
fdput(f);
}
- return mnt;
+
+ return 0;
}
+enum handle_to_path_flags {
+ HANDLE_CHECK_PERMS = (1 << 0),
+ HANDLE_CHECK_SUBTREE = (1 << 1),
+};
+
+struct handle_to_path_ctx {
+ struct path root;
+ enum handle_to_path_flags flags;
+ unsigned int fh_flags;
+};
+
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
{
- return 1;
+ struct handle_to_path_ctx *ctx = context;
+ struct user_namespace *user_ns = current_user_ns();
+ struct dentry *d, *root = ctx->root.dentry;
+ struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt);
+ int retval = 0;
+
+ if (!root)
+ return 1;
+
+ /* Old permission model with global CAP_DAC_READ_SEARCH. */
+ if (!ctx->flags)
+ return 1;
+
+ /*
+ * It's racy as we're not taking rename_lock but we're able to ignore
+ * permissions and we just need an approximation whether we were able
+ * to follow a path to the file.
+ *
+ * It's also potentially expensive on some filesystems especially if
+ * there is a deep path.
+ */
+ d = dget(dentry);
+ while (d != root && !IS_ROOT(d)) {
+ struct dentry *parent = dget_parent(d);
+
+ /*
+ * We know that we have the ability to override DAC permissions
+ * as we've verified this earlier via CAP_DAC_READ_SEARCH. But
+ * we also need to make sure that there aren't any unmapped
+ * inodes in the path that would prevent us from reaching the
+ * file.
+ */
+ if (!privileged_wrt_inode_uidgid(user_ns, idmap,
+ d_inode(parent))) {
+ dput(d);
+ dput(parent);
+ return retval;
+ }
+
+ dput(d);
+ d = parent;
+ }
+
+ if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root)
+ retval = 1;
+ WARN_ON_ONCE(d != root && d != root->d_sb->s_root);
+ dput(d);
+ return retval;
}
-static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
- struct path *path)
+static int do_handle_to_path(struct file_handle *handle, struct path *path,
+ struct handle_to_path_ctx *ctx)
{
- int retval = 0;
int handle_dwords;
+ struct vfsmount *mnt = ctx->root.mnt;
- path->mnt = get_vfsmount_from_fd(mountdirfd);
- if (IS_ERR(path->mnt)) {
- retval = PTR_ERR(path->mnt);
- goto out_err;
- }
/* change the handle size to multiple of sizeof(u32) */
handle_dwords = handle->handle_bytes >> 2;
- path->dentry = exportfs_decode_fh(path->mnt,
+ path->dentry = exportfs_decode_fh_raw(mnt,
(struct fid *)handle->f_handle,
handle_dwords, handle->handle_type,
- vfs_dentry_acceptable, NULL);
- if (IS_ERR(path->dentry)) {
- retval = PTR_ERR(path->dentry);
- goto out_mnt;
+ ctx->fh_flags,
+ vfs_dentry_acceptable, ctx);
+ if (IS_ERR_OR_NULL(path->dentry)) {
+ if (path->dentry == ERR_PTR(-ENOMEM))
+ return -ENOMEM;
+ return -ESTALE;
}
+ path->mnt = mntget(mnt);
return 0;
-out_mnt:
- mntput(path->mnt);
-out_err:
- return retval;
+}
+
+/*
+ * Allow relaxed permissions of file handles if the caller has the
+ * ability to mount the filesystem or create a bind-mount of the
+ * provided @mountdirfd.
+ *
+ * In both cases the caller may be able to get an unobstructed way to
+ * the encoded file handle. If the caller is only able to create a
+ * bind-mount we need to verify that there are no locked mounts on top
+ * of it that could prevent us from getting to the encoded file.
+ *
+ * In principle, locked mounts can prevent the caller from mounting the
+ * filesystem but that only applies to procfs and sysfs neither of which
+ * support decoding file handles.
+ */
+static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
+ unsigned int o_flags)
+{
+ struct path *root = &ctx->root;
+
+ /*
+ * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
+ * confusing api in the face of disconnected non-dir dentries.
+ *
+ * There's only one dentry for each directory inode (VFS rule)...
+ */
+ if (!(o_flags & O_DIRECTORY))
+ return false;
+
+ if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ ctx->flags = HANDLE_CHECK_PERMS;
+ else if (is_mounted(root->mnt) &&
+ ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
+ CAP_SYS_ADMIN) &&
+ !has_locked_children(real_mount(root->mnt), root->dentry))
+ ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
+ else
+ return false;
+
+ /* Are we able to override DAC permissions? */
+ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
+ return false;
+
+ ctx->fh_flags = EXPORT_FH_DIR_ONLY;
+ return true;
}
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
- struct path *path)
+ struct path *path, unsigned int o_flags)
{
int retval = 0;
struct file_handle f_handle;
struct file_handle *handle = NULL;
+ struct handle_to_path_ctx ctx = {};
- /*
- * With handle we don't look at the execute bit on the
- * directory. Ideally we would like CAP_DAC_SEARCH.
- * But we don't have that
- */
- if (!capable(CAP_DAC_READ_SEARCH)) {
- retval = -EPERM;
+ retval = get_path_from_fd(mountdirfd, &ctx.root);
+ if (retval)
goto out_err;
+
+ if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+ retval = -EPERM;
+ goto out_path;
}
+
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
retval = -EFAULT;
- goto out_err;
+ goto out_path;
}
if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
(f_handle.handle_bytes == 0)) {
retval = -EINVAL;
- goto out_err;
+ goto out_path;
}
handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL);
if (!handle) {
retval = -ENOMEM;
- goto out_err;
+ goto out_path;
}
/* copy the full handle */
*handle = f_handle;
@@ -207,10 +307,12 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
goto out_handle;
}
- retval = do_handle_to_path(mountdirfd, handle, path);
+ retval = do_handle_to_path(handle, path, &ctx);
out_handle:
kfree(handle);
+out_path:
+ path_put(&ctx.root);
out_err:
return retval;
}
@@ -223,7 +325,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
struct file *file;
int fd;
- retval = handle_to_path(mountdirfd, ufh, &path);
+ retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
if (retval)
return retval;
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index a4d6ca0b8971..24727ec34e5a 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -308,6 +308,40 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
}
EXPORT_SYMBOL(fs_param_is_fd);
+int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ kuid_t uid;
+
+ if (fs_param_is_u32(log, p, param, result) != 0)
+ return fs_param_bad_value(log, param);
+
+ uid = make_kuid(current_user_ns(), result->uint_32);
+ if (!uid_valid(uid))
+ return inval_plog(log, "Invalid uid '%s'", param->string);
+
+ result->uid = uid;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_uid);
+
+int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ kgid_t gid;
+
+ if (fs_param_is_u32(log, p, param, result) != 0)
+ return fs_param_bad_value(log, param);
+
+ gid = make_kgid(current_user_ns(), result->uint_32);
+ if (!gid_valid(gid))
+ return inval_plog(log, "Invalid gid '%s'", param->string);
+
+ result->gid = gid;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_gid);
+
int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
struct fs_parameter *param, struct fs_parse_result *result)
{
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 6593ae518115..ed2dd000622e 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -220,10 +220,6 @@ static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
if (!mount_capable(fc))
return -EPERM;
- /* require the new mount api */
- if (exclusive && fc->ops == &legacy_fs_context_ops)
- return -EOPNOTSUPP;
-
fc->phase = FS_CONTEXT_CREATING;
fc->exclusive = exclusive;
@@ -411,6 +407,7 @@ SYSCALL_DEFINE5(fsconfig,
case FSCONFIG_SET_PATH:
case FSCONFIG_SET_PATH_EMPTY:
case FSCONFIG_SET_FD:
+ case FSCONFIG_CMD_CREATE_EXCL:
ret = -EOPNOTSUPP;
goto out_f;
}
@@ -451,7 +448,7 @@ SYSCALL_DEFINE5(fsconfig,
fallthrough;
case FSCONFIG_SET_PATH:
param.type = fs_value_is_filename;
- param.name = getname_flags(_value, lookup_flags, NULL);
+ param.name = getname_flags(_value, lookup_flags);
if (IS_ERR(param.name)) {
ret = PTR_ERR(param.name);
goto out_key;
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index 3d192b80a561..04cfd8fee992 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -146,8 +146,8 @@ int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
* be stripped.
*/
if (fc->posix_acl &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(&nop_mnt_idmap, inode)) &&
- !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID))
+ !in_group_or_capable(&nop_mnt_idmap, inode,
+ i_gid_into_vfsgid(&nop_mnt_idmap, inode)))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 99e44ea7d875..d8ab4e93916f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -740,8 +740,8 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
fsparam_string ("source", OPT_SOURCE),
fsparam_u32 ("fd", OPT_FD),
fsparam_u32oct ("rootmode", OPT_ROOTMODE),
- fsparam_u32 ("user_id", OPT_USER_ID),
- fsparam_u32 ("group_id", OPT_GROUP_ID),
+ fsparam_uid ("user_id", OPT_USER_ID),
+ fsparam_gid ("group_id", OPT_GROUP_ID),
fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS),
fsparam_flag ("allow_other", OPT_ALLOW_OTHER),
fsparam_u32 ("max_read", OPT_MAX_READ),
@@ -755,6 +755,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
struct fs_parse_result result;
struct fuse_fs_context *ctx = fsc->fs_private;
int opt;
+ kuid_t kuid;
+ kgid_t kgid;
if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
/*
@@ -799,16 +801,26 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
break;
case OPT_USER_ID:
- ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
- if (!uid_valid(ctx->user_id))
+ kuid = result.uid;
+ /*
+ * The requested uid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kuid_has_mapping(fsc->user_ns, kuid))
return invalfc(fsc, "Invalid user_id");
+ ctx->user_id = kuid;
ctx->user_id_present = true;
break;
case OPT_GROUP_ID:
- ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
- if (!gid_valid(ctx->group_id))
+ kgid = result.gid;
+ /*
+ * The requested gid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kgid_has_mapping(fsc->user_ns, kgid))
return invalfc(fsc, "Invalid group_id");
+ ctx->group_id = kgid;
ctx->group_id_present = true;
break;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 8c34798a0715..744e10b46904 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -200,6 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
HFS_I(inode)->fs_blocks = 0;
+ HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60;
if (S_ISDIR(mode)) {
inode->i_size = 2;
HFS_SB(sb)->folder_count++;
@@ -275,6 +276,8 @@ void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
for (count = 0, i = 0; i < 3; i++)
count += be16_to_cpu(ext[i].count);
HFS_I(inode)->first_blocks = count;
+ HFS_I(inode)->cached_start = 0;
+ HFS_I(inode)->cached_blocks = 0;
inode->i_size = HFS_I(inode)->phys_size = log_size;
HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6764afa98a6f..eeac99765f0d 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -28,6 +28,7 @@
static struct kmem_cache *hfs_inode_cachep;
+MODULE_DESCRIPTION("Apple Macintosh file system support");
MODULE_LICENSE("GPL");
static int hfs_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index ca2ba8c9f82e..901e83d65d20 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -25,19 +25,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- switch (tree->cnid) {
- case HFSPLUS_CAT_CNID:
- mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
- break;
- case HFSPLUS_EXT_CNID:
- mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
- break;
- case HFSPLUS_ATTR_CNID:
- mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
- break;
- default:
- BUG();
- }
+ mutex_lock_nested(&tree->tree_lock,
+ hfsplus_btree_lock_class(tree));
return 0;
}
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 3c572e44f2ad..9c51867dddc5 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -430,7 +430,8 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
hfsplus_free_extents(sb, ext_entry, total_blocks - start,
total_blocks);
total_blocks = start;
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
} while (total_blocks > blocks);
hfs_find_exit(&fd);
@@ -592,7 +593,8 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->first_extents);
hip->first_blocks = blk_cnt;
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
break;
}
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
@@ -606,7 +608,8 @@ void hfsplus_file_truncate(struct inode *inode)
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->cached_extents);
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
if (blk_cnt > start) {
hip->extent_state |= HFSPLUS_EXT_DIRTY;
break;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 012a3d003fbe..9e78f181c24f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -553,6 +553,27 @@ static inline __be32 __hfsp_ut2mt(time64_t ut)
return cpu_to_be32(lower_32_bits(ut) + HFSPLUS_UTC_OFFSET);
}
+static inline enum hfsplus_btree_mutex_classes
+hfsplus_btree_lock_class(struct hfs_btree *tree)
+{
+ enum hfsplus_btree_mutex_classes class;
+
+ switch (tree->cnid) {
+ case HFSPLUS_CAT_CNID:
+ class = CATALOG_BTREE_MUTEX;
+ break;
+ case HFSPLUS_EXT_CNID:
+ class = EXTENTS_BTREE_MUTEX;
+ break;
+ case HFSPLUS_ATTR_CNID:
+ class = ATTR_BTREE_MUTEX;
+ break;
+ default:
+ BUG();
+ }
+ return class;
+}
+
/* compatibility */
#define hfsp_mt2ut(t) (struct timespec64){ .tv_sec = __hfsp_mt2ut(t) }
#define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec)
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5661a2e24d03..40d04dba13ac 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -40,7 +40,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
/* Directory containing the bootable system */
vh->finder_info[0] = bvh->finder_info[0] =
- cpu_to_be32(parent_ino(dentry));
+ cpu_to_be32(d_parent_ino(dentry));
/*
* Bootloader. Just using the inode here breaks in the case of
@@ -51,7 +51,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
/* Per spec, the OS X system folder - same as finder_info[0] here */
vh->finder_info[5] = bvh->finder_info[5] =
- cpu_to_be32(parent_ino(dentry));
+ cpu_to_be32(d_parent_ino(dentry));
mutex_unlock(&sbi->vh_mutex);
return 0;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 5a400259ae74..9a1a93e3888b 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
+ strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
if (!strbuf) {
res = -ENOMEM;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a73d27c4dd58..3eb747d26924 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -16,11 +16,16 @@
#include <linux/seq_file.h>
#include <linux/writeback.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/namei.h>
#include "hostfs.h"
#include <init.h>
#include <kern.h>
+struct hostfs_fs_info {
+ char *host_root_path;
+};
+
struct hostfs_inode_info {
int fd;
fmode_t mode;
@@ -90,8 +95,10 @@ static char *__dentry_name(struct dentry *dentry, char *name)
char *p = dentry_path_raw(dentry, name, PATH_MAX);
char *root;
size_t len;
+ struct hostfs_fs_info *fsi;
- root = dentry->d_sb->s_fs_info;
+ fsi = dentry->d_sb->s_fs_info;
+ root = fsi->host_root_path;
len = strlen(root);
if (IS_ERR(p)) {
__putname(name);
@@ -196,8 +203,10 @@ static int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
long long f_bavail;
long long f_files;
long long f_ffree;
+ struct hostfs_fs_info *fsi;
- err = do_statfs(dentry->d_sb->s_fs_info,
+ fsi = dentry->d_sb->s_fs_info;
+ err = do_statfs(fsi->host_root_path,
&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
&sf->f_namelen);
@@ -245,7 +254,11 @@ static void hostfs_free_inode(struct inode *inode)
static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
{
- const char *root_path = root->d_sb->s_fs_info;
+ struct hostfs_fs_info *fsi;
+ const char *root_path;
+
+ fsi = root->d_sb->s_fs_info;
+ root_path = fsi->host_root_path;
size_t offset = strlen(root_ino) + 1;
if (strlen(root_path) > offset)
@@ -432,31 +445,20 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
static int hostfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
char *buffer;
- loff_t start = page_offset(page);
+ loff_t start = folio_pos(folio);
int bytes_read, ret = 0;
- buffer = kmap_local_page(page);
+ buffer = kmap_local_folio(folio, 0);
bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
PAGE_SIZE);
- if (bytes_read < 0) {
- ClearPageUptodate(page);
- SetPageError(page);
+ if (bytes_read < 0)
ret = bytes_read;
- goto out;
- }
-
- memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read);
-
- ClearPageError(page);
- SetPageUptodate(page);
-
- out:
- flush_dcache_page(page);
+ else
+ buffer = folio_zero_tail(folio, bytes_read, buffer);
kunmap_local(buffer);
- unlock_page(page);
+ folio_end_read(folio, ret == 0);
return ret;
}
@@ -922,10 +924,11 @@ static const struct inode_operations hostfs_link_iops = {
.get_link = hostfs_get_link,
};
-static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
+static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct hostfs_fs_info *fsi = sb->s_fs_info;
+ const char *host_root = fc->source;
struct inode *root_inode;
- char *host_root_path, *req_root = d;
int err;
sb->s_blocksize = 1024;
@@ -939,15 +942,15 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
return err;
/* NULL is printed as '(null)' by printf(): avoid that. */
- if (req_root == NULL)
- req_root = "";
+ if (fc->source == NULL)
+ host_root = "";
- sb->s_fs_info = host_root_path =
- kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root);
- if (host_root_path == NULL)
+ fsi->host_root_path =
+ kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root);
+ if (fsi->host_root_path == NULL)
return -ENOMEM;
- root_inode = hostfs_iget(sb, host_root_path);
+ root_inode = hostfs_iget(sb, fsi->host_root_path);
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
@@ -955,7 +958,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
char *name;
iput(root_inode);
- name = follow_link(host_root_path);
+ name = follow_link(fsi->host_root_path);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -972,11 +975,38 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
return 0;
}
-static struct dentry *hostfs_read_sb(struct file_system_type *type,
- int flags, const char *dev_name,
- void *data)
+static int hostfs_fc_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, hostfs_fill_super);
+}
+
+static void hostfs_fc_free(struct fs_context *fc)
{
- return mount_nodev(type, flags, data, hostfs_fill_sb_common);
+ struct hostfs_fs_info *fsi = fc->s_fs_info;
+
+ if (!fsi)
+ return;
+
+ kfree(fsi->host_root_path);
+ kfree(fsi);
+}
+
+static const struct fs_context_operations hostfs_context_ops = {
+ .get_tree = hostfs_fc_get_tree,
+ .free = hostfs_fc_free,
+};
+
+static int hostfs_init_fs_context(struct fs_context *fc)
+{
+ struct hostfs_fs_info *fsi;
+
+ fsi = kzalloc(sizeof(*fsi), GFP_KERNEL);
+ if (!fsi)
+ return -ENOMEM;
+
+ fc->s_fs_info = fsi;
+ fc->ops = &hostfs_context_ops;
+ return 0;
}
static void hostfs_kill_sb(struct super_block *s)
@@ -986,11 +1016,11 @@ static void hostfs_kill_sb(struct super_block *s)
}
static struct file_system_type hostfs_type = {
- .owner = THIS_MODULE,
- .name = "hostfs",
- .mount = hostfs_read_sb,
- .kill_sb = hostfs_kill_sb,
- .fs_flags = 0,
+ .owner = THIS_MODULE,
+ .name = "hostfs",
+ .init_fs_context = hostfs_init_fs_context,
+ .kill_sb = hostfs_kill_sb,
+ .fs_flags = 0,
};
MODULE_ALIAS_FS("hostfs");
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9184b4584b01..d0edf9ed33b6 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -472,9 +472,8 @@ out:
static int hpfs_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- char *link = page_address(page);
- struct inode *i = page->mapping->host;
+ char *link = folio_address(folio);
+ struct inode *i = folio->mapping->host;
struct fnode *fnode;
struct buffer_head *bh;
int err;
@@ -485,17 +484,9 @@ static int hpfs_symlink_read_folio(struct file *file, struct folio *folio)
goto fail;
err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE);
brelse(bh);
- if (err)
- goto fail;
- hpfs_unlock(i->i_sb);
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
-
fail:
hpfs_unlock(i->i_sb);
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, err == 0);
return err;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 314834a078e9..e73717daa5f9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -793,4 +793,5 @@ static void __exit exit_hpfs_fs(void)
module_init(init_hpfs_fs)
module_exit(exit_hpfs_fs)
+MODULE_DESCRIPTION("OS/2 HPFS file system support");
MODULE_LICENSE("GPL");
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..81dab95f67ed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -73,13 +73,13 @@ enum hugetlb_param {
};
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -1376,15 +1376,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
switch (opt) {
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
- goto bad_val;
+ ctx->uid = result.uid;
return 0;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
- goto bad_val;
+ ctx->gid = result.gid;
return 0;
case Opt_mode:
diff --git a/fs/inode.c b/fs/inode.c
index 3a41f83a4ba5..f356fe2ec2b6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -162,6 +162,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ inode->i_state = 0;
atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
@@ -231,6 +232,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
if (unlikely(security_inode_alloc(inode)))
return -ENOMEM;
+
this_cpu_inc(nr_inodes);
return 0;
@@ -886,36 +888,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
-static void __wait_on_freeing_inode(struct inode *inode);
+static void __wait_on_freeing_inode(struct inode *inode, bool locked);
/*
* Called with the inode lock held.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
- void *data)
+ void *data, bool locked)
{
struct inode *inode = NULL;
+ if (locked)
+ lockdep_assert_held(&inode_hash_lock);
+ else
+ lockdep_assert_not_held(&inode_hash_lock);
+
+ rcu_read_lock();
repeat:
- hlist_for_each_entry(inode, head, i_hash) {
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_sb != sb)
continue;
if (!test(inode, data))
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode);
+ __wait_on_freeing_inode(inode, locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ESTALE);
}
__iget(inode);
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return inode;
}
+ rcu_read_unlock();
return NULL;
}
@@ -924,29 +935,39 @@ repeat:
* iget_locked for details.
*/
static struct inode *find_inode_fast(struct super_block *sb,
- struct hlist_head *head, unsigned long ino)
+ struct hlist_head *head, unsigned long ino,
+ bool locked)
{
struct inode *inode = NULL;
+ if (locked)
+ lockdep_assert_held(&inode_hash_lock);
+ else
+ lockdep_assert_not_held(&inode_hash_lock);
+
+ rcu_read_lock();
repeat:
- hlist_for_each_entry(inode, head, i_hash) {
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_ino != ino)
continue;
if (inode->i_sb != sb)
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode);
+ __wait_on_freeing_inode(inode, locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ESTALE);
}
__iget(inode);
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return inode;
}
+ rcu_read_unlock();
return NULL;
}
@@ -1004,14 +1025,7 @@ EXPORT_SYMBOL(get_next_ino);
*/
struct inode *new_inode_pseudo(struct super_block *sb)
{
- struct inode *inode = alloc_inode(sb);
-
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_state = 0;
- spin_unlock(&inode->i_lock);
- }
- return inode;
+ return alloc_inode(sb);
}
/**
@@ -1161,7 +1175,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
again:
spin_lock(&inode_hash_lock);
- old = find_inode(inode->i_sb, head, test, data);
+ old = find_inode(inode->i_sb, head, test, data, true);
if (unlikely(old)) {
/*
* Uhhuh, somebody else created the same inode under us.
@@ -1235,7 +1249,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
struct inode *new = alloc_inode(sb);
if (new) {
- new->i_state = 0;
inode = inode_insert5(new, hashval, test, set, data);
if (unlikely(inode != new))
destroy_inode(new);
@@ -1246,6 +1259,47 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
EXPORT_SYMBOL(iget5_locked);
/**
+ * iget5_locked_rcu - obtain an inode from a mounted file system
+ * @sb: super block of file system
+ * @hashval: hash value (usually inode number) to get
+ * @test: callback used for comparisons between inodes
+ * @set: callback used to initialize a new struct inode
+ * @data: opaque data pointer to pass to @test and @set
+ *
+ * This is equivalent to iget5_locked, except the @test callback must
+ * tolerate the inode not being stable, including being mid-teardown.
+ */
+struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *), void *data)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode *inode, *new;
+
+again:
+ inode = find_inode(sb, head, test, data, false);
+ if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
+ wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
+ return inode;
+ }
+
+ new = alloc_inode(sb);
+ if (new) {
+ inode = inode_insert5(new, hashval, test, set, data);
+ if (unlikely(inode != new))
+ destroy_inode(new);
+ }
+ return inode;
+}
+EXPORT_SYMBOL_GPL(iget5_locked_rcu);
+
+/**
* iget_locked - obtain an inode from a mounted file system
* @sb: super block of file system
* @ino: inode number to get
@@ -1263,9 +1317,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
again:
- spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino);
- spin_unlock(&inode_hash_lock);
+ inode = find_inode_fast(sb, head, ino, false);
if (inode) {
if (IS_ERR(inode))
return NULL;
@@ -1283,7 +1335,7 @@ again:
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
- old = find_inode_fast(sb, head, ino);
+ old = find_inode_fast(sb, head, ino, true);
if (!old) {
inode->i_ino = ino;
spin_lock(&inode->i_lock);
@@ -1419,7 +1471,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
struct inode *inode;
spin_lock(&inode_hash_lock);
- inode = find_inode(sb, head, test, data);
+ inode = find_inode(sb, head, test, data, true);
spin_unlock(&inode_hash_lock);
return IS_ERR(inode) ? NULL : inode;
@@ -1474,7 +1526,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
struct inode *inode;
again:
spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino);
+ inode = find_inode_fast(sb, head, ino, true);
spin_unlock(&inode_hash_lock);
if (inode) {
@@ -2235,17 +2287,21 @@ EXPORT_SYMBOL(inode_needs_sync);
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
* will DTRT.
*/
-static void __wait_on_freeing_inode(struct inode *inode)
+static void __wait_on_freeing_inode(struct inode *inode, bool locked)
{
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_hash_lock);
+ rcu_read_unlock();
+ if (locked)
+ spin_unlock(&inode_hash_lock);
schedule();
finish_wait(wq, &wait.wq_entry);
- spin_lock(&inode_hash_lock);
+ if (locked)
+ spin_lock(&inode_hash_lock);
+ rcu_read_lock();
}
static __initdata unsigned long ihash_entries;
@@ -2538,6 +2594,7 @@ bool in_group_or_capable(struct mnt_idmap *idmap,
return true;
return false;
}
+EXPORT_SYMBOL(in_group_or_capable);
/**
* mode_strip_sgid - handle the sgid bit for non-directories
diff --git a/fs/internal.h b/fs/internal.h
index ab2225136f60..cdd73209eecb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -17,6 +17,7 @@ struct fs_context;
struct pipe_inode_info;
struct iov_iter;
struct mnt_idmap;
+struct ns_common;
/*
* block/bdev.c
@@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern const struct dentry_operations ns_dentry_operations;
+int open_namespace(struct ns_common *ns);
/*
* fs/stat.c:
@@ -247,6 +249,8 @@ extern const struct dentry_operations ns_dentry_operations;
int getname_statx_lookup_flags(int flags);
int do_statx(int dfd, struct filename *filename, unsigned int flags,
unsigned int mask, struct statx __user *buffer);
+int do_statx_fd(int fd, unsigned int flags, unsigned int mask,
+ struct statx __user *buffer);
/*
* fs/splice.c:
@@ -321,3 +325,15 @@ struct stashed_operations {
int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
struct path *path);
void stashed_dentry_prune(struct dentry *dentry);
+/**
+ * path_mounted - check whether path is mounted
+ * @path: path to check
+ *
+ * Determine whether @path refers to the root of a mount.
+ *
+ * Return: true if @path is the root of a mount, false if not.
+ */
+static inline bool path_mounted(const struct path *path)
+{
+ return path->mnt->mnt_root == path->dentry;
+}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d46558990279..f420c53d86ac 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -307,8 +307,6 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off,
spin_unlock_irqrestore(&ifs->state_lock, flags);
}
- if (error)
- folio_set_error(folio);
if (finished)
folio_end_read(folio, uptodate);
}
@@ -444,6 +442,24 @@ done:
return pos - orig_pos + plen;
}
+static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
+ struct iomap_readpage_ctx *ctx)
+{
+ struct folio *folio = ctx->cur_folio;
+ size_t offset = offset_in_folio(folio, iter->pos);
+ loff_t length = min_t(loff_t, folio_size(folio) - offset,
+ iomap_length(iter));
+ loff_t done, ret;
+
+ for (done = 0; done < length; done += ret) {
+ ret = iomap_readpage_iter(iter, ctx, done);
+ if (ret <= 0)
+ return ret;
+ }
+
+ return done;
+}
+
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
{
struct iomap_iter iter = {
@@ -459,10 +475,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
trace_iomap_readpage(iter.inode, 1);
while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_readpage_iter(&iter, &ctx, 0);
-
- if (ret < 0)
- folio_set_error(folio);
+ iter.processed = iomap_read_folio_iter(&iter, &ctx);
if (ctx.bio) {
submit_bio(ctx.bio);
@@ -698,7 +711,6 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
if (folio_test_uptodate(folio))
return 0;
- folio_clear_error(folio);
do {
iomap_adjust_read_range(iter->inode, folio, &block_start,
@@ -878,37 +890,22 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t old_size = iter->inode->i_size;
- size_t written;
if (srcmap->type == IOMAP_INLINE) {
iomap_write_end_inline(iter, folio, pos, copied);
- written = copied;
- } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
- written = block_write_end(NULL, iter->inode->i_mapping, pos,
- len, copied, &folio->page, NULL);
- WARN_ON_ONCE(written != copied && written != 0);
- } else {
- written = __iomap_write_end(iter->inode, pos, len, copied,
- folio) ? copied : 0;
+ return true;
}
- /*
- * Update the in-memory inode size after copying the data into the page
- * cache. It's up to the file system to write the updated size to disk,
- * preferably after I/O completion so that no stale data is exposed.
- * Only once that's done can we unlock and release the folio.
- */
- if (pos + written > old_size) {
- i_size_write(iter->inode, pos + written);
- iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
- }
- __iomap_put_folio(iter, pos, written, folio);
+ if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
+ size_t bh_written;
- if (old_size < pos)
- pagecache_isize_extended(iter->inode, old_size, pos);
+ bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
+ len, copied, &folio->page, NULL);
+ WARN_ON_ONCE(bh_written != copied && bh_written != 0);
+ return bh_written == copied;
+ }
- return written == copied;
+ return __iomap_write_end(iter->inode, pos, len, copied, folio);
}
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
@@ -923,6 +920,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
do {
struct folio *folio;
+ loff_t old_size;
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
@@ -974,6 +972,23 @@ retry:
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
copied : 0;
+ /*
+ * Update the in-memory inode size after copying the data into
+ * the page cache. It's up to the file system to write the
+ * updated size to disk, preferably after I/O completion so that
+ * no stale data is exposed. Only once that's done can we
+ * unlock and release the folio.
+ */
+ old_size = iter->inode->i_size;
+ if (pos + written > old_size) {
+ i_size_write(iter->inode, pos + written);
+ iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ __iomap_put_folio(iter, pos, written, folio);
+
+ if (old_size < pos)
+ pagecache_isize_extended(iter->inode, old_size, pos);
+
cond_resched();
if (unlikely(written == 0)) {
/*
@@ -1344,6 +1359,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
bytes = folio_size(folio) - offset;
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1409,6 +1425,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_mark_accessed(folio);
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1539,8 +1556,6 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
/* walk all folios in bio, ending page IO on them */
bio_for_each_folio_all(fi, bio) {
- if (error)
- folio_set_error(fi.folio);
iomap_finish_folio_write(inode, fi.folio, fi.length);
folio_count++;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 93b1077a380a..ed548efdd9bb 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -326,8 +326,8 @@ static const struct fs_parameter_spec isofs_param_spec[] = {
fsparam_u32 ("session", Opt_session),
fsparam_u32 ("sbsector", Opt_sb),
fsparam_enum ("check", Opt_check, isofs_param_check),
- fsparam_u32 ("uid", Opt_uid),
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
/* Note: mode/dmode historically accepted %u not strictly %o */
fsparam_u32 ("mode", Opt_mode),
fsparam_u32 ("dmode", Opt_dmode),
@@ -344,8 +344,6 @@ static int isofs_parse_param(struct fs_context *fc,
struct isofs_options *popt = fc->fs_private;
struct fs_parse_result result;
int opt;
- kuid_t uid;
- kgid_t gid;
unsigned int n;
/* There are no remountable options */
@@ -409,17 +407,11 @@ static int isofs_parse_param(struct fs_context *fc,
case Opt_ignore:
break;
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return -EINVAL;
- popt->uid = uid;
+ popt->uid = result.uid;
popt->uid_set = 1;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return -EINVAL;
- popt->gid = gid;
+ popt->gid = result.gid;
popt->gid_set = 1;
break;
case Opt_mode:
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index d6c17ad69dee..dbf911126e61 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -688,11 +688,10 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
*/
static int rock_ridge_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct iso_inode_info *ei = ISOFS_I(inode);
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
- char *link = page_address(page);
+ char *link = folio_address(folio);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
struct buffer_head *bh;
char *rpnt = link;
@@ -779,9 +778,10 @@ repeat:
goto fail;
brelse(bh);
*rpnt = '\0';
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ ret = 0;
+end:
+ folio_end_read(folio, ret == 0);
+ return ret;
/* error exit from macro */
out:
@@ -795,9 +795,8 @@ out_bad_span:
fail:
brelse(bh);
error:
- SetPageError(page);
- unlock_page(page);
- return -EIO;
+ ret = -EIO;
+ goto end;
}
const struct address_space_operations isofs_symlink_aops = {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 62ea76da7fdf..e12cb145147e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -95,13 +95,8 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
PAGE_SIZE);
- if (ret) {
- ClearPageUptodate(pg);
- SetPageError(pg);
- } else {
+ if (!ret)
SetPageUptodate(pg);
- ClearPageError(pg);
- }
flush_dcache_page(pg);
kunmap(pg);
@@ -304,10 +299,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
kunmap(pg);
- if (ret) {
- /* There was an error writing. */
- SetPageError(pg);
- }
+ if (ret)
+ mapping_set_error(mapping, ret);
/* Adjust writtenlen for the padding we did, so we don't confuse our caller */
writtenlen -= min(writtenlen, (start - aligned_start));
@@ -330,7 +323,6 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
it gets reread */
jffs2_dbg(1, "%s(): Not all bytes written. Marking page !uptodate\n",
__func__);
- SetPageError(pg);
ClearPageUptodate(pg);
}
diff --git a/fs/libfs.c b/fs/libfs.c
index b635ee5adbcc..8aa34870449f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1854,6 +1854,80 @@ static const struct dentry_operations generic_ci_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
#endif
};
+
+/**
+ * generic_ci_match() - Match a name (case-insensitively) with a dirent.
+ * This is a filesystem helper for comparison with directory entries.
+ * generic_ci_d_compare should be used in VFS' ->d_compare instead.
+ *
+ * @parent: Inode of the parent of the dirent under comparison
+ * @name: name under lookup.
+ * @folded_name: Optional pre-folded name under lookup
+ * @de_name: Dirent name.
+ * @de_name_len: dirent name length.
+ *
+ * Test whether a case-insensitive directory entry matches the filename
+ * being searched. If @folded_name is provided, it is used instead of
+ * recalculating the casefold of @name.
+ *
+ * Return: > 0 if the directory entry matches, 0 if it doesn't match, or
+ * < 0 on error.
+ */
+int generic_ci_match(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *folded_name,
+ const u8 *de_name, u32 de_name_len)
+{
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
+ struct qstr dirent = QSTR_INIT(de_name, de_name_len);
+ int res = 0;
+
+ if (IS_ENCRYPTED(parent)) {
+ const struct fscrypt_str encrypted_name =
+ FSTR_INIT((u8 *) de_name, de_name_len);
+
+ if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
+ return -EINVAL;
+
+ decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+ if (!decrypted_name.name)
+ return -ENOMEM;
+ res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
+ &decrypted_name);
+ if (res < 0) {
+ kfree(decrypted_name.name);
+ return res;
+ }
+ dirent.name = decrypted_name.name;
+ dirent.len = decrypted_name.len;
+ }
+
+ /*
+ * Attempt a case-sensitive match first. It is cheaper and
+ * should cover most lookups, including all the sane
+ * applications that expect a case-sensitive filesystem.
+ */
+
+ if (dirent.len == name->len &&
+ !memcmp(name->name, dirent.name, dirent.len))
+ goto out;
+
+ if (folded_name->name)
+ res = utf8_strncasecmp_folded(um, folded_name, &dirent);
+ else
+ res = utf8_strncasecmp(um, name, &dirent);
+
+out:
+ kfree(decrypted_name.name);
+ if (res < 0 && sb_has_strict_encoding(sb)) {
+ pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
+ return 0;
+ }
+ return !res;
+}
+EXPORT_SYMBOL(generic_ci_match);
#endif
#ifdef CONFIG_FS_ENCRYPTION
diff --git a/fs/locks.c b/fs/locks.c
index c360d1992d21..bdd94c32256f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1367,9 +1367,9 @@ retry:
locks_wake_up_blocks(&left->c);
}
out:
+ trace_posix_lock_inode(inode, request, error);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
- trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 7f9a2d8aa420..1c3df63162ef 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -730,5 +730,6 @@ static void __exit exit_minix_fs(void)
module_init(init_minix_fs)
module_exit(exit_minix_fs)
+MODULE_DESCRIPTION("Minix file system");
MODULE_LICENSE("GPL");
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index d6031acc34f0..a944a0f17b53 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap,
if (!new_de)
goto out_dir;
err = minix_set_link(new_de, new_page, old_inode);
- kunmap(new_page);
- put_page(new_page);
+ unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
diff --git a/fs/mount.h b/fs/mount.h
index 4a42fc68f4cc..ad4b1ddebb54 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
+ struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
+ refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
struct mnt_pcp {
@@ -152,3 +154,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/mpage.c b/fs/mpage.c
index fa8b99a199fa..b5b5ddf9d513 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -48,13 +48,8 @@ static void mpage_read_end_io(struct bio *bio)
struct folio_iter fi;
int err = blk_status_to_errno(bio->bi_status);
- bio_for_each_folio_all(fi, bio) {
- if (err)
- folio_set_error(fi.folio);
- else
- folio_mark_uptodate(fi.folio);
- folio_unlock(fi.folio);
- }
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, err == 0);
bio_put(bio);
}
@@ -65,10 +60,8 @@ static void mpage_write_end_io(struct bio *bio)
int err = blk_status_to_errno(bio->bi_status);
bio_for_each_folio_all(fi, bio) {
- if (err) {
- folio_set_error(fi.folio);
+ if (err)
mapping_set_error(fi.folio->mapping, err);
- }
folio_end_writeback(fi.folio);
}
diff --git a/fs/namei.c b/fs/namei.c
index 1e05a0f3f04d..3a4c40e12f78 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -126,7 +126,7 @@
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
struct filename *
-getname_flags(const char __user *filename, int flags, int *empty)
+getname_flags(const char __user *filename, int flags)
{
struct filename *result;
char *kname;
@@ -148,9 +148,20 @@ getname_flags(const char __user *filename, int flags, int *empty)
result->name = kname;
len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
- if (unlikely(len < 0)) {
- __putname(result);
- return ERR_PTR(len);
+ /*
+ * Handle both empty path and copy failure in one go.
+ */
+ if (unlikely(len <= 0)) {
+ if (unlikely(len < 0)) {
+ __putname(result);
+ return ERR_PTR(len);
+ }
+
+ /* The empty path is special. */
+ if (!(flags & LOOKUP_EMPTY)) {
+ __putname(result);
+ return ERR_PTR(-ENOENT);
+ }
}
/*
@@ -180,6 +191,12 @@ getname_flags(const char __user *filename, int flags, int *empty)
kfree(result);
return ERR_PTR(len);
}
+ /* The empty path is special. */
+ if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(-ENOENT);
+ }
if (unlikely(len == PATH_MAX)) {
__putname(kname);
kfree(result);
@@ -188,16 +205,6 @@ getname_flags(const char __user *filename, int flags, int *empty)
}
atomic_set(&result->refcnt, 1);
- /* The empty path is special. */
- if (unlikely(!len)) {
- if (empty)
- *empty = 1;
- if (!(flags & LOOKUP_EMPTY)) {
- putname(result);
- return ERR_PTR(-ENOENT);
- }
- }
-
result->uptr = filename;
result->aname = NULL;
audit_getname(result);
@@ -209,13 +216,13 @@ getname_uflags(const char __user *filename, int uflags)
{
int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
- return getname_flags(filename, flags, NULL);
+ return getname_flags(filename, flags);
}
struct filename *
getname(const char __user * filename)
{
- return getname_flags(filename, 0, NULL);
+ return getname_flags(filename, 0);
}
struct filename *
@@ -1233,29 +1240,48 @@ int may_linkat(struct mnt_idmap *idmap, const struct path *link)
*
* Returns 0 if the open is allowed, -ve on error.
*/
-static int may_create_in_sticky(struct mnt_idmap *idmap,
- struct nameidata *nd, struct inode *const inode)
+static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd,
+ struct inode *const inode)
{
umode_t dir_mode = nd->dir_mode;
- vfsuid_t dir_vfsuid = nd->dir_vfsuid;
+ vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid;
- if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
- (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
- likely(!(dir_mode & S_ISVTX)) ||
- vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) ||
- vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
+ if (likely(!(dir_mode & S_ISVTX)))
return 0;
- if (likely(dir_mode & 0002) ||
- (dir_mode & 0020 &&
- ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
- (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
- const char *operation = S_ISFIFO(inode->i_mode) ?
- "sticky_create_fifo" :
- "sticky_create_regular";
- audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
+ if (S_ISREG(inode->i_mode) && !sysctl_protected_regular)
+ return 0;
+
+ if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos)
+ return 0;
+
+ i_vfsuid = i_uid_into_vfsuid(idmap, inode);
+
+ if (vfsuid_eq(i_vfsuid, dir_vfsuid))
+ return 0;
+
+ if (vfsuid_eq_kuid(i_vfsuid, current_fsuid()))
+ return 0;
+
+ if (likely(dir_mode & 0002)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create");
return -EACCES;
}
+
+ if (dir_mode & 0020) {
+ if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT,
+ "sticky_create_fifo");
+ return -EACCES;
+ }
+
+ if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT,
+ "sticky_create_regular");
+ return -EACCES;
+ }
+ }
+
return 0;
}
@@ -1712,17 +1738,26 @@ static struct dentry *lookup_slow(const struct qstr *name,
}
static inline int may_lookup(struct mnt_idmap *idmap,
- struct nameidata *nd)
+ struct nameidata *restrict nd)
{
- if (nd->flags & LOOKUP_RCU) {
- int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
- if (!err) // success, keep going
- return 0;
- if (!try_to_unlazy(nd))
- return -ECHILD; // redo it all non-lazy
- if (err != -ECHILD) // hard error
- return err;
- }
+ int err, mask;
+
+ mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0;
+ err = inode_permission(idmap, nd->inode, mask | MAY_EXEC);
+ if (likely(!err))
+ return 0;
+
+ // If we failed, and we weren't in LOOKUP_RCU, it's final
+ if (!(nd->flags & LOOKUP_RCU))
+ return err;
+
+ // Drop out of RCU mode to make sure it wasn't transient
+ if (!try_to_unlazy(nd))
+ return -ECHILD; // redo it all non-lazy
+
+ if (err != -ECHILD) // hard error
+ return err;
+
return inode_permission(idmap, nd->inode, MAY_EXEC);
}
@@ -2163,21 +2198,39 @@ EXPORT_SYMBOL(hashlen_string);
/*
* Calculate the length and hash of the path component, and
- * return the "hash_len" as the result.
+ * return the length as the result.
*/
-static inline u64 hash_name(const void *salt, const char *name)
+static inline const char *hash_name(struct nameidata *nd,
+ const char *name,
+ unsigned long *lastword)
{
- unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
+ unsigned long a, b, x, y = (unsigned long)nd->path.dentry;
unsigned long adata, bdata, mask, len;
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
- len = 0;
- goto inside;
+ /*
+ * The first iteration is special, because it can result in
+ * '.' and '..' and has no mixing other than the final fold.
+ */
+ a = load_unaligned_zeropad(name);
+ b = a ^ REPEAT_BYTE('/');
+ if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) {
+ adata = prep_zero_mask(a, adata, &constants);
+ bdata = prep_zero_mask(b, bdata, &constants);
+ mask = create_zero_mask(adata | bdata);
+ a &= zero_bytemask(mask);
+ *lastword = a;
+ len = find_zero(mask);
+ nd->last.hash = fold_hash(a, y);
+ nd->last.len = len;
+ return name + len;
+ }
+ len = 0;
+ x = 0;
do {
HASH_MIX(x, y, a);
len += sizeof(unsigned long);
-inside:
a = load_unaligned_zeropad(name+len);
b = a ^ REPEAT_BYTE('/');
} while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
@@ -2185,11 +2238,25 @@ inside:
adata = prep_zero_mask(a, adata, &constants);
bdata = prep_zero_mask(b, bdata, &constants);
mask = create_zero_mask(adata | bdata);
- x ^= a & zero_bytemask(mask);
+ a &= zero_bytemask(mask);
+ x ^= a;
+ len += find_zero(mask);
+ *lastword = 0; // Multi-word components cannot be DOT or DOTDOT
- return hashlen_create(fold_hash(x, y), len + find_zero(mask));
+ nd->last.hash = fold_hash(x, y);
+ nd->last.len = len;
+ return name + len;
}
+/*
+ * Note that the 'last' word is always zero-masked, but
+ * was loaded as a possibly big-endian word.
+ */
+#ifdef __BIG_ENDIAN
+ #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8))
+ #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16))
+#endif
+
#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
/* Return the hash of a string of known length */
@@ -2222,22 +2289,35 @@ EXPORT_SYMBOL(hashlen_string);
* We know there's a real path component here of at least
* one character.
*/
-static inline u64 hash_name(const void *salt, const char *name)
+static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword)
{
- unsigned long hash = init_name_hash(salt);
- unsigned long len = 0, c;
+ unsigned long hash = init_name_hash(nd->path.dentry);
+ unsigned long len = 0, c, last = 0;
c = (unsigned char)*name;
do {
+ last = (last << 8) + c;
len++;
hash = partial_name_hash(c, hash);
c = (unsigned char)name[len];
} while (c && c != '/');
- return hashlen_create(end_name_hash(hash), len);
+
+ // This is reliable for DOT or DOTDOT, since the component
+ // cannot contain NUL characters - top bits being zero means
+ // we cannot have had any other pathnames.
+ *lastword = last;
+ nd->last.hash = end_name_hash(hash);
+ nd->last.len = len;
+ return name + len;
}
#endif
+#ifndef LAST_WORD_IS_DOT
+ #define LAST_WORD_IS_DOT 0x2e
+ #define LAST_WORD_IS_DOTDOT 0x2e2e
+#endif
+
/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
@@ -2266,45 +2346,38 @@ static int link_path_walk(const char *name, struct nameidata *nd)
for(;;) {
struct mnt_idmap *idmap;
const char *link;
- u64 hash_len;
- int type;
+ unsigned long lastword;
idmap = mnt_idmap(nd->path.mnt);
err = may_lookup(idmap, nd);
if (err)
return err;
- hash_len = hash_name(nd->path.dentry, name);
+ nd->last.name = name;
+ name = hash_name(nd, name, &lastword);
- type = LAST_NORM;
- if (name[0] == '.') switch (hashlen_len(hash_len)) {
- case 2:
- if (name[1] == '.') {
- type = LAST_DOTDOT;
- nd->state |= ND_JUMPED;
- }
- break;
- case 1:
- type = LAST_DOT;
- }
- if (likely(type == LAST_NORM)) {
- struct dentry *parent = nd->path.dentry;
+ switch(lastword) {
+ case LAST_WORD_IS_DOTDOT:
+ nd->last_type = LAST_DOTDOT;
+ nd->state |= ND_JUMPED;
+ break;
+
+ case LAST_WORD_IS_DOT:
+ nd->last_type = LAST_DOT;
+ break;
+
+ default:
+ nd->last_type = LAST_NORM;
nd->state &= ~ND_JUMPED;
+
+ struct dentry *parent = nd->path.dentry;
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- struct qstr this = { { .hash_len = hash_len }, .name = name };
- err = parent->d_op->d_hash(parent, &this);
+ err = parent->d_op->d_hash(parent, &nd->last);
if (err < 0)
return err;
- hash_len = this.hash_len;
- name = this.name;
}
}
- nd->last.hash_len = hash_len;
- nd->last.name = name;
- nd->last_type = type;
-
- name += hashlen_len(hash_len);
if (!*name)
goto OK;
/*
@@ -2922,16 +2995,16 @@ int path_pts(struct path *path)
}
#endif
-int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
- struct path *path, int *empty)
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+ struct path *path)
{
- struct filename *filename = getname_flags(name, flags, empty);
+ struct filename *filename = getname_flags(name, flags);
int ret = filename_lookup(dfd, filename, flags, path, NULL);
putname(filename);
return ret;
}
-EXPORT_SYMBOL(user_path_at_empty);
+EXPORT_SYMBOL(user_path_at);
int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
struct inode *inode)
diff --git a/fs/namespace.c b/fs/namespace.c
index 5a51315c6678..221db9de4729 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
-static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);
+#define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
+static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
@@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+static DEFINE_RWLOCK(mnt_ns_tree_lock);
+static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr {
unsigned int attr_set;
@@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
+{
+ u64 seq_b = ns->seq;
+
+ if (seq < seq_b)
+ return -1;
+ if (seq > seq_b)
+ return 1;
+ return 0;
+}
+
+static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
+{
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
+}
+
+static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
+{
+ struct mnt_namespace *ns_a = node_to_mnt_ns(a);
+ struct mnt_namespace *ns_b = node_to_mnt_ns(b);
+ u64 seq_a = ns_a->seq;
+
+ return mnt_ns_cmp(seq_a, ns_b) < 0;
+}
+
+static void mnt_ns_tree_add(struct mnt_namespace *ns)
+{
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
+}
+
+static void mnt_ns_release(struct mnt_namespace *ns)
+{
+ lockdep_assert_not_held(&mnt_ns_tree_lock);
+
+ /* keep alive for {list,stat}mount() */
+ if (refcount_dec_and_test(&ns->passive)) {
+ put_user_ns(ns->user_ns);
+ kfree(ns);
+ }
+}
+DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
+
+static void mnt_ns_tree_remove(struct mnt_namespace *ns)
+{
+ /* remove from global mount namespace list */
+ if (!is_anon_ns(ns)) {
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
+ }
+
+ mnt_ns_release(ns);
+}
+
+/*
+ * Returns the mount namespace which either has the specified id, or has the
+ * next smallest id afer the specified one.
+ */
+static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
+{
+ struct rb_node *node = mnt_ns_tree.rb_node;
+ struct mnt_namespace *ret = NULL;
+
+ lockdep_assert_held(&mnt_ns_tree_lock);
+
+ while (node) {
+ struct mnt_namespace *n = node_to_mnt_ns(node);
+
+ if (mnt_ns_id <= n->seq) {
+ ret = node_to_mnt_ns(node);
+ if (mnt_ns_id == n->seq)
+ break;
+ node = node->rb_left;
+ } else {
+ node = node->rb_right;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lookup a mount namespace by id and take a passive reference count. Taking a
+ * passive reference means the mount namespace can be emptied if e.g., the last
+ * task holding an active reference exits. To access the mounts of the
+ * namespace the @namespace_sem must first be acquired. If the namespace has
+ * already shut down before acquiring @namespace_sem, {list,stat}mount() will
+ * see that the mount rbtree of the namespace is empty.
+ */
+static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
+{
+ struct mnt_namespace *ns;
+
+ guard(read_lock)(&mnt_ns_tree_lock);
+ ns = mnt_ns_find_id_at(mnt_ns_id);
+ if (!ns || ns->seq != mnt_ns_id)
+ return NULL;
+
+ refcount_inc(&ns->passive);
+ return ns;
+}
+
static inline void lock_mount_hash(void)
{
write_seqlock(&mount_lock);
@@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id)
return ret;
}
+/*
+ * Returns the mount which either has the specified mnt_id, or has the next
+ * greater id before the specified one.
+ */
+static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id)
+{
+ struct rb_node *node = ns->mounts.rb_node;
+ struct mount *ret = NULL;
+
+ while (node) {
+ struct mount *m = node_to_mount(node);
+
+ if (mnt_id >= m->mnt_id_unique) {
+ ret = node_to_mount(node);
+ if (mnt_id == m->mnt_id_unique)
+ break;
+ node = node->rb_right;
+ } else {
+ node = node->rb_left;
+ }
+ }
+ return ret;
+}
+
#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
@@ -1846,19 +1976,6 @@ bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
-/**
- * path_mounted - check whether path is mounted
- * @path: path to check
- *
- * Determine whether @path refers to the root of a mount.
- *
- * Return: true if @path is the root of a mount, false if not.
- */
-static inline bool path_mounted(const struct path *path)
-{
- return path->mnt->mnt_root == path->dentry;
-}
-
static void warn_mandlock(void)
{
pr_warn_once("=======================================================\n"
@@ -1966,69 +2083,72 @@ static bool mnt_ns_loop(struct dentry *dentry)
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
-struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
+struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
int flag)
{
- struct mount *res, *p, *q, *r, *parent;
+ struct mount *res, *src_parent, *src_root_child, *src_mnt,
+ *dst_parent, *dst_mnt;
- if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
+ if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(src_root))
return ERR_PTR(-EINVAL);
if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);
- res = q = clone_mnt(mnt, dentry, flag);
- if (IS_ERR(q))
- return q;
+ res = dst_mnt = clone_mnt(src_root, dentry, flag);
+ if (IS_ERR(dst_mnt))
+ return dst_mnt;
- q->mnt_mountpoint = mnt->mnt_mountpoint;
+ src_parent = src_root;
+ dst_mnt->mnt_mountpoint = src_root->mnt_mountpoint;
- p = mnt;
- list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
- struct mount *s;
- if (!is_subdir(r->mnt_mountpoint, dentry))
+ list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) {
+ if (!is_subdir(src_root_child->mnt_mountpoint, dentry))
continue;
- for (s = r; s; s = next_mnt(s, r)) {
+ for (src_mnt = src_root_child; src_mnt;
+ src_mnt = next_mnt(src_mnt, src_root_child)) {
if (!(flag & CL_COPY_UNBINDABLE) &&
- IS_MNT_UNBINDABLE(s)) {
- if (s->mnt.mnt_flags & MNT_LOCKED) {
+ IS_MNT_UNBINDABLE(src_mnt)) {
+ if (src_mnt->mnt.mnt_flags & MNT_LOCKED) {
/* Both unbindable and locked. */
- q = ERR_PTR(-EPERM);
+ dst_mnt = ERR_PTR(-EPERM);
goto out;
} else {
- s = skip_mnt_tree(s);
+ src_mnt = skip_mnt_tree(src_mnt);
continue;
}
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
- is_mnt_ns_file(s->mnt.mnt_root)) {
- s = skip_mnt_tree(s);
+ is_mnt_ns_file(src_mnt->mnt.mnt_root)) {
+ src_mnt = skip_mnt_tree(src_mnt);
continue;
}
- while (p != s->mnt_parent) {
- p = p->mnt_parent;
- q = q->mnt_parent;
+ while (src_parent != src_mnt->mnt_parent) {
+ src_parent = src_parent->mnt_parent;
+ dst_mnt = dst_mnt->mnt_parent;
}
- p = s;
- parent = q;
- q = clone_mnt(p, p->mnt.mnt_root, flag);
- if (IS_ERR(q))
+
+ src_parent = src_mnt;
+ dst_parent = dst_mnt;
+ dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag);
+ if (IS_ERR(dst_mnt))
goto out;
lock_mount_hash();
- list_add_tail(&q->mnt_list, &res->mnt_list);
- attach_mnt(q, parent, p->mnt_mp, false);
+ list_add_tail(&dst_mnt->mnt_list, &res->mnt_list);
+ attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp, false);
unlock_mount_hash();
}
}
return res;
+
out:
if (res) {
lock_mount_hash();
umount_tree(res, UMOUNT_SYNC);
unlock_mount_hash();
}
- return q;
+ return dst_mnt;
}
/* Caller should check returned pointer for errors */
@@ -2078,7 +2198,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -3709,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns))
ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts);
- put_user_ns(ns->user_ns);
- kfree(ns);
+ mnt_ns_tree_remove(ns);
}
/*
@@ -3749,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1);
+ refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
+ RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -3826,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
+ mnt_ns_tree_add(new_ns);
namespace_unlock();
if (rootmnt)
@@ -4843,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
+{
+ s->sm.mask |= STATMOUNT_MNT_NS_ID;
+ s->sm.mnt_ns_id = ns->seq;
+}
+
+static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
+{
+ struct vfsmount *mnt = s->mnt;
+ struct super_block *sb = mnt->mnt_sb;
+ int err;
+
+ if (sb->s_op->show_options) {
+ size_t start = seq->count;
+
+ err = sb->s_op->show_options(seq, mnt->mnt_root);
+ if (err)
+ return err;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ if (seq->count == start)
+ return 0;
+
+ /* skip leading comma */
+ memmove(seq->buf + start, seq->buf + start + 1,
+ seq->count - start - 1);
+ seq->count--;
+ }
+
+ return 0;
+}
+
static int statmount_string(struct kstatmount *s, u64 flag)
{
int ret;
@@ -4863,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag)
sm->mnt_point = seq->count;
ret = statmount_mnt_point(s, seq);
break;
+ case STATMOUNT_MNT_OPTS:
+ sm->mnt_opts = seq->count;
+ ret = statmount_mnt_opts(s, seq);
+ break;
default:
WARN_ON_ONCE(true);
return -EINVAL;
@@ -4903,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s)
return 0;
}
-static int do_statmount(struct kstatmount *s)
+static struct mount *listmnt_next(struct mount *curr, bool reverse)
{
- struct mount *m = real_mount(s->mnt);
+ struct rb_node *node;
+
+ if (reverse)
+ node = rb_prev(&curr->mnt_node);
+ else
+ node = rb_next(&curr->mnt_node);
+
+ return node_to_mount(node);
+}
+
+static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
+{
+ struct mount *first, *child;
+
+ rwsem_assert_held(&namespace_sem);
+
+ /* We're looking at our own ns, just use get_fs_root. */
+ if (ns == current->nsproxy->mnt_ns) {
+ get_fs_root(current->fs, root);
+ return 0;
+ }
+
+ /*
+ * We have to find the first mount in our ns and use that, however it
+ * may not exist, so handle that properly.
+ */
+ if (RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ first = child = ns->root;
+ for (;;) {
+ child = listmnt_next(child, false);
+ if (!child)
+ return -ENOENT;
+ if (child->mnt_parent == first)
+ break;
+ }
+
+ root->mnt = mntget(&child->mnt);
+ root->dentry = dget(root->mnt->mnt_root);
+ return 0;
+}
+
+static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
+ struct mnt_namespace *ns)
+{
+ struct path root __free(path_put) = {};
+ struct mount *m;
int err;
+ /* Has the namespace already been emptied? */
+ if (mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ s->mnt = lookup_mnt_in_ns(mnt_id, ns);
+ if (!s->mnt)
+ return -ENOENT;
+
+ err = grab_requested_root(ns, &root);
+ if (err)
+ return err;
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ m = real_mount(s->mnt);
+ if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
return err;
+ s->root = root;
if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s);
@@ -4938,6 +5159,12 @@ static int do_statmount(struct kstatmount *s)
if (!err && s->mask & STATMOUNT_MNT_POINT)
err = statmount_string(s, STATMOUNT_MNT_POINT);
+ if (!err && s->mask & STATMOUNT_MNT_OPTS)
+ err = statmount_string(s, STATMOUNT_MNT_OPTS);
+
+ if (!err && s->mask & STATMOUNT_MNT_NS_ID)
+ statmount_mnt_ns_id(s, ns);
+
if (err)
return err;
@@ -4955,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
return true;
}
+#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
+ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS)
+
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize,
size_t seq_size)
@@ -4966,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
ks->mask = kreq->param;
ks->buf = buf;
ks->bufsize = bufsize;
- ks->seq.size = seq_size;
- ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
- if (!ks->seq.buf)
- return -ENOMEM;
+
+ if (ks->mask & STATMOUNT_STRING_REQ) {
+ if (bufsize == sizeof(ks->sm))
+ return -EOVERFLOW;
+
+ ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
+ if (!ks->seq.buf)
+ return -ENOMEM;
+
+ ks->seq.size = seq_size;
+ }
+
return 0;
}
@@ -4979,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
int ret;
size_t usize;
- BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0);
+ BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1);
ret = get_user(usize, &req->size);
if (ret)
@@ -4994,16 +5232,32 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
return ret;
if (kreq->spare != 0)
return -EINVAL;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
return 0;
}
+/*
+ * If the user requested a specific mount namespace id, look that up and return
+ * that, or if not simply grab a passive reference on our mount namespace and
+ * return that.
+ */
+static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id)
+{
+ if (mnt_ns_id)
+ return lookup_mnt_ns(mnt_ns_id);
+ refcount_inc(&current->nsproxy->mnt_ns->passive);
+ return current->nsproxy->mnt_ns;
+}
+
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
struct statmount __user *, buf, size_t, bufsize,
unsigned int, flags)
{
- struct vfsmount *mnt;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
+ struct kstatmount *ks __free(kfree) = NULL;
struct mnt_id_req kreq;
- struct kstatmount ks;
/* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX;
int ret;
@@ -5015,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
+ if (!ks)
+ return -ENOMEM;
+
retry:
- ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size);
+ ret = prepare_kstatmount(ks, &kreq, buf, bufsize, seq_size);
if (ret)
return ret;
- down_read(&namespace_sem);
- mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns);
- if (!mnt) {
- up_read(&namespace_sem);
- kvfree(ks.seq.buf);
- return -ENOENT;
- }
-
- ks.mnt = mnt;
- get_fs_root(current->fs, &ks.root);
- ret = do_statmount(&ks);
- path_put(&ks.root);
- up_read(&namespace_sem);
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
if (!ret)
- ret = copy_statmount_to_user(&ks);
- kvfree(ks.seq.buf);
+ ret = copy_statmount_to_user(ks);
+ kvfree(ks->seq.buf);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
}
-static struct mount *listmnt_next(struct mount *curr)
+static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
+ u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
+ bool reverse)
{
- return node_to_mount(rb_next(&curr->mnt_node));
-}
-
-static ssize_t do_listmount(struct mount *first, struct path *orig,
- u64 mnt_parent_id, u64 __user *mnt_ids,
- size_t nr_mnt_ids, const struct path *root)
-{
- struct mount *r;
+ struct path root __free(path_put) = {};
+ struct path orig;
+ struct mount *r, *first;
ssize_t ret;
+ rwsem_assert_held(&namespace_sem);
+
+ ret = grab_requested_root(ns, &root);
+ if (ret)
+ return ret;
+
+ if (mnt_parent_id == LSMT_ROOT) {
+ orig = root;
+ } else {
+ orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
+ if (!orig.mnt)
+ return -ENOENT;
+ orig.dentry = orig.mnt->mnt_root;
+ }
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- ret = security_sb_statfs(orig->dentry);
+ ret = security_sb_statfs(orig.dentry);
if (ret)
return ret;
- for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) {
+ if (!last_mnt_id) {
+ if (reverse)
+ first = node_to_mount(rb_last(&ns->mounts));
+ else
+ first = node_to_mount(rb_first(&ns->mounts));
+ } else {
+ if (reverse)
+ first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);
+ else
+ first = mnt_find_id_at(ns, last_mnt_id + 1);
+ }
+
+ for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r, reverse)) {
if (r->mnt_id_unique == mnt_parent_id)
continue;
- if (!is_path_reachable(r, r->mnt.mnt_root, orig))
+ if (!is_path_reachable(r, r->mnt.mnt_root, &orig))
continue;
- if (put_user(r->mnt_id_unique, mnt_ids))
- return -EFAULT;
+ *mnt_ids = r->mnt_id_unique;
mnt_ids++;
nr_mnt_ids--;
ret++;
@@ -5080,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig,
return ret;
}
-SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
- mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
+SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
+ u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
- struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ u64 *kmnt_ids __free(kvfree) = NULL;
+ const size_t maxcount = 1000000;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq;
- struct mount *first;
- struct path root, orig;
- u64 mnt_parent_id, last_mnt_id;
- const size_t maxcount = (size_t)-1 >> 3;
+ u64 last_mnt_id;
ssize_t ret;
- if (flags)
+ if (flags & ~LISTMOUNT_REVERSE)
return -EINVAL;
+ /*
+ * If the mount namespace really has more than 1 million mounts the
+ * caller must iterate over the mount namespace (and reconsider their
+ * system design...).
+ */
if (unlikely(nr_mnt_ids > maxcount))
- return -EFAULT;
+ return -EOVERFLOW;
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT;
@@ -5103,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
ret = copy_mnt_id_req(req, &kreq);
if (ret)
return ret;
- mnt_parent_id = kreq.mnt_id;
+
last_mnt_id = kreq.param;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
- down_read(&namespace_sem);
- get_fs_root(current->fs, &root);
- if (mnt_parent_id == LSMT_ROOT) {
- orig = root;
- } else {
- ret = -ENOENT;
- orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
- if (!orig.mnt)
- goto err;
- orig.dentry = orig.mnt->mnt_root;
- }
- if (!last_mnt_id)
- first = node_to_mount(rb_first(&ns->mounts));
- else
- first = mnt_find_id_at(ns, last_mnt_id + 1);
+ kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
+ GFP_KERNEL_ACCOUNT);
+ if (!kmnt_ids)
+ return -ENOMEM;
+
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
+ nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
+ if (ret <= 0)
+ return ret;
+
+ if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
+ return -EFAULT;
- ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root);
-err:
- path_put(&root);
- up_read(&namespace_sem);
return ret;
}
-
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
@@ -5157,6 +5443,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
+
+ mnt_ns_tree_add(ns);
}
void __init mnt_init(void)
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a6bb03bea920..4c0401dbbfcf 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");
+ kdebug("no unlock");
else
folio_unlock(folio);
}
@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
- _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
+ kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
@@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_file_pos(folio), folio_size(folio),
@@ -508,7 +508,7 @@ retry:
have_folio:
*_folio = folio;
- _leave(" = 0");
+ kleave(" = 0");
return 0;
error_put:
@@ -518,7 +518,7 @@ error:
folio_unlock(folio);
folio_put(folio);
}
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
- _enter("%zx @%llx", flen, start);
+ kenter("%zx @%llx", flen, start);
ret = -ENOMEM;
@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index d583af7a2209..ecbc99ec7d36 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);
- _enter("");
+ kenter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
- _debug("howto %u", howto);
+ kdebug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
- _debug("prefetch = %zd", ret);
+ kdebug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
@@ -418,7 +418,7 @@ out:
}
iocb->ki_pos += written;
- _leave(" = %zd [%zd]", written, ret);
+ kleave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
- _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
@@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
vm_fault_t ret = VM_FAULT_RETRY;
int err;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 10a1e4da6bda..b6debac6205f 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
if (!orig_count)
return 0; /* Don't update atime */
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 88f2adfab75e..792ef17bae21 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
- _debug("uw %llx-%llx", start, end);
+ kdebug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
@@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
- _debug("begin = %zd", ret);
+ kdebug("begin = %zd", ret);
goto out;
}
@@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
- _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..288a73c3072d 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
- _enter("{%s,%s}", ops->name, cache->name);
+ kenter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
- _leave(" = 0 [%s]", cache->name);
+ kleave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..4d1e8bf4c615 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
- _enter("V=%x", volume->debug_id);
+ kenter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
- _leave(" = c=%08x", cookie->debug_id);
+ kleave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
- _enter("");
+ kenter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
- _leave(" [fail]");
+ kleave(" [fail]");
goto out;
}
@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
- _enter("c=%08x", cookie->debug_id);
+ kenter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
@@ -636,7 +636,7 @@ again:
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
- _leave("");
+ kleave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
@@ -820,7 +820,7 @@ out:
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
- _leave("");
+ kleave("");
}
static void fscache_cookie_worker(struct work_struct *work)
@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
- _debug("lru c=%x", cookie->debug_id);
+ kdebug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
- _enter("c=%08x{%d},%d",
+ kenter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
- _leave(" [no %u]", cookie->state);
+ kleave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
- _leave(" [look %x]", cookie->inval_counter);
+ kleave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
- _leave(" [inv]");
+ kleave(" [inv]");
return;
}
}
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..bf4eaeec44fb 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
- _leave(" [broken]");
+ kleave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
@@ -52,7 +52,7 @@ again:
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
- _leave(" [not live]");
+ kleave(" [not live]");
return false;
}
@@ -92,7 +92,7 @@ again:
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -140,7 +140,7 @@ failed:
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
- _leave(" = -ENOBUFS");
+ kleave(" = -ENOBUFS");
return -ENOBUFS;
}
@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
- _enter("%llx,%zx", start, len);
+ kenter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..bf9b33d26e31 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -99,7 +99,7 @@ error_wq:
*/
void __exit fscache_exit(void)
{
- _enter("");
+ kenter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..2e2a405ca9b0 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
return volume;
}
+struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where)
+{
+ int ref;
+
+ if (!__refcount_inc_not_zero(&volume->ref, &ref))
+ return NULL;
+
+ trace_fscache_volume(volume->debug_id, ref + 1, where);
+ return volume;
+}
+EXPORT_SYMBOL(fscache_try_get_volume);
+
static void fscache_see_volume(struct fscache_volume *volume,
enum fscache_volume_trace where)
{
@@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
- _leave(" = v=%x", volume->debug_id);
+ kleave(" = v=%x", volume->debug_id);
return volume;
err_vol:
@@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume,
fscache_free_volume(volume);
}
}
+EXPORT_SYMBOL(fscache_put_volume);
/*
* Relinquish a volume representation cookie.
@@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
- _debug("withdraw V=%x", volume->debug_id);
+ kdebug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index acd9ca14e264..21e46bc9aa49 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
-extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
@@ -344,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops;
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
bool fscache_begin_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
@@ -356,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
- printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+ pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_NETFS_DEBUG)
-#define _enter(FMT, ...) \
-do { \
- if (netfs_debug) \
- kenter(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _leave(FMT, ...) \
-do { \
- if (netfs_debug) \
- kleave(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _debug(FMT, ...) \
-do { \
- if (netfs_debug) \
- kdebug(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
/*
* assertions
*/
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..c7576481c321 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("R=%x[%x]{%llx,%lx},%zd",
+ kenter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
- _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
+ kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
+ kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
- _enter("R=%x %llx-%llx",
+ kenter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- _debug("submit %llx + %llx >= %llx",
+ kdebug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d21..db824c372842 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -20,10 +20,6 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
-unsigned netfs_debug;
-module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
-
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 83e644bd518f..172808e83ca8 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
- _enter("");
+ kenter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
@@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
- _enter("{%lx},%zx,%zx", folio->index, offset, length);
+ kenter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 426cf87aaf2e..488147439fe0 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+ kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
- _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
@@ -409,7 +409,7 @@ reassess_streams:
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
- //_debug("sreq [%x] %llx %zx/%zx",
+ //kdebug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
@@ -598,7 +598,7 @@ reassess_streams:
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
- _leave(" = %x", notes);
+ kleave(" = %x", notes);
return;
need_retry:
@@ -606,7 +606,7 @@ need_retry:
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
- _debug("retry");
+ kdebug("retry");
netfs_retry_writes(wreq);
goto out;
}
@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
- _debug("finished");
+ kdebug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
- _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+ kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index ec6cf8707fb0..d7c971df8866 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
@@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
- _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+ kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
- _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+ kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
@@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
- _leave("no write");
+ kleave("no write");
return len;
}
- _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+ kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
@@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
- _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
@@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
- _enter("");
+ kenter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
@@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
- _debug("beyond eof");
+ kdebug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
@@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
- _debug("folio %zx %zx %zx", foff, flen, fsize);
+ kdebug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
@@ -487,7 +487,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
- _leave(" = 0");
+ kleave(" = 0");
return 0;
}
@@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
- _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+ kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
@@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
@@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
- _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
@@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
@@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
- _enter("%zx", len);
+ kenter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
@@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
- _debug("unbuffered %zx", len);
+ kdebug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
@@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a142287d86f6..cca80b5f54e0 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct folio *folio = nfs_page_to_folio(req);
- if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- folio_set_error(folio);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
if (nfs_netfs_folio_unlock(folio))
folio_unlock(folio);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 13818129d268..1c62a5a9f51d 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -32,15 +32,7 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio)
int error;
error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
- if (error < 0)
- goto error;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-error:
- folio_set_error(folio);
- folio_unlock(folio);
+ folio_end_read(folio, error == 0);
return error;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2329cbb0e446..a91463ab87a0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -311,7 +311,6 @@ static void nfs_mapping_set_error(struct folio *folio, int error)
{
struct address_space *mapping = folio_file_mapping(folio);
- folio_set_error(folio);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0b75305fb5f5..dd4e11a703aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -247,7 +247,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry = dget(exp->ex_path.dentry);
else {
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
- data_left, fileid_type,
+ data_left, fileid_type, 0,
nfsd_acceptable, exp);
if (IS_ERR_OR_NULL(dentry)) {
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index dddfa604491a..4a29b0138d75 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -383,11 +383,39 @@ found:
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+ de = nilfs_get_folio(dir, 0, &folio);
if (IS_ERR(de))
return NULL;
- return nilfs_next_entry(de);
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
+
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
+ }
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
diff --git a/fs/nls/mac-celtic.c b/fs/nls/mac-celtic.c
index 266c2d7d50bd..2963f3299d7e 100644
--- a/fs/nls/mac-celtic.c
+++ b/fs/nls/mac-celtic.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macceltic(void)
module_init(init_nls_macceltic)
module_exit(exit_nls_macceltic)
+MODULE_DESCRIPTION("NLS Codepage macceltic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-centeuro.c b/fs/nls/mac-centeuro.c
index 9789c6057551..43b20f4bdb67 100644
--- a/fs/nls/mac-centeuro.c
+++ b/fs/nls/mac-centeuro.c
@@ -528,4 +528,5 @@ static void __exit exit_nls_maccenteuro(void)
module_init(init_nls_maccenteuro)
module_exit(exit_nls_maccenteuro)
+MODULE_DESCRIPTION("NLS Codepage maccenteuro");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-croatian.c b/fs/nls/mac-croatian.c
index bb19e7a07d43..62730d6a64e5 100644
--- a/fs/nls/mac-croatian.c
+++ b/fs/nls/mac-croatian.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_maccroatian(void)
module_init(init_nls_maccroatian)
module_exit(exit_nls_maccroatian)
+MODULE_DESCRIPTION("NLS Codepage maccroatian");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-cyrillic.c b/fs/nls/mac-cyrillic.c
index 2a7dea36acba..7a5c4d16aac8 100644
--- a/fs/nls/mac-cyrillic.c
+++ b/fs/nls/mac-cyrillic.c
@@ -493,4 +493,5 @@ static void __exit exit_nls_maccyrillic(void)
module_init(init_nls_maccyrillic)
module_exit(exit_nls_maccyrillic)
+MODULE_DESCRIPTION("NLS Codepage maccyrillic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-gaelic.c b/fs/nls/mac-gaelic.c
index 77b001653588..3d22f03a90b6 100644
--- a/fs/nls/mac-gaelic.c
+++ b/fs/nls/mac-gaelic.c
@@ -563,4 +563,5 @@ static void __exit exit_nls_macgaelic(void)
module_init(init_nls_macgaelic)
module_exit(exit_nls_macgaelic)
+MODULE_DESCRIPTION("NLS Codepage macgaelic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-greek.c b/fs/nls/mac-greek.c
index 1eccf499e2eb..de3aa9ddb5b1 100644
--- a/fs/nls/mac-greek.c
+++ b/fs/nls/mac-greek.c
@@ -493,4 +493,5 @@ static void __exit exit_nls_macgreek(void)
module_init(init_nls_macgreek)
module_exit(exit_nls_macgreek)
+MODULE_DESCRIPTION("NLS Codepage macgreek");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-iceland.c b/fs/nls/mac-iceland.c
index cbd0875c6d69..0bba83f9d415 100644
--- a/fs/nls/mac-iceland.c
+++ b/fs/nls/mac-iceland.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_maciceland(void)
module_init(init_nls_maciceland)
module_exit(exit_nls_maciceland)
+MODULE_DESCRIPTION("NLS Codepage maciceland");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-inuit.c b/fs/nls/mac-inuit.c
index fba8357aaf03..493386832dfd 100644
--- a/fs/nls/mac-inuit.c
+++ b/fs/nls/mac-inuit.c
@@ -528,4 +528,5 @@ static void __exit exit_nls_macinuit(void)
module_init(init_nls_macinuit)
module_exit(exit_nls_macinuit)
+MODULE_DESCRIPTION("NLS Codepage macinuit");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-roman.c b/fs/nls/mac-roman.c
index b6a98a5208cd..d3c082173c20 100644
--- a/fs/nls/mac-roman.c
+++ b/fs/nls/mac-roman.c
@@ -633,4 +633,5 @@ static void __exit exit_nls_macroman(void)
module_init(init_nls_macroman)
module_exit(exit_nls_macroman)
+MODULE_DESCRIPTION("NLS Codepage macroman");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-romanian.c b/fs/nls/mac-romanian.c
index 25547f023638..a7735852f2d5 100644
--- a/fs/nls/mac-romanian.c
+++ b/fs/nls/mac-romanian.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macromanian(void)
module_init(init_nls_macromanian)
module_exit(exit_nls_macromanian)
+MODULE_DESCRIPTION("NLS Codepage macromanian");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-turkish.c b/fs/nls/mac-turkish.c
index b5454bc7b7fa..d77e9b6b7d7c 100644
--- a/fs/nls/mac-turkish.c
+++ b/fs/nls/mac-turkish.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macturkish(void)
module_init(init_nls_macturkish)
module_exit(exit_nls_macturkish)
+MODULE_DESCRIPTION("NLS Codepage macturkish");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_ascii.c b/fs/nls/nls_ascii.c
index a2620650d5e4..068143d71284 100644
--- a/fs/nls/nls_ascii.c
+++ b/fs/nls/nls_ascii.c
@@ -163,4 +163,5 @@ static void __exit exit_nls_ascii(void)
module_init(init_nls_ascii)
module_exit(exit_nls_ascii)
+MODULE_DESCRIPTION("NLS ASCII (United States)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index a026dbd3593f..18d597e49a19 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -545,4 +545,5 @@ EXPORT_SYMBOL(unload_nls);
EXPORT_SYMBOL(load_nls);
EXPORT_SYMBOL(load_nls_default);
+MODULE_DESCRIPTION("Base file system native language support");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1250.c b/fs/nls/nls_cp1250.c
index ace3e19d3407..e22a57a4b828 100644
--- a/fs/nls/nls_cp1250.c
+++ b/fs/nls/nls_cp1250.c
@@ -343,4 +343,5 @@ static void __exit exit_nls_cp1250(void)
module_init(init_nls_cp1250)
module_exit(exit_nls_cp1250)
+MODULE_DESCRIPTION("NLS Windows CP1250 (Slavic/Central European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1251.c b/fs/nls/nls_cp1251.c
index 9273ddfd08a1..6f46d339f23c 100644
--- a/fs/nls/nls_cp1251.c
+++ b/fs/nls/nls_cp1251.c
@@ -298,4 +298,5 @@ static void __exit exit_nls_cp1251(void)
module_init(init_nls_cp1251)
module_exit(exit_nls_cp1251)
+MODULE_DESCRIPTION("NLS Windows CP1251 (Bulgarian, Belarusian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1255.c b/fs/nls/nls_cp1255.c
index 1caf5dfed85b..299e089d4301 100644
--- a/fs/nls/nls_cp1255.c
+++ b/fs/nls/nls_cp1255.c
@@ -380,5 +380,6 @@ static void __exit exit_nls_cp1255(void)
module_init(init_nls_cp1255)
module_exit(exit_nls_cp1255)
+MODULE_DESCRIPTION("NLS Hebrew charsets (ISO-8859-8, CP1255)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(iso8859-8);
diff --git a/fs/nls/nls_cp437.c b/fs/nls/nls_cp437.c
index 7ddb830da3fd..ab880499ea32 100644
--- a/fs/nls/nls_cp437.c
+++ b/fs/nls/nls_cp437.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp437(void)
module_init(init_nls_cp437)
module_exit(exit_nls_cp437)
+MODULE_DESCRIPTION("NLS Codepage 437 (United States, Canada)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp737.c b/fs/nls/nls_cp737.c
index c593f683a0cd..5c37618296e9 100644
--- a/fs/nls/nls_cp737.c
+++ b/fs/nls/nls_cp737.c
@@ -347,4 +347,5 @@ static void __exit exit_nls_cp737(void)
module_init(init_nls_cp737)
module_exit(exit_nls_cp737)
+MODULE_DESCRIPTION("NLS Codepage 737 (Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp775.c b/fs/nls/nls_cp775.c
index 554c863745f2..51ccc908901f 100644
--- a/fs/nls/nls_cp775.c
+++ b/fs/nls/nls_cp775.c
@@ -316,4 +316,5 @@ static void __exit exit_nls_cp775(void)
module_init(init_nls_cp775)
module_exit(exit_nls_cp775)
+MODULE_DESCRIPTION("NLS Codepage 775 (Baltic Rim)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp850.c b/fs/nls/nls_cp850.c
index 56cccd14b40b..5f9b9507a8b6 100644
--- a/fs/nls/nls_cp850.c
+++ b/fs/nls/nls_cp850.c
@@ -312,4 +312,5 @@ static void __exit exit_nls_cp850(void)
module_init(init_nls_cp850)
module_exit(exit_nls_cp850)
+MODULE_DESCRIPTION("NLS Codepage 850 (Europe)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp852.c b/fs/nls/nls_cp852.c
index 7cdc05ac1d40..fc513a5e8358 100644
--- a/fs/nls/nls_cp852.c
+++ b/fs/nls/nls_cp852.c
@@ -334,4 +334,5 @@ static void __exit exit_nls_cp852(void)
module_init(init_nls_cp852)
module_exit(exit_nls_cp852)
+MODULE_DESCRIPTION("NLS Codepage 852 (Central/Eastern Europe)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp855.c b/fs/nls/nls_cp855.c
index 7426eea05663..a43be58adb36 100644
--- a/fs/nls/nls_cp855.c
+++ b/fs/nls/nls_cp855.c
@@ -296,4 +296,5 @@ static void __exit exit_nls_cp855(void)
module_init(init_nls_cp855)
module_exit(exit_nls_cp855)
+MODULE_DESCRIPTION("NLS Codepage 855 (Cyrillic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp857.c b/fs/nls/nls_cp857.c
index 098309733ebd..772cd4195bad 100644
--- a/fs/nls/nls_cp857.c
+++ b/fs/nls/nls_cp857.c
@@ -298,4 +298,5 @@ static void __exit exit_nls_cp857(void)
module_init(init_nls_cp857)
module_exit(exit_nls_cp857)
+MODULE_DESCRIPTION("NLS Codepage 857 (Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp860.c b/fs/nls/nls_cp860.c
index 84224478e731..36cf4ca11966 100644
--- a/fs/nls/nls_cp860.c
+++ b/fs/nls/nls_cp860.c
@@ -361,4 +361,5 @@ static void __exit exit_nls_cp860(void)
module_init(init_nls_cp860)
module_exit(exit_nls_cp860)
+MODULE_DESCRIPTION("NLS Codepage 860 (Portuguese)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp861.c b/fs/nls/nls_cp861.c
index dc873e4be092..b7397d079f8f 100644
--- a/fs/nls/nls_cp861.c
+++ b/fs/nls/nls_cp861.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp861(void)
module_init(init_nls_cp861)
module_exit(exit_nls_cp861)
+MODULE_DESCRIPTION("NLS Codepage 861 (Icelandic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp862.c b/fs/nls/nls_cp862.c
index d5263e3c5566..fd3b95d1e95d 100644
--- a/fs/nls/nls_cp862.c
+++ b/fs/nls/nls_cp862.c
@@ -418,4 +418,5 @@ static void __exit exit_nls_cp862(void)
module_init(init_nls_cp862)
module_exit(exit_nls_cp862)
+MODULE_DESCRIPTION("NLS Codepage 862 (Hebrew)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp863.c b/fs/nls/nls_cp863.c
index 051c9832e36a..813ae7944249 100644
--- a/fs/nls/nls_cp863.c
+++ b/fs/nls/nls_cp863.c
@@ -378,4 +378,5 @@ static void __exit exit_nls_cp863(void)
module_init(init_nls_cp863)
module_exit(exit_nls_cp863)
+MODULE_DESCRIPTION("NLS Codepage 863 (Canadian French)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp864.c b/fs/nls/nls_cp864.c
index 97eb1273b2f7..d9eb6d5cd47a 100644
--- a/fs/nls/nls_cp864.c
+++ b/fs/nls/nls_cp864.c
@@ -404,4 +404,5 @@ static void __exit exit_nls_cp864(void)
module_init(init_nls_cp864)
module_exit(exit_nls_cp864)
+MODULE_DESCRIPTION("NLS Codepage 864 (Arabic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp865.c b/fs/nls/nls_cp865.c
index 111214228525..2678ffd98bb6 100644
--- a/fs/nls/nls_cp865.c
+++ b/fs/nls/nls_cp865.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp865(void)
module_init(init_nls_cp865)
module_exit(exit_nls_cp865)
+MODULE_DESCRIPTION("NLS Codepage 865 (Norwegian, Danish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp866.c b/fs/nls/nls_cp866.c
index ffdcbc3fc38d..7e93d0a3802a 100644
--- a/fs/nls/nls_cp866.c
+++ b/fs/nls/nls_cp866.c
@@ -302,4 +302,5 @@ static void __exit exit_nls_cp866(void)
module_init(init_nls_cp866)
module_exit(exit_nls_cp866)
+MODULE_DESCRIPTION("NLS Codepage 866 (Cyrillic/Russian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp869.c b/fs/nls/nls_cp869.c
index 3b5a34589354..4491737dd5cb 100644
--- a/fs/nls/nls_cp869.c
+++ b/fs/nls/nls_cp869.c
@@ -312,4 +312,5 @@ static void __exit exit_nls_cp869(void)
module_init(init_nls_cp869)
module_exit(exit_nls_cp869)
+MODULE_DESCRIPTION("NLS Codepage 869 (Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp874.c b/fs/nls/nls_cp874.c
index 8dfaa10710fa..4fcfbf8ca72c 100644
--- a/fs/nls/nls_cp874.c
+++ b/fs/nls/nls_cp874.c
@@ -271,5 +271,6 @@ static void __exit exit_nls_cp874(void)
module_init(init_nls_cp874)
module_exit(exit_nls_cp874)
+MODULE_DESCRIPTION("NLS Thai charset (CP874, TIS-620)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(tis-620);
diff --git a/fs/nls/nls_cp932.c b/fs/nls/nls_cp932.c
index 67b7398e8483..e5e6270fcca6 100644
--- a/fs/nls/nls_cp932.c
+++ b/fs/nls/nls_cp932.c
@@ -7929,5 +7929,6 @@ static void __exit exit_nls_cp932(void)
module_init(init_nls_cp932)
module_exit(exit_nls_cp932)
+MODULE_DESCRIPTION("NLS Japanese charset (Shift-JIS)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(sjis);
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index c96546cfec9f..91d0a15fd7f9 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -11107,5 +11107,6 @@ static void __exit exit_nls_cp936(void)
module_init(init_nls_cp936)
module_exit(exit_nls_cp936)
+MODULE_DESCRIPTION("NLS Simplified Chinese charset (CP936, GB2312)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(gb2312);
diff --git a/fs/nls/nls_cp949.c b/fs/nls/nls_cp949.c
index 199171e97aa4..3ae03c76d59c 100644
--- a/fs/nls/nls_cp949.c
+++ b/fs/nls/nls_cp949.c
@@ -13942,5 +13942,6 @@ static void __exit exit_nls_cp949(void)
module_init(init_nls_cp949)
module_exit(exit_nls_cp949)
+MODULE_DESCRIPTION("NLS Korean charset (CP949, EUC-KR)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(euc-kr);
diff --git a/fs/nls/nls_cp950.c b/fs/nls/nls_cp950.c
index 8e1418708209..e968aa80198d 100644
--- a/fs/nls/nls_cp950.c
+++ b/fs/nls/nls_cp950.c
@@ -9478,5 +9478,6 @@ static void __exit exit_nls_cp950(void)
module_init(init_nls_cp950)
module_exit(exit_nls_cp950)
+MODULE_DESCRIPTION("NLS Traditional Chinese charset (Big5)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(big5);
diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c
index 162b3f160353..0191cc9d955e 100644
--- a/fs/nls/nls_euc-jp.c
+++ b/fs/nls/nls_euc-jp.c
@@ -577,4 +577,5 @@ static void __exit exit_nls_euc_jp(void)
module_init(init_nls_euc_jp)
module_exit(exit_nls_euc_jp)
+MODULE_DESCRIPTION("NLS Japanese charset (EUC-JP)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-1.c b/fs/nls/nls_iso8859-1.c
index 69ac020d43b1..a181be488f7d 100644
--- a/fs/nls/nls_iso8859-1.c
+++ b/fs/nls/nls_iso8859-1.c
@@ -254,4 +254,5 @@ static void __exit exit_nls_iso8859_1(void)
module_init(init_nls_iso8859_1)
module_exit(exit_nls_iso8859_1)
+MODULE_DESCRIPTION("NLS ISO 8859-1 (Latin 1; Western European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-13.c b/fs/nls/nls_iso8859-13.c
index afb3f8f275f0..8e2be5bfeaf1 100644
--- a/fs/nls/nls_iso8859-13.c
+++ b/fs/nls/nls_iso8859-13.c
@@ -282,4 +282,5 @@ static void __exit exit_nls_iso8859_13(void)
module_init(init_nls_iso8859_13)
module_exit(exit_nls_iso8859_13)
+MODULE_DESCRIPTION("NLS ISO 8859-13 (Latin 7; Baltic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-14.c b/fs/nls/nls_iso8859-14.c
index 046370f0b6f0..c789eccb8a69 100644
--- a/fs/nls/nls_iso8859-14.c
+++ b/fs/nls/nls_iso8859-14.c
@@ -338,4 +338,5 @@ static void __exit exit_nls_iso8859_14(void)
module_init(init_nls_iso8859_14)
module_exit(exit_nls_iso8859_14)
+MODULE_DESCRIPTION("NLS ISO 8859-14 (Latin 8; Celtic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-15.c b/fs/nls/nls_iso8859-15.c
index 7e34a841a056..ffec649176fb 100644
--- a/fs/nls/nls_iso8859-15.c
+++ b/fs/nls/nls_iso8859-15.c
@@ -304,4 +304,5 @@ static void __exit exit_nls_iso8859_15(void)
module_init(init_nls_iso8859_15)
module_exit(exit_nls_iso8859_15)
+MODULE_DESCRIPTION("NLS ISO 8859-15 (Latin 9; Western European Languages with Euro)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-2.c b/fs/nls/nls_iso8859-2.c
index 7dd571181741..d352334d0314 100644
--- a/fs/nls/nls_iso8859-2.c
+++ b/fs/nls/nls_iso8859-2.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_2(void)
module_init(init_nls_iso8859_2)
module_exit(exit_nls_iso8859_2)
+MODULE_DESCRIPTION("NLS ISO 8859-2 (Latin 2; Slavic/Central European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-3.c b/fs/nls/nls_iso8859-3.c
index 740b75ec4493..09990e6634d2 100644
--- a/fs/nls/nls_iso8859-3.c
+++ b/fs/nls/nls_iso8859-3.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_3(void)
module_init(init_nls_iso8859_3)
module_exit(exit_nls_iso8859_3)
+MODULE_DESCRIPTION("NLS ISO 8859-3 (Latin 3; Esperanto, Galician, Maltese, Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-4.c b/fs/nls/nls_iso8859-4.c
index 8826021e32f5..92795224912e 100644
--- a/fs/nls/nls_iso8859-4.c
+++ b/fs/nls/nls_iso8859-4.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_4(void)
module_init(init_nls_iso8859_4)
module_exit(exit_nls_iso8859_4)
+MODULE_DESCRIPTION("NLS ISO 8859-4 (Latin 4; old Baltic charset)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-5.c b/fs/nls/nls_iso8859-5.c
index 7c04057a1ad8..32309315307a 100644
--- a/fs/nls/nls_iso8859-5.c
+++ b/fs/nls/nls_iso8859-5.c
@@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_5(void)
module_init(init_nls_iso8859_5)
module_exit(exit_nls_iso8859_5)
+MODULE_DESCRIPTION("NLS ISO 8859-5 (Cyrillic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-6.c b/fs/nls/nls_iso8859-6.c
index d4a881400d74..c18183469d2a 100644
--- a/fs/nls/nls_iso8859-6.c
+++ b/fs/nls/nls_iso8859-6.c
@@ -260,4 +260,5 @@ static void __exit exit_nls_iso8859_6(void)
module_init(init_nls_iso8859_6)
module_exit(exit_nls_iso8859_6)
+MODULE_DESCRIPTION("NLS ISO 8859-6 (Arabic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-7.c b/fs/nls/nls_iso8859-7.c
index 37b75d825a75..3652d6832864 100644
--- a/fs/nls/nls_iso8859-7.c
+++ b/fs/nls/nls_iso8859-7.c
@@ -314,4 +314,5 @@ static void __exit exit_nls_iso8859_7(void)
module_init(init_nls_iso8859_7)
module_exit(exit_nls_iso8859_7)
+MODULE_DESCRIPTION("NLS ISO 8859-7 (Modern Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-9.c b/fs/nls/nls_iso8859-9.c
index 557b98250d37..11a67834b855 100644
--- a/fs/nls/nls_iso8859-9.c
+++ b/fs/nls/nls_iso8859-9.c
@@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_9(void)
module_init(init_nls_iso8859_9)
module_exit(exit_nls_iso8859_9)
+MODULE_DESCRIPTION("NLS ISO 8859-9 (Latin 5; Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-r.c b/fs/nls/nls_koi8-r.c
index 811f232fccfb..e3dca27a3803 100644
--- a/fs/nls/nls_koi8-r.c
+++ b/fs/nls/nls_koi8-r.c
@@ -320,4 +320,5 @@ static void __exit exit_nls_koi8_r(void)
module_init(init_nls_koi8_r)
module_exit(exit_nls_koi8_r)
+MODULE_DESCRIPTION("NLS KOI8-R (Russian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c
index a80a741a8676..07afcd9e58c0 100644
--- a/fs/nls/nls_koi8-ru.c
+++ b/fs/nls/nls_koi8-ru.c
@@ -79,4 +79,5 @@ static void __exit exit_nls_koi8_ru(void)
module_init(init_nls_koi8_ru)
module_exit(exit_nls_koi8_ru)
+MODULE_DESCRIPTION("NLS KOI8-RU (Belarusian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-u.c b/fs/nls/nls_koi8-u.c
index 7e029e4c188a..f60645758c1a 100644
--- a/fs/nls/nls_koi8-u.c
+++ b/fs/nls/nls_koi8-u.c
@@ -327,4 +327,5 @@ static void __exit exit_nls_koi8_u(void)
module_init(init_nls_koi8_u)
module_exit(exit_nls_koi8_u)
+MODULE_DESCRIPTION("NLS KOI8-U (Ukrainian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_ucs2_utils.c b/fs/nls/nls_ucs2_utils.c
index a69781c54dd8..d4564b79d7bf 100644
--- a/fs/nls/nls_ucs2_utils.c
+++ b/fs/nls/nls_ucs2_utils.c
@@ -16,6 +16,7 @@
#include <asm/unaligned.h>
#include "nls_ucs2_utils.h"
+MODULE_DESCRIPTION("NLS UCS-2");
MODULE_LICENSE("GPL");
/*
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index afcfbc4a14db..a0fa0610eaac 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -64,4 +64,5 @@ static void __exit exit_nls_utf8(void)
module_init(init_nls_utf8)
module_exit(exit_nls_utf8)
+MODULE_DESCRIPTION("NLS UTF-8");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 07e22a15ef02..a4a925dce331 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -8,10 +8,12 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
+#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/nsfs.h>
#include <linux/uaccess.h>
+#include "mount.h"
#include "internal.h"
static struct vfsmount *nsfs_mnt;
@@ -82,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task,
return ns_get_path_cb(path, ns_get_path_task, &args);
}
-int open_related_ns(struct ns_common *ns,
- struct ns_common *(*get_ns)(struct ns_common *ns))
+/**
+ * open_namespace - open a namespace
+ * @ns: the namespace to open
+ *
+ * This will consume a reference to @ns indendent of success or failure.
+ *
+ * Return: A file descriptor on success or a negative error code on failure.
+ */
+int open_namespace(struct ns_common *ns)
{
- struct path path = {};
- struct ns_common *relative;
+ struct path path __free(path_put) = {};
struct file *f;
int err;
- int fd;
- fd = get_unused_fd_flags(O_CLOEXEC);
+ /* call first to consume reference */
+ err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
+ if (err < 0)
+ return err;
+
+ CLASS(get_unused_fd, fd)(O_CLOEXEC);
if (fd < 0)
return fd;
- relative = get_ns(ns);
- if (IS_ERR(relative)) {
- put_unused_fd(fd);
- return PTR_ERR(relative);
- }
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ if (IS_ERR(f))
+ return PTR_ERR(f);
- err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path);
- if (err < 0) {
- put_unused_fd(fd);
- return err;
- }
+ fd_install(fd, f);
+ return take_fd(fd);
+}
- f = dentry_open(&path, O_RDONLY, current_cred());
- path_put(&path);
- if (IS_ERR(f)) {
- put_unused_fd(fd);
- fd = PTR_ERR(f);
- } else
- fd_install(fd, f);
+int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct ns_common *relative;
+
+ relative = get_ns(ns);
+ if (IS_ERR(relative))
+ return PTR_ERR(relative);
- return fd;
+ return open_namespace(relative);
}
EXPORT_SYMBOL_GPL(open_related_ns);
@@ -123,9 +132,12 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
struct user_namespace *user_ns;
+ struct pid_namespace *pid_ns;
+ struct task_struct *tsk;
struct ns_common *ns = get_proc_ns(file_inode(filp));
uid_t __user *argp;
uid_t uid;
+ int ret;
switch (ioctl) {
case NS_GET_USERNS:
@@ -143,9 +155,69 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp);
+ case NS_GET_MNTNS_ID: {
+ struct mnt_namespace *mnt_ns;
+ __u64 __user *idp;
+ __u64 id;
+
+ if (ns->ops->type != CLONE_NEWNS)
+ return -EINVAL;
+
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ idp = (__u64 __user *)arg;
+ id = mnt_ns->seq;
+ return put_user(id, idp);
+ }
+ case NS_GET_PID_FROM_PIDNS:
+ fallthrough;
+ case NS_GET_TGID_FROM_PIDNS:
+ fallthrough;
+ case NS_GET_PID_IN_PIDNS:
+ fallthrough;
+ case NS_GET_TGID_IN_PIDNS:
+ if (ns->ops->type != CLONE_NEWPID)
+ return -EINVAL;
+
+ ret = -ESRCH;
+ pid_ns = container_of(ns, struct pid_namespace, ns);
+
+ rcu_read_lock();
+
+ if (ioctl == NS_GET_PID_IN_PIDNS ||
+ ioctl == NS_GET_TGID_IN_PIDNS)
+ tsk = find_task_by_vpid(arg);
+ else
+ tsk = find_task_by_pid_ns(arg, pid_ns);
+ if (!tsk)
+ break;
+
+ switch (ioctl) {
+ case NS_GET_PID_FROM_PIDNS:
+ ret = task_pid_vnr(tsk);
+ break;
+ case NS_GET_TGID_FROM_PIDNS:
+ ret = task_tgid_vnr(tsk);
+ break;
+ case NS_GET_PID_IN_PIDNS:
+ ret = task_pid_nr_ns(tsk, pid_ns);
+ break;
+ case NS_GET_TGID_IN_PIDNS:
+ ret = task_tgid_nr_ns(tsk, pid_ns);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!ret)
+ ret = -ESRCH;
+ break;
default:
- return -ENOTTY;
+ ret = -ENOTTY;
}
+
+ return ret;
}
int ns_get_name(char *buf, size_t size, struct task_struct *task,
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 27fbde2701b6..c5b688c5f984 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -259,8 +259,8 @@ enum Opt {
// clang-format off
static const struct fs_parameter_spec ntfs_fs_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
fsparam_u32oct("umask", Opt_umask),
fsparam_u32oct("dmask", Opt_dmask),
fsparam_u32oct("fmask", Opt_fmask),
@@ -319,14 +319,10 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
switch (opt) {
case Opt_uid:
- opts->fs_uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(opts->fs_uid))
- return invalf(fc, "ntfs3: Invalid value for uid.");
+ opts->fs_uid = result.uid;
break;
case Opt_gid:
- opts->fs_gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(opts->fs_gid))
- return invalf(fc, "ntfs3: Invalid value for gid.");
+ opts->fs_gid = result.gid;
break;
case Opt_umask:
if (result.uint_32 & ~07777)
diff --git a/fs/open.c b/fs/open.c
index 278b3edcda44..22adbef7ecc2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -247,6 +247,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
long ret;
+ loff_t sum;
if (offset < 0 || len <= 0)
return -EINVAL;
@@ -319,8 +320,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
return -ENODEV;
- /* Check for wrap through zero too */
- if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
+ /* Check for wraparound */
+ if (check_add_overflow(offset, len, &sum))
+ return -EFBIG;
+
+ if (sum > inode->i_sb->s_maxbytes)
return -EFBIG;
if (!file->f_op->fallocate)
@@ -982,12 +986,11 @@ static int do_dentry_open(struct file *f,
*/
if (f->f_mode & FMODE_WRITE) {
/*
- * Paired with smp_mb() in collapse_file() to ensure nr_thps
- * is up to date and the update to i_writecount by
- * get_write_access() is visible. Ensures subsequent insertion
- * of THPs into the page cache will fail.
+ * Depends on full fence from get_write_access() to synchronize
+ * against collapse_file() regarding i_writecount and nr_thps
+ * updates. Ensures subsequent insertion of THPs into the page
+ * cache will fail.
*/
- smp_mb();
if (filemap_nr_thps(inode->i_mapping)) {
struct address_space *mapping = inode->i_mapping;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a7b527ea50d3..26ecda0e4d19 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -471,4 +471,5 @@ static void __exit exit_openprom_fs(void)
module_init(init_openprom_fs)
module_exit(exit_openprom_fs)
+MODULE_DESCRIPTION("OpenPROM filesystem support");
MODULE_LICENSE("GPL");
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 085912268442..fdb9b65db1de 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -56,7 +56,6 @@ static int orangefs_writepage_locked(struct page *page,
ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
len, wr, NULL, NULL);
if (ret < 0) {
- SetPageError(page);
mapping_set_error(page->mapping, ret);
} else {
ret = 0;
@@ -119,7 +118,6 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
0, &wr, NULL, NULL);
if (ret < 0) {
for (i = 0; i < ow->npages; i++) {
- SetPageError(ow->pages[i]);
mapping_set_error(ow->pages[i]->mapping, ret);
if (PagePrivate(ow->pages[i])) {
wrp = (struct orangefs_write_range *)
@@ -303,15 +301,10 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
iov_iter_zero(~0U, &iter);
/* takes care of potential aliasing */
flush_dcache_folio(folio);
- if (ret < 0) {
- folio_set_error(folio);
- } else {
- folio_mark_uptodate(folio);
+ if (ret > 0)
ret = 0;
- }
- /* unlock the folio after the ->read_folio() routine completes */
- folio_unlock(folio);
- return ret;
+ folio_end_read(folio, ret == 0);
+ return ret;
}
static int orangefs_write_begin(struct file *file,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index b501dc07f922..edcca4beb765 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -274,10 +274,8 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
gossip_err("orangefs error: asked for %d pages, only got %d.\n",
bufmap->page_count, ret);
- for (i = 0; i < ret; i++) {
- SetPageError(bufmap->page_array[i]);
+ for (i = 0; i < ret; i++)
unpin_user_page(bufmap->page_array[i]);
- }
return -ENOMEM;
}
diff --git a/fs/pidfs.c b/fs/pidfs.c
index dbb9d854d1c5..c9cb14181def 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -11,10 +11,16 @@
#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/pseudo_fs.h>
+#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <uapi/linux/pidfd.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
#include "internal.h"
+#include "mount.h"
#ifdef CONFIG_PROC_FS
/**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
return poll_flags;
}
+static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct task_struct *task __free(put_task) = NULL;
+ struct nsproxy *nsp __free(put_nsproxy) = NULL;
+ struct pid *pid = pidfd_pid(file);
+ struct ns_common *ns_common;
+
+ if (arg)
+ return -EINVAL;
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ return -ESRCH;
+
+ scoped_guard(task_lock, task) {
+ nsp = task->nsproxy;
+ if (nsp)
+ get_nsproxy(nsp);
+ }
+ if (!nsp)
+ return -ESRCH; /* just pretend it didn't exist */
+
+ /*
+ * We're trying to open a file descriptor to the namespace so perform a
+ * filesystem cred ptrace check. Also, we mirror nsfs behavior.
+ */
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ return -EACCES;
+
+ switch (cmd) {
+ /* Namespaces that hang of nsproxy. */
+ case PIDFD_GET_CGROUP_NAMESPACE:
+ get_cgroup_ns(nsp->cgroup_ns);
+ ns_common = to_ns_common(nsp->cgroup_ns);
+ break;
+ case PIDFD_GET_IPC_NAMESPACE:
+ get_ipc_ns(nsp->ipc_ns);
+ ns_common = to_ns_common(nsp->ipc_ns);
+ break;
+ case PIDFD_GET_MNT_NAMESPACE:
+ get_mnt_ns(nsp->mnt_ns);
+ ns_common = to_ns_common(nsp->mnt_ns);
+ break;
+ case PIDFD_GET_NET_NAMESPACE:
+ ns_common = to_ns_common(nsp->net_ns);
+ get_net_ns(ns_common);
+ break;
+ case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+ get_pid_ns(nsp->pid_ns_for_children);
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
+ break;
+ case PIDFD_GET_TIME_NAMESPACE:
+ get_time_ns(nsp->time_ns);
+ ns_common = to_ns_common(nsp->time_ns);
+ break;
+ case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+ get_time_ns(nsp->time_ns_for_children);
+ ns_common = to_ns_common(nsp->time_ns_for_children);
+ break;
+ case PIDFD_GET_UTS_NAMESPACE:
+ get_uts_ns(nsp->uts_ns);
+ ns_common = to_ns_common(nsp->uts_ns);
+ break;
+ /* Namespaces that don't hang of nsproxy. */
+ case PIDFD_GET_USER_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
+ rcu_read_unlock();
+ break;
+ case PIDFD_GET_PID_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
+ rcu_read_unlock();
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ /* open_namespace() unconditionally consumes the reference */
+ return open_namespace(ns_common);
+}
+
static const struct file_operations pidfs_file_operations = {
.poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
#endif
+ .unlocked_ioctl = pidfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
struct pid *pidfd_pid(const struct file *file)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 775ce0bcf08c..c02f1e63f82d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -202,8 +202,8 @@ int proc_alloc_inum(unsigned int *inum)
{
int i;
- i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1,
- GFP_KERNEL);
+ i = ida_alloc_max(&proc_inum_ida, UINT_MAX - PROC_DYNAMIC_FIRST,
+ GFP_KERNEL);
if (i < 0)
return i;
@@ -213,7 +213,7 @@ int proc_alloc_inum(unsigned int *inum)
void proc_free_inum(unsigned int inum)
{
- ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
+ ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
}
static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b1c2c0b82116..9553e77c9d31 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -21,7 +21,7 @@
#define list_for_each_table_entry(entry, header) \
entry = header->ctl_table; \
- for (size_t i = 0 ; i < header->ctl_table_size && entry->procname; ++i, entry++)
+ for (size_t i = 0 ; i < header->ctl_table_size; ++i, entry++)
static const struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
@@ -476,12 +476,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
make_empty_dir_inode(inode);
}
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
if (root->set_ownership)
root->set_ownership(head, &inode->i_uid, &inode->i_gid);
- else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
return inode;
}
@@ -951,14 +949,14 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set,
char *new_name;
new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
- sizeof(struct ctl_table)*2 + namelen + 1,
+ sizeof(struct ctl_table) + namelen + 1,
GFP_KERNEL);
if (!new)
return NULL;
node = (struct ctl_node *)(new + 1);
table = (struct ctl_table *)(node + 1);
- new_name = (char *)(table + 2);
+ new_name = (char *)(table + 1);
memcpy(new_name, name, namelen);
table[0].procname = new_name;
table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
@@ -1093,6 +1091,7 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
static int sysctl_check_table_array(const char *path, struct ctl_table *table)
{
+ unsigned int extra;
int err = 0;
if ((table->proc_handler == proc_douintvec) ||
@@ -1104,6 +1103,19 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
if (table->proc_handler == proc_dou8vec_minmax) {
if (table->maxlen != sizeof(u8))
err |= sysctl_err(path, table, "array not allowed");
+
+ if (table->extra1) {
+ extra = *(unsigned int *) table->extra1;
+ if (extra > 255U)
+ err |= sysctl_err(path, table,
+ "range value too large for proc_dou8vec_minmax");
+ }
+ if (table->extra2) {
+ extra = *(unsigned int *) table->extra2;
+ if (extra > 255U)
+ err |= sysctl_err(path, table,
+ "range value too large for proc_dou8vec_minmax");
+ }
}
if (table->proc_handler == proc_dobool) {
@@ -1119,6 +1131,8 @@ static int sysctl_check_table(const char *path, struct ctl_table_header *header)
struct ctl_table *entry;
int err = 0;
list_for_each_table_entry(entry, header) {
+ if (!entry->procname)
+ err |= sysctl_err(path, entry, "procname is null");
if ((entry->proc_handler == proc_dostring) ||
(entry->proc_handler == proc_dobool) ||
(entry->proc_handler == proc_dointvec) ||
@@ -1154,18 +1168,16 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_
struct ctl_table_header *links;
struct ctl_node *node;
char *link_name;
- int nr_entries, name_bytes;
+ int name_bytes;
name_bytes = 0;
- nr_entries = 0;
list_for_each_table_entry(entry, head) {
- nr_entries++;
name_bytes += strlen(entry->procname) + 1;
}
links = kzalloc(sizeof(struct ctl_table_header) +
- sizeof(struct ctl_node)*nr_entries +
- sizeof(struct ctl_table)*(nr_entries + 1) +
+ sizeof(struct ctl_node)*head->ctl_table_size +
+ sizeof(struct ctl_table)*head->ctl_table_size +
name_bytes,
GFP_KERNEL);
@@ -1173,8 +1185,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_
return NULL;
node = (struct ctl_node *)(links + 1);
- link_table = (struct ctl_table *)(node + nr_entries);
- link_name = (char *)&link_table[nr_entries + 1];
+ link_table = (struct ctl_table *)(node + head->ctl_table_size);
+ link_name = (char *)(link_table + head->ctl_table_size);
link = link_table;
list_for_each_table_entry(entry, head) {
@@ -1188,7 +1200,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_
}
init_header(links, dir->header.root, dir->header.set, node, link_table,
head->ctl_table_size);
- links->nreg = nr_entries;
+ links->nreg = head->ctl_table_size;
return links;
}
@@ -1300,28 +1312,23 @@ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path)
* __register_sysctl_table - register a leaf sysctl table
* @set: Sysctl tree to register on
* @path: The path to the directory the sysctl table is in.
- * @table: the top-level table structure without any child. This table
- * should not be free'd after registration. So it should not be
- * used on stack. It can either be a global or dynamically allocated
- * by the caller and free'd later after sysctl unregistration.
+ *
+ * @table: the top-level table structure. This table should not be free'd
+ * after registration. So it should not be used on stack. It can either
+ * be a global or dynamically allocated by the caller and free'd later
+ * after sysctl unregistration.
* @table_size : The number of elements in table
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
- * array. A completely 0 filled entry terminates the table.
+ * array.
*
* The members of the &struct ctl_table structure are used as follows:
- *
* procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
* enter a sysctl file
- *
- * data - a pointer to data for use by proc_handler
- *
- * maxlen - the maximum size in bytes of the data
- *
- * mode - the file permissions for the /proc/sys file
- *
- * child - must be %NULL.
- *
+ * data - a pointer to data for use by proc_handler
+ * maxlen - the maximum size in bytes of the data
+ * mode - the file permissions for the /proc/sys file
+ * type - Defines the target type (described in struct definition)
* proc_handler - the text handler routine (described below)
*
* extra1, extra2 - extra pointers usable by the proc handler routines
@@ -1329,8 +1336,7 @@ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path)
* [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
*
* Leaf nodes in the sysctl tree will be represented by a single file
- * under /proc; non-leaf nodes (where child is not NULL) are not allowed,
- * sysctl_check_table() verifies this.
+ * under /proc; non-leaf nodes are not allowed.
*
* There must be a proc_handler routine for any terminal nodes.
* Several default handlers are available to cover common cases -
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 0a808951b7d3..e133b507ddf3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -61,7 +61,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
return security_sb_show_options(m, sb);
}
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
static const struct proc_fs_opts mnt_opts[] = {
{ MNT_NOSUID, ",nosuid" },
@@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
err = show_sb_opts(m, sb);
if (err)
goto out;
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt_path.dentry);
seq_puts(m, " 0 0\n");
@@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
goto out;
seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
/* Tagged fields ("foo:X" or "bar") */
if (IS_MNT_SHARED(r))
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index de8cf5d75f34..65b2473e22ff 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -241,7 +241,7 @@ err:
/* get information of pstore/blk */
int pstore_blk_get_config(struct pstore_blk_config *info)
{
- strncpy(info->device, blkdev, 80);
+ strscpy(info->device, blkdev);
info->max_reason = max_reason;
info->kmsg_size = check_size(kmsg_size, 4096);
info->pmsg_size = check_size(pmsg_size, 4096);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 03425928d2fb..3497ede88aa0 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -761,4 +761,5 @@ static void __exit pstore_exit(void)
module_exit(pstore_exit)
MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
+MODULE_DESCRIPTION("Persistent Storage - platform driver interface");
MODULE_LICENSE("GPL");
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index d79841e94428..e399e2dd3a12 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -430,5 +430,6 @@ static void __exit exit_qnx4_fs(void)
module_init(init_qnx4_fs)
module_exit(exit_qnx4_fs)
+MODULE_DESCRIPTION("QNX4 file system");
MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index d62fbef838b6..4f1735b882b1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -694,4 +694,5 @@ static void __exit exit_qnx6_fs(void)
module_init(init_qnx6_fs)
module_exit(exit_qnx6_fs)
+MODULE_DESCRIPTION("QNX6 file system");
MODULE_LICENSE("GPL");
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 627eb2f72ef3..a2b256dac36e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2246,9 +2246,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
int cnt;
struct quota_info *dqopt = sb_dqopt(sb);
- /* s_umount should be held in exclusive mode */
- if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
- up_read(&sb->s_umount);
+ rwsem_assert_held_write(&sb->s_umount);
/* Cannot turn off usage accounting without turning off limits, or
* suspend quotas and simultaneously turn quotas off. */
@@ -2510,9 +2508,7 @@ int dquot_resume(struct super_block *sb, int type)
int ret = 0, cnt;
unsigned int flags;
- /* s_umount should be held in exclusive mode */
- if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
- up_read(&sb->s_umount);
+ rwsem_assert_held_write(&sb->s_umount);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
diff --git a/fs/read_write.c b/fs/read_write.c
index ef6339391351..90e283b31ca1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -730,7 +730,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
- ret = kiocb_set_rw_flags(&kiocb, flags);
+ ret = kiocb_set_rw_flags(&kiocb, flags, type);
if (ret)
return ret;
kiocb.ki_pos = (ppos ? *ppos : 0);
@@ -1736,3 +1736,19 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
return 0;
}
+
+bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos)
+{
+ size_t len = iov_iter_count(iter);
+
+ if (!iter_is_ubuf(iter))
+ return false;
+
+ if (!is_power_of_2(len))
+ return false;
+
+ if (!IS_ALIGNED(pos, len))
+ return false;
+
+ return true;
+}
diff --git a/fs/readdir.c b/fs/readdir.c
index 278bc0254732..d6c82421902a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -22,8 +22,6 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
-
/*
* Some filesystems were never converted to '->iterate_shared()'
* and their directory iterators want the inode lock held for
@@ -72,7 +70,7 @@ int wrap_directory_iterator(struct file *file,
EXPORT_SYMBOL(wrap_directory_iterator);
/*
- * Note the "unsafe_put_user() semantics: we goto a
+ * Note the "unsafe_put_user()" semantics: we goto a
* label for errors.
*/
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c1daedc50f4c..9b43a81a6488 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2699,7 +2699,6 @@ fail:
}
bh = bh->b_this_page;
} while (bh != head);
- folio_set_error(folio);
BUG_ON(folio_test_writeback(folio));
folio_start_writeback(folio);
folio_unlock(folio);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2cbb92462074..68758b6fed94 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -101,19 +101,15 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos);
*/
static int romfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
loff_t offset, size;
unsigned long fillsize, pos;
void *buf;
int ret;
- buf = kmap(page);
- if (!buf)
- return -ENOMEM;
+ buf = kmap_local_folio(folio, 0);
- /* 32 bit warning -- but not for us :) */
- offset = page_offset(page);
+ offset = folio_pos(folio);
size = i_size_read(inode);
fillsize = 0;
ret = 0;
@@ -125,20 +121,14 @@ static int romfs_read_folio(struct file *file, struct folio *folio)
ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize);
if (ret < 0) {
- SetPageError(page);
fillsize = 0;
ret = -EIO;
}
}
- if (fillsize < PAGE_SIZE)
- memset(buf + fillsize, 0, PAGE_SIZE - fillsize);
- if (ret == 0)
- SetPageUptodate(page);
-
- flush_dcache_page(page);
- kunmap(page);
- unlock_page(page);
+ buf = folio_zero_tail(folio, fillsize, buf);
+ kunmap_local(buf);
+ folio_end_read(folio, ret == 0);
return ret;
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 557b68e99d0a..a865941724c0 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1918,8 +1918,8 @@ require use of the stronger protocol */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
-#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
-#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2)
+#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL)
+#define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL)
#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
/*
*****************************************************************
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 3bbac925d076..bc926ab2555b 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -128,12 +128,14 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_flag("compress", Opt_compress),
fsparam_flag("witness", Opt_witness),
+ /* Mount options which take uid or gid */
+ fsparam_uid("backupuid", Opt_backupuid),
+ fsparam_gid("backupgid", Opt_backupgid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_uid("cruid", Opt_cruid),
+ fsparam_gid("gid", Opt_gid),
+
/* Mount options which take numeric value */
- fsparam_u32("backupuid", Opt_backupuid),
- fsparam_u32("backupgid", Opt_backupgid),
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("cruid", Opt_cruid),
- fsparam_u32("gid", Opt_gid),
fsparam_u32("file_mode", Opt_file_mode),
fsparam_u32("dirmode", Opt_dirmode),
fsparam_u32("dir_mode", Opt_dirmode),
@@ -951,8 +953,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
int i, opt;
bool is_smb3 = !strcmp(fc->fs_type->name, "smb3");
bool skip_parsing = false;
- kuid_t uid;
- kgid_t gid;
cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key);
@@ -1083,38 +1083,23 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->linux_uid = uid;
+ ctx->linux_uid = result.uid;
ctx->uid_specified = true;
break;
case Opt_cruid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->cred_uid = uid;
+ ctx->cred_uid = result.uid;
ctx->cruid_specified = true;
break;
case Opt_backupuid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->backupuid = uid;
+ ctx->backupuid = result.uid;
ctx->backupuid_specified = true;
break;
case Opt_backupgid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- goto cifs_parse_mount_err;
- ctx->backupgid = gid;
+ ctx->backupgid = result.gid;
ctx->backupgid_specified = true;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- goto cifs_parse_mount_err;
- ctx->linux_gid = gid;
+ ctx->linux_gid = result.gid;
ctx->gid_specified = true;
break;
case Opt_port:
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8d10be1fe18a..c3ee42188d25 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -917,6 +917,40 @@ struct smb2_query_directory_rsp {
__u8 Buffer[];
} __packed;
+/* DeviceType Flags */
+#define FILE_DEVICE_CD_ROM 0x00000002
+#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003
+#define FILE_DEVICE_DFS 0x00000006
+#define FILE_DEVICE_DISK 0x00000007
+#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008
+#define FILE_DEVICE_FILE_SYSTEM 0x00000009
+#define FILE_DEVICE_NAMED_PIPE 0x00000011
+#define FILE_DEVICE_NETWORK 0x00000012
+#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014
+#define FILE_DEVICE_NULL 0x00000015
+#define FILE_DEVICE_PARALLEL_PORT 0x00000016
+#define FILE_DEVICE_PRINTER 0x00000018
+#define FILE_DEVICE_SERIAL_PORT 0x0000001b
+#define FILE_DEVICE_STREAMS 0x0000001e
+#define FILE_DEVICE_TAPE 0x0000001f
+#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020
+#define FILE_DEVICE_VIRTUAL_DISK 0x00000024
+#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028
+
+/* Device Characteristics */
+#define FILE_REMOVABLE_MEDIA 0x00000001
+#define FILE_READ_ONLY_DEVICE 0x00000002
+#define FILE_FLOPPY_DISKETTE 0x00000004
+#define FILE_WRITE_ONCE_MEDIA 0x00000008
+#define FILE_REMOTE_DEVICE 0x00000010
+#define FILE_DEVICE_IS_MOUNTED 0x00000020
+#define FILE_VIRTUAL_VOLUME 0x00000040
+#define FILE_DEVICE_SECURE_OPEN 0x00000100
+#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000
+#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000
+#define FILE_PORTABLE_DEVICE 0x00004000
+#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000
+
/*
* Maximum number of iovs we need for a set-info request.
* The largest one is rename/hardlink
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index e7e07891781b..840c71c66b30 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2051,15 +2051,22 @@ out_err1:
* @access: file access flags
* @disposition: file disposition flags
* @may_flags: set with MAY_ flags
+ * @is_dir: is creating open flags for directory
*
* Return: file open flags
*/
static int smb2_create_open_flags(bool file_present, __le32 access,
__le32 disposition,
- int *may_flags)
+ int *may_flags,
+ bool is_dir)
{
int oflags = O_NONBLOCK | O_LARGEFILE;
+ if (is_dir) {
+ access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
+ ksmbd_debug(SMB, "Discard write access to a directory\n");
+ }
+
if (access & FILE_READ_DESIRED_ACCESS_LE &&
access & FILE_WRITE_DESIRE_ACCESS_LE) {
oflags |= O_RDWR;
@@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work)
open_flags = smb2_create_open_flags(file_present, daccess,
req->CreateDisposition,
- &may_flags);
+ &may_flags,
+ req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
+ (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
if (open_flags & (O_CREAT | O_TRUNC)) {
@@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info = (struct filesystem_device_info *)rsp->Buffer;
- info->DeviceType = cpu_to_le32(stfs.f_type);
- info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+ info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK);
+ info->DeviceCharacteristics =
+ cpu_to_le32(FILE_DEVICE_IS_MOUNTED);
+ if (!test_tree_conn_flag(work->tcon,
+ KSMBD_TREE_CONN_FLAG_WRITABLE))
+ info->DeviceCharacteristics |=
+ cpu_to_le32(FILE_READ_ONLY_DEVICE);
rsp->OutputBufferLength = cpu_to_le32(8);
break;
}
diff --git a/fs/stat.c b/fs/stat.c
index 70bd3e888cfa..89ce1be56310 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -90,6 +90,37 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat)
EXPORT_SYMBOL(generic_fill_statx_attr);
/**
+ * generic_fill_statx_atomic_writes - Fill in atomic writes statx attributes
+ * @stat: Where to fill in the attribute flags
+ * @unit_min: Minimum supported atomic write length in bytes
+ * @unit_max: Maximum supported atomic write length in bytes
+ *
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
+ * atomic write unit_min and unit_max values.
+ */
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max)
+{
+ /* Confirm that the request type is known */
+ stat->result_mask |= STATX_WRITE_ATOMIC;
+
+ /* Confirm that the file attribute type is known */
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC;
+
+ if (unit_min) {
+ stat->atomic_write_unit_min = unit_min;
+ stat->atomic_write_unit_max = unit_max;
+ /* Initially only allow 1x segment */
+ stat->atomic_write_segments_max = 1;
+
+ /* Confirm atomic writes are actually supported */
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC;
+ }
+}
+EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes);
+
+/**
* vfs_getattr_nosec - getattr without security checks
* @path: file to get attributes from
* @stat: structure to return attributes in
@@ -214,6 +245,43 @@ int getname_statx_lookup_flags(int flags)
return lookup_flags;
}
+static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
+ u32 request_mask)
+{
+ int error = vfs_getattr(path, stat, request_mask, flags);
+
+ if (request_mask & STATX_MNT_ID_UNIQUE) {
+ stat->mnt_id = real_mount(path->mnt)->mnt_id_unique;
+ stat->result_mask |= STATX_MNT_ID_UNIQUE;
+ } else {
+ stat->mnt_id = real_mount(path->mnt)->mnt_id;
+ stat->result_mask |= STATX_MNT_ID;
+ }
+
+ if (path_mounted(path))
+ stat->attributes |= STATX_ATTR_MOUNT_ROOT;
+ stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
+
+ /*
+ * If this is a block device inode, override the filesystem
+ * attributes with the block device specific parameters that need to be
+ * obtained from the bdev backing inode.
+ */
+ if (S_ISBLK(stat->mode))
+ bdev_statx(path, stat, request_mask);
+
+ return error;
+}
+
+static int vfs_statx_fd(int fd, int flags, struct kstat *stat,
+ u32 request_mask)
+{
+ CLASS(fd_raw, f)(fd);
+ if (!f.file)
+ return -EBADF;
+ return vfs_statx_path(&f.file->f_path, flags, stat, request_mask);
+}
+
/**
* vfs_statx - Get basic and extra attributes by filename
* @dfd: A file descriptor representing the base dir for a relative filename
@@ -243,36 +311,13 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
retry:
error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
if (error)
- goto out;
-
- error = vfs_getattr(&path, stat, request_mask, flags);
-
- if (request_mask & STATX_MNT_ID_UNIQUE) {
- stat->mnt_id = real_mount(path.mnt)->mnt_id_unique;
- stat->result_mask |= STATX_MNT_ID_UNIQUE;
- } else {
- stat->mnt_id = real_mount(path.mnt)->mnt_id;
- stat->result_mask |= STATX_MNT_ID;
- }
-
- if (path.mnt->mnt_root == path.dentry)
- stat->attributes |= STATX_ATTR_MOUNT_ROOT;
- stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
-
- /* Handle STATX_DIOALIGN for block devices. */
- if (request_mask & STATX_DIOALIGN) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- if (S_ISBLK(inode->i_mode))
- bdev_statx_dioalign(inode, stat);
- }
-
+ return error;
+ error = vfs_statx_path(&path, flags, stat, request_mask);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-out:
return error;
}
@@ -289,18 +334,10 @@ int vfs_fstatat(int dfd, const char __user *filename,
* If AT_EMPTY_PATH is set, we expect the common case to be that
* empty path, and avoid doing all the extra pathname work.
*/
- if (dfd >= 0 && flags == AT_EMPTY_PATH) {
- char c;
-
- ret = get_user(c, filename);
- if (unlikely(ret))
- return ret;
+ if (flags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename))
+ return vfs_fstat(dfd, stat);
- if (likely(!c))
- return vfs_fstat(dfd, stat);
- }
-
- name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
+ name = getname_flags(filename, getname_statx_lookup_flags(statx_flags));
ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
putname(name);
@@ -488,34 +525,39 @@ static int do_readlinkat(int dfd, const char __user *pathname,
char __user *buf, int bufsiz)
{
struct path path;
+ struct filename *name;
int error;
- int empty = 0;
unsigned int lookup_flags = LOOKUP_EMPTY;
if (bufsiz <= 0)
return -EINVAL;
retry:
- error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
- if (!error) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- error = empty ? -ENOENT : -EINVAL;
- /*
- * AFS mountpoints allow readlink(2) but are not symlinks
- */
- if (d_is_symlink(path.dentry) || inode->i_op->readlink) {
- error = security_inode_readlink(path.dentry);
- if (!error) {
- touch_atime(&path);
- error = vfs_readlink(path.dentry, buf, bufsiz);
- }
- }
- path_put(&path);
- if (retry_estale(error, lookup_flags)) {
- lookup_flags |= LOOKUP_REVAL;
- goto retry;
+ name = getname_flags(pathname, lookup_flags);
+ error = filename_lookup(dfd, name, lookup_flags, &path, NULL);
+ if (unlikely(error)) {
+ putname(name);
+ return error;
+ }
+
+ /*
+ * AFS mountpoints allow readlink(2) but are not symlinks
+ */
+ if (d_is_symlink(path.dentry) ||
+ d_backing_inode(path.dentry)->i_op->readlink) {
+ error = security_inode_readlink(path.dentry);
+ if (!error) {
+ touch_atime(&path);
+ error = vfs_readlink(path.dentry, buf, bufsiz);
}
+ } else {
+ error = (name->name[0] == '\0') ? -ENOENT : -EINVAL;
+ }
+ path_put(&path);
+ putname(name);
+ if (retry_estale(error, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
}
return error;
}
@@ -659,6 +701,9 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
tmp.stx_subvol = stat->subvol;
+ tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
+ tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
+ tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
@@ -674,7 +719,8 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
return -EINVAL;
- /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+ /*
+ * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
* from userland.
*/
mask &= ~STATX_CHANGE_COOKIE;
@@ -686,16 +732,41 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
return cp_statx(&stat, buffer);
}
+int do_statx_fd(int fd, unsigned int flags, unsigned int mask,
+ struct statx __user *buffer)
+{
+ struct kstat stat;
+ int error;
+
+ if (mask & STATX__RESERVED)
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+
+ /*
+ * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+ * from userland.
+ */
+ mask &= ~STATX_CHANGE_COOKIE;
+
+ error = vfs_statx_fd(fd, flags, &stat, mask);
+ if (error)
+ return error;
+
+ return cp_statx(&stat, buffer);
+}
+
/**
* sys_statx - System call to get enhanced stats
* @dfd: Base directory to pathwalk from *or* fd to stat.
- * @filename: File to stat or "" with AT_EMPTY_PATH
+ * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH
* @flags: AT_* flags to control pathwalk.
* @mask: Parts of statx struct actually required.
* @buffer: Result buffer.
*
* Note that fstat() can be emulated by setting dfd to the fd of interest,
- * supplying "" as the filename and setting AT_EMPTY_PATH in the flags.
+ * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH
+ * in the flags.
*/
SYSCALL_DEFINE5(statx,
int, dfd, const char __user *, filename, unsigned, flags,
@@ -703,9 +774,24 @@ SYSCALL_DEFINE5(statx,
struct statx __user *, buffer)
{
int ret;
+ unsigned lflags;
struct filename *name;
- name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL);
+ /*
+ * Short-circuit handling of NULL and "" paths.
+ *
+ * For a NULL path we require and accept only the AT_EMPTY_PATH flag
+ * (possibly |'d with AT_STATX flags).
+ *
+ * However, glibc on 32-bit architectures implements fstatat as statx
+ * with the "" pathname and AT_NO_AUTOMOUNT | AT_EMPTY_PATH flags.
+ * Supporting this results in the uglification below.
+ */
+ lflags = flags & ~(AT_NO_AUTOMOUNT | AT_STATX_SYNC_TYPE);
+ if (lflags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename))
+ return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer);
+
+ name = getname_flags(filename, getname_statx_lookup_flags(flags));
ret = do_statx(dfd, name, flags, mask, buffer);
putname(name);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3365a30dc1e0..5c0d07ddbda2 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -591,4 +591,5 @@ static void __exit exit_sysv_fs(void)
module_init(init_sysv_fs)
module_exit(exit_sysv_fs)
+MODULE_DESCRIPTION("SystemV Filesystem");
MODULE_LICENSE("GPL");
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 7c29f4afc23d..1028ab6d9a74 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -296,9 +296,9 @@ enum {
};
static const struct fs_parameter_spec tracefs_param_specs[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -306,8 +306,6 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param
{
struct tracefs_fs_info *opts = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, tracefs_param_specs, param, &result);
@@ -316,16 +314,10 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param
switch (opt) {
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalf(fc, "Unknown uid");
- opts->uid = uid;
+ opts->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalf(fc, "Unknown gid");
- opts->gid = gid;
+ opts->gid = result.gid;
break;
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 27c85d92d1dc..61f25d3cf3f7 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -188,7 +188,6 @@ Eend:
"offset=%lu",
dir->i_ino, (page->index<<PAGE_SHIFT)+offs);
fail:
- SetPageError(page);
return false;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index eee7320ab0b0..17e409ceaa33 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
goto out;
features = uffdio_api.features;
ret = -EINVAL;
- if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ if (uffdio_api.api != UFFD_API)
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
+
+ ret = -EINVAL;
+ if (features & ~uffdio_api.features)
+ goto err_out;
+
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 118dedef8ebe..fdb4da24d662 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -228,26 +228,19 @@ const struct inode_operations vboxsf_reg_iops = {
static int vboxsf_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
struct vboxsf_handle *sf_handle = file->private_data;
- loff_t off = page_offset(page);
+ loff_t off = folio_pos(folio);
u32 nread = PAGE_SIZE;
u8 *buf;
int err;
- buf = kmap(page);
+ buf = kmap_local_folio(folio, 0);
err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
- if (err == 0) {
- memset(&buf[nread], 0, PAGE_SIZE - nread);
- flush_dcache_page(page);
- SetPageUptodate(page);
- } else {
- SetPageError(page);
- }
+ buf = folio_zero_tail(folio, nread, buf + nread);
- kunmap(page);
- unlock_page(page);
+ kunmap_local(buf);
+ folio_end_read(folio, err == 0);
return err;
}
@@ -295,7 +288,6 @@ static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
kref_put(&sf_handle->refcount, vboxsf_handle_release);
if (err == 0) {
- ClearPageError(page);
/* mtime changed */
sf_i->force_restat = 1;
} else {
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index ffb1d565da39..e95b8a48d8a0 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -41,8 +41,8 @@ enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode,
static const struct fs_parameter_spec vboxsf_fs_parameters[] = {
fsparam_string ("nls", opt_nls),
- fsparam_u32 ("uid", opt_uid),
- fsparam_u32 ("gid", opt_gid),
+ fsparam_uid ("uid", opt_uid),
+ fsparam_gid ("gid", opt_gid),
fsparam_u32 ("ttl", opt_ttl),
fsparam_u32oct ("dmode", opt_dmode),
fsparam_u32oct ("fmode", opt_fmode),
@@ -55,8 +55,6 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct vboxsf_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, vboxsf_fs_parameters, param, &result);
@@ -73,16 +71,10 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
param->string = NULL;
break;
case opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return -EINVAL;
- ctx->o.uid = uid;
+ ctx->o.uid = result.uid;
break;
case opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return -EINVAL;
- ctx->o.gid = gid;
+ ctx->o.gid = result.gid;
break;
case opt_ttl:
ctx->o.ttl = msecs_to_jiffies(result.uint_32);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9967334ea99f..cf629302d48e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -86,9 +86,8 @@ xfs_inode_alloc(
return NULL;
}
- /* VFS doesn't initialise i_mode or i_state! */
+ /* VFS doesn't initialise i_mode! */
VFS_I(ip)->i_mode = 0;
- VFS_I(ip)->i_state = 0;
mapping_set_large_folios(VFS_I(ip)->i_mapping);
XFS_STATS_INC(mp, vn_active);
@@ -314,6 +313,7 @@ xfs_reinit_inode(
dev_t dev = inode->i_rdev;
kuid_t uid = inode->i_uid;
kgid_t gid = inode->i_gid;
+ unsigned long state = inode->i_state;
error = inode_init_always(mp->m_super, inode);
@@ -324,6 +324,7 @@ xfs_reinit_inode(
inode->i_rdev = dev;
inode->i_uid = uid;
inode->i_gid = gid;
+ inode->i_state = state;
mapping_set_large_folios(inode->i_mapping);
return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff222827e550..a00dcbc77e12 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,8 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -811,6 +813,7 @@ xfs_setattr_size(
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
+ uint resblks = 0;
bool did_zeroing = false;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
@@ -917,7 +920,17 @@ xfs_setattr_size(
return error;
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ /*
+ * For realtime inode with more than one block rtextsize, we need the
+ * block reservation for bmap btree block allocations/splits that can
+ * happen since it could split the tail written extent and convert the
+ * right beyond EOF one to unwritten.
+ */
+ if (xfs_inode_has_bigrtalloc(ip))
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, 0, &tp);
if (error)
return error;