diff options
Diffstat (limited to 'fs')
41 files changed, 435 insertions, 281 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9c380e7edf62..0cc02577577b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -391,7 +391,7 @@ static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) struct rb_node **p = &preftrees->direct.root.rb_root.rb_node; struct rb_node *parent = NULL; struct prelim_ref *ref = NULL; - struct prelim_ref target = {0}; + struct prelim_ref target = {}; int result; target.parent = bytenr; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 47f66c6a7d7f..696f47103cfc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -916,7 +916,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out; + goto out_put_group; } /* @@ -954,7 +954,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out; + goto out_put_group; } clear_nlink(inode); /* One for the block groups ref */ @@ -977,13 +977,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out; + goto out_put_group; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out; + goto out_put_group; btrfs_release_path(path); } @@ -1102,9 +1102,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, block_group); if (ret) - goto out; + goto out_put_group; - btrfs_put_block_group(block_group); + /* Once for the block groups rbtree */ btrfs_put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -1127,6 +1127,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, /* once for the tree */ free_extent_map(em); } + +out_put_group: + /* Once for the lookup reference */ + btrfs_put_block_group(block_group); out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); @@ -1288,11 +1292,15 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, if (ret) goto err; mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) + btrfs_put_transaction(prev_trans); return true; err: mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) + btrfs_put_transaction(prev_trans); btrfs_dec_block_group_ro(bg); return false; } diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h index 21a15776dac4..353228d62f5a 100644 --- a/fs/btrfs/discard.h +++ b/fs/btrfs/discard.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_DISCARD_H #define BTRFS_DISCARD_H diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6cb5cbbdb9f..d10c7be10f3b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2036,9 +2036,6 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) for (i = 0; i < ret; i++) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } - - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) - btrfs_free_log_root_tree(NULL, fs_info); } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) @@ -3888,7 +3885,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->fs_roots_radix_lock); if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - btrfs_free_log(NULL, root); + ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); root->reloc_root = NULL; @@ -4211,6 +4208,36 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) up_write(&fs_info->cleanup_work_sem); } +static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *gang[8]; + u64 root_objectid = 0; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang))) != 0) { + int i; + + for (i = 0; i < ret; i++) + gang[i] = btrfs_grab_root(gang[i]); + spin_unlock(&fs_info->fs_roots_radix_lock); + + for (i = 0; i < ret; i++) { + if (!gang[i]) + continue; + root_objectid = gang[i]->root_key.objectid; + btrfs_free_log(NULL, gang[i]); + btrfs_put_root(gang[i]); + } + root_objectid++; + spin_lock(&fs_info->fs_roots_radix_lock); + } + spin_unlock(&fs_info->fs_roots_radix_lock); + btrfs_free_log_root_tree(NULL, fs_info); +} + static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; @@ -4603,6 +4630,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_all_delalloc_inodes(fs_info); + btrfs_drop_all_logs(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d35936c934ab..03bc7134e8cb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4559,6 +4559,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); list_add_tail(&reloc_root->root_list, &reloc_roots); + btrfs_end_transaction(trans); goto out_unset; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8cede6eb9843..2d5498136e5e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -662,10 +662,19 @@ again: } got_it: - btrfs_record_root_in_trans(h, root); - if (!current->journal_info) current->journal_info = h; + + /* + * btrfs_record_root_in_trans() needs to alloc new extents, and may + * call btrfs_join_transaction() while we're also starting a + * transaction. + * + * Thus it need to be called after current->journal_info initialized, + * or we can deadlock. + */ + btrfs_record_root_in_trans(h, root); + return h; join_fail: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ec36a7c6ba3d..02ebdd9edc19 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4226,6 +4226,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; bool dropped_extents = false; + u64 truncate_offset = i_size; + struct extent_buffer *leaf; + int slot; int ins_nr = 0; int start_slot; int ret; @@ -4240,9 +4243,43 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (ret < 0) goto out; + /* + * We must check if there is a prealloc extent that starts before the + * i_size and crosses the i_size boundary. This is to ensure later we + * truncate down to the end of that extent and not to the i_size, as + * otherwise we end up losing part of the prealloc extent after a log + * replay and with an implicit hole if there is another prealloc extent + * that starts at an offset beyond i_size. + */ + ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY); + if (ret < 0) + goto out; + + if (ret == 0) { + struct btrfs_file_extent_item *ei; + + leaf = path->nodes[0]; + slot = path->slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) == + BTRFS_FILE_EXTENT_PREALLOC) { + u64 extent_end; + + btrfs_item_key_to_cpu(leaf, &key, slot); + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, ei); + + if (extent_end > i_size) + truncate_offset = extent_end; + } + } else { + ret = 0; + } + while (true) { - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; + leaf = path->nodes[0]; + slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { @@ -4280,7 +4317,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, root->log_root, &inode->vfs_inode, - i_size, + truncate_offset, BTRFS_EXTENT_DATA_KEY); } while (ret == -EAGAIN); if (ret) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 185db76300b3..5f3aa4d607de 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want, ret = try_get_cap_refs(inode, need, want, 0, flags, got); /* three special error codes */ - if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN) + if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE) ret = 0; return ret; } @@ -3746,6 +3746,7 @@ retry: WARN_ON(1); tsession = NULL; target = -1; + mutex_lock(&session->s_mutex); } goto retry; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 486f91f9685b..7c63abf5bea9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session, void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; u32 op; - u64 seq; - unsigned long features = 0; + u64 seq, features = 0; int wake = 0; bool blacklisted = false; @@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); - memcpy(&features, p, min_t(size_t, len, sizeof(features))); - p += len; + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); } mutex_lock(&mdsc->mutex); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index de56dee60540..19507e2fdb57 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - pr_warn("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + dout("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 05dd3dea684b..39b708d9d86d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1891,7 +1891,8 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list; /* * This lock protects the cifs_tcp_ses_list, the list of smb sessions per * tcp session, and the list of tcon's per smb session. It also protects - * the reference counters for the server, smb session, and tcon. Finally, + * the reference counters for the server, smb session, and tcon. It also + * protects some fields in the TCP_Server_Info struct such as dstaddr. Finally, * changes to the tcon->tidStatus should be done while holding this lock. * generally the locks should be taken in order tcp_ses_lock before * tcon->open_file_lock and that before file->file_info_lock since the diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 95b3ab0ca8c0..28268ed461b8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -375,8 +375,10 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server) return rc; } + spin_lock(&cifs_tcp_ses_lock); rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, strlen(ipaddr)); + spin_unlock(&cifs_tcp_ses_lock); kfree(ipaddr); return !rc ? -1 : 0; @@ -3373,6 +3375,10 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &ses->tcon_list) { tcon = list_entry(tmp, struct cifs_tcon, tcon_list); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (tcon->dfs_path) + continue; +#endif if (!match_tcon(tcon, volume_info)) continue; ++tcon->tc_count; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a456febd4109..550ce9020a3e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1025,51 +1025,99 @@ int copy_path_name(char *dst, const char *src) } struct super_cb_data { - struct TCP_Server_Info *server; + void *data; struct super_block *sb; }; -static void super_cb(struct super_block *sb, void *arg) +static void tcp_super_cb(struct super_block *sb, void *arg) { - struct super_cb_data *d = arg; + struct super_cb_data *sd = arg; + struct TCP_Server_Info *server = sd->data; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - if (d->sb) + if (sd->sb) return; cifs_sb = CIFS_SB(sb); tcon = cifs_sb_master_tcon(cifs_sb); - if (tcon->ses->server == d->server) - d->sb = sb; + if (tcon->ses->server == server) + sd->sb = sb; } -struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server) +static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void *), + void *data) { - struct super_cb_data d = { - .server = server, + struct super_cb_data sd = { + .data = data, .sb = NULL, }; - iterate_supers_type(&cifs_fs_type, super_cb, &d); + iterate_supers_type(&cifs_fs_type, f, &sd); - if (unlikely(!d.sb)) - return ERR_PTR(-ENOENT); + if (!sd.sb) + return ERR_PTR(-EINVAL); /* * Grab an active reference in order to prevent automounts (DFS links) * of expiring and then freeing up our cifs superblock pointer while * we're doing failover. */ - cifs_sb_active(d.sb); - return d.sb; + cifs_sb_active(sd.sb); + return sd.sb; } -void cifs_put_tcp_super(struct super_block *sb) +static void __cifs_put_super(struct super_block *sb) { if (!IS_ERR_OR_NULL(sb)) cifs_sb_deactive(sb); } +struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server) +{ + return __cifs_get_super(tcp_super_cb, server); +} + +void cifs_put_tcp_super(struct super_block *sb) +{ + __cifs_put_super(sb); +} + +#ifdef CONFIG_CIFS_DFS_UPCALL +static void tcon_super_cb(struct super_block *sb, void *arg) +{ + struct super_cb_data *sd = arg; + struct cifs_tcon *tcon = sd->data; + struct cifs_sb_info *cifs_sb; + + if (sd->sb) + return; + + cifs_sb = CIFS_SB(sb); + if (tcon->dfs_path && cifs_sb->origin_fullpath && + !strcasecmp(tcon->dfs_path, cifs_sb->origin_fullpath)) + sd->sb = sb; +} + +static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon) +{ + return __cifs_get_super(tcon_super_cb, tcon); +} + +static inline void cifs_put_tcon_super(struct super_block *sb) +{ + __cifs_put_super(sb); +} +#else +static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void cifs_put_tcon_super(struct super_block *sb) +{ +} +#endif + int update_super_prepath(struct cifs_tcon *tcon, const char *prefix, size_t prefix_len) { @@ -1077,7 +1125,7 @@ int update_super_prepath(struct cifs_tcon *tcon, const char *prefix, struct cifs_sb_info *cifs_sb; int rc = 0; - sb = cifs_get_tcp_super(tcon->ses->server); + sb = cifs_get_tcon_super(tcon); if (IS_ERR(sb)) return PTR_ERR(sb); @@ -1099,6 +1147,6 @@ int update_super_prepath(struct cifs_tcon *tcon, const char *prefix, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; out: - cifs_put_tcp_super(sb); + cifs_put_tcon_super(sb); return rc; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b36c46f48705..f829f4165d38 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -687,6 +687,11 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; + if (!server->ops->new_lease_key) + return -EIO; + + server->ops->new_lease_key(pfid); + memset(rqst, 0, sizeof(rqst)); resp_buftype[0] = resp_buftype[1] = CIFS_NO_BUFFER; memset(rsp_iov, 0, sizeof(rsp_iov)); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf7b7e1d5bd7..cb733652ecca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; diff --git a/fs/coredump.c b/fs/coredump.c index 408418e6aa13..478a0d810136 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (displaced) put_files_struct(displaced); if (!dump_interrupted()) { + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm.file) { + pr_info("Core dump to |%s disabled\n", cn.corename); + goto close_fail; + } file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); file_end_write(cprm.file); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2d357680094c..ae49a55bda00 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -506,20 +506,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will - * be returned. */ -struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value) +void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, + u32 *value) { - return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, + debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, &fops_u32_ro, &fops_u32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u32); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8c596641a72b..12eebcdea9c8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; + /* Fast preliminary check */ + if (epi->next != EP_UNACTIVE_PTR) + return false; + /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; @@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (epi->next == EP_UNACTIVE_PTR && - chain_epi_lockless(epi)) + if (chain_epi_lockless(epi)) + ep_pm_stay_awake_rcu(epi); + } else if (!ep_is_linked(epi)) { + /* In the usual case, add event to ready list. */ + if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); - goto out_unlock; - } - - /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(epi) && - list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) { - ep_pm_stay_awake_rcu(epi); } /* @@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; - bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1867,55 +1866,75 @@ fetch_events: */ ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. We need - * to sleep here, and we will be woken by ep_poll_callback() when events - * become available. - */ - if (!waiter) { - waiter = true; - init_waitqueue_entry(&wait, current); + do { + /* + * Internally init_wait() uses autoremove_wake_function(), + * thus wait entry is removed from the wait queue on each + * wakeup. Why it is important? In case of several waiters + * each new wakeup will hit the next waiter, giving it the + * chance to harvest new event. Otherwise wakeup can be + * lost. This is also good performance-wise, because on + * normal wakeup path no need to call __remove_wait_queue() + * explicitly, thus ep->lock is not taken, which halts the + * event delivery. + */ + init_wait(&wait); write_lock_irq(&ep->lock); - __add_wait_queue_exclusive(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - - for (;;) { /* - * We don't want to sleep if the ep_poll_callback() sends us - * a wakeup in between. That's why we set the task state - * to TASK_INTERRUPTIBLE before doing the checks. + * Barrierless variant, waitqueue_active() is called under + * the same lock on wakeup ep_poll_callback() side, so it + * is safe to avoid an explicit barrier. */ - set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_INTERRUPTIBLE); + /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. + * Do the final check under the lock. ep_scan_ready_list() + * plays with two lists (->rdllist and ->ovflist) and there + * is always a race when both lists are empty for short + * period of time although events are pending, so lock is + * important. */ - if (fatal_signal_pending(current)) { - res = -EINTR; - break; + eavail = ep_events_available(ep); + if (!eavail) { + if (signal_pending(current)) + res = -EINTR; + else + __add_wait_queue_exclusive(&ep->wq, &wait); } + write_unlock_irq(&ep->lock); - eavail = ep_events_available(ep); - if (eavail) + if (eavail || res) break; - if (signal_pending(current)) { - res = -EINTR; - break; - } if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { timed_out = 1; break; } - } + + /* We were woken up, thus go and try to harvest some events */ + eavail = 1; + + } while (0); __set_current_state(TASK_RUNNING); + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + send_events: + if (fatal_signal_pending(current)) { + /* + * Always short-circuit for fatal signals to allow + * threads to make a timely exit without the chance of + * finding more events available and fetching + * repeatedly. + */ + res = -EINTR; + } /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of @@ -1925,12 +1944,6 @@ send_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; - if (waiter) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - return res; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 936a8ec6b48e..6306eaae378b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -528,10 +528,12 @@ lower_metapath: /* Advance in metadata tree. */ (mp->mp_list[hgt])++; - if (mp->mp_list[hgt] >= sdp->sd_inptrs) { - if (!hgt) + if (hgt) { + if (mp->mp_list[hgt] >= sdp->sd_inptrs) + goto lower_metapath; + } else { + if (mp->mp_list[hgt] >= sdp->sd_diptrs) break; - goto lower_metapath; } fill_up_metapath: @@ -876,10 +878,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, ret = -ENOENT; goto unlock; } else { - /* report a hole */ iomap->offset = pos; iomap->length = length; - goto do_alloc; + goto hole_found; } } iomap->length = size; @@ -933,8 +934,6 @@ unlock: return ret; do_alloc: - iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; if (flags & IOMAP_REPORT) { if (pos >= size) ret = -ENOENT; @@ -956,6 +955,9 @@ do_alloc: if (pos < size && height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } +hole_found: + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; goto out; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 29f9b6684b74..bf70e3b14938 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } - return; + goto skip_inval; } } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { @@ -633,6 +633,7 @@ __acquires(&gl->gl_lockref.lock) clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } +skip_inval: gfs2_glock_hold(gl); /* * Check for an error encountered since we called go_sync and go_inval. @@ -722,9 +723,6 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; - smp_mb(); - if (atomic_read(&gl->gl_revokes) != 0) - goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 70b2d3a1e866..5acd3ce30759 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -622,7 +622,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_no_open(file, NULL); } gfs2_glock_dq_uninit(ghs); - return error; + goto fail; } else if (error != -ENOENT) { goto fail_gunlock; } @@ -764,9 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_open(file, dentry, gfs2_open_common); } gfs2_glock_dq_uninit(ghs); + gfs2_qa_put(ip); gfs2_glock_dq_uninit(ghs + 1); clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); + gfs2_qa_put(dip); return error; fail_gunlock3: @@ -776,7 +778,6 @@ fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); fail_free_inode: - gfs2_qa_put(ip); if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); @@ -1005,7 +1006,7 @@ out_gunlock: out_child: gfs2_glock_dq(ghs); out_parent: - gfs2_qa_put(ip); + gfs2_qa_put(dip); gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); return error; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3a75843ae580..0644e58c6191 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -669,13 +669,13 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct buffer_head *bh = bd->bd_bh; struct gfs2_glock *gl = bd->bd_gl; + sdp->sd_log_num_revoke++; + if (atomic_inc_return(&gl->gl_revokes) == 1) + gfs2_glock_hold(gl); bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; gfs2_remove_from_ail(bd); /* drops ref on bh */ bd->bd_bh = NULL; - sdp->sd_log_num_revoke++; - if (atomic_inc_return(&gl->gl_revokes) == 1) - gfs2_glock_hold(gl); set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&bd->bd_list, &sdp->sd_log_revokes); } @@ -1131,6 +1131,10 @@ int gfs2_logd(void *data) while (!kthread_should_stop()) { + if (gfs2_withdrawn(sdp)) { + msleep_interruptible(HZ); + continue; + } /* Check for errors writing to the journal */ if (sdp->sd_log_error) { gfs2_lm(sdp, @@ -1139,6 +1143,7 @@ int gfs2_logd(void *data) "prevent further damage.\n", sdp->sd_fsname, sdp->sd_log_error); gfs2_withdraw(sdp); + continue; } did_flush = false; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 5ea96757afc4..48b54ec1c793 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -263,7 +263,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, struct super_block *sb = sdp->sd_vfs; struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9); + bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -509,7 +509,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int readahead_blocks = BIO_MAX_PAGES << shift; + unsigned int max_bio_size = 2 * 1024 * 1024; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; @@ -537,12 +537,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off = 0; } - if (!bio || (bio_chained && !off)) { + if (!bio || (bio_chained && !off) || + bio->bi_iter.bi_size >= max_bio_size) { /* start new bio */ } else { - sz = bio_add_page(bio, page, bsize, off); - if (sz == bsize) - goto block_added; + sector_t sector = dblock << sdp->sd_fsb2bb_shift; + + if (bio_end_sector(bio) == sector) { + sz = bio_add_page(bio, page, bsize, off); + if (sz == bsize) + goto block_added; + } if (off) { unsigned int blocks = (PAGE_SIZE - off) >> bsize_shift; @@ -568,7 +573,7 @@ block_added: off += bsize; if (off == PAGE_SIZE) page = NULL; - if (blocks_submitted < blocks_read + readahead_blocks) { + if (blocks_submitted < 2 * max_bio_size >> bsize_shift) { /* Keep at least one bio in flight */ continue; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4b72abcf83b2..9856cc2e0795 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -252,7 +252,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int num = 0; if (unlikely(gfs2_withdrawn(sdp)) && - (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) { + (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) { *bhp = NULL; return -EIO; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cc0c4b5800be..8259fef3f986 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1051,8 +1051,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) u32 x; int error = 0; - if (capable(CAP_SYS_RESOURCE) || - sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; error = gfs2_quota_hold(ip, uid, gid); @@ -1125,7 +1124,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) - goto out; + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; @@ -1162,7 +1161,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) qd_unlock(qda[x]); } -out: gfs2_quota_unhold(ip); } @@ -1210,9 +1208,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) - return 0; - for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; @@ -1270,7 +1265,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - BUG_ON(ip->i_qadata->qa_ref <= 0); + if (gfs2_assert_withdraw(sdp, ip->i_qadata && + ip->i_qadata->qa_ref > 0)) + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 7f9ca8ef40fc..21ada332d555 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -44,7 +44,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, int ret; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ - if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + if (capable(CAP_SYS_RESOURCE) || + sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 37fc41632aa2..956fced0a8ec 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1404,7 +1404,6 @@ out: if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rs_delete(ip, NULL); - gfs2_qa_put(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 9b64d40ab379..aa087a5675af 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -119,6 +119,12 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) if (!sb_rdonly(sdp->sd_vfs)) ret = gfs2_make_fs_ro(sdp); + if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ + if (!ret) + ret = -EIO; + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + goto skip_recovery; + } /* * Drop the glock for our journal so another node can recover it. */ @@ -159,10 +165,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); } - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - goto skip_recovery; - } /* * Dequeue the "live" glock, but keep a reference so it's never freed. */ diff --git a/fs/io_uring.c b/fs/io_uring.c index c687f57fb651..979d9f977409 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -524,6 +524,7 @@ enum { REQ_F_OVERFLOW_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, + REQ_F_NO_FILE_TABLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -577,6 +578,8 @@ enum { REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* doesn't need file table for this request */ + REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), }; struct async_poll { @@ -677,8 +680,6 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; - /* needs req->file assigned IFF fd is >= 0 */ - unsigned fd_non_neg : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -781,8 +782,6 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_OPENAT] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -796,9 +795,8 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_STATX] = { .needs_mm = 1, - .needs_file = 1, - .fd_non_neg = 1, .needs_fs = 1, + .file_table = 1, }, [IORING_OP_READ] = { .needs_mm = 1, @@ -833,8 +831,6 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, }, [IORING_OP_OPENAT2] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -1291,7 +1287,7 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx) struct io_kiocb *req; req = ctx->fallback_req; - if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req)) + if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req)) return req; return NULL; @@ -1378,7 +1374,7 @@ static void __io_free_req(struct io_kiocb *req) if (likely(!io_is_fallback_req(req))) kmem_cache_free(req_cachep, req); else - clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req); + clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req); } struct req_batch { @@ -2034,7 +2030,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) * any file. For now, just ensure that anything potentially problematic is done * inline. */ -static bool io_file_supports_async(struct file *file) +static bool io_file_supports_async(struct file *file, int rw) { umode_t mode = file_inode(file)->i_mode; @@ -2043,7 +2039,13 @@ static bool io_file_supports_async(struct file *file) if (S_ISREG(mode) && file->f_op != &io_uring_fops) return true; - return false; + if (!(file->f_mode & FMODE_NOWAIT)) + return false; + + if (rw == READ) + return file->f_op->read_iter != NULL; + + return file->f_op->write_iter != NULL; } static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, @@ -2571,7 +2573,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, READ)) goto copy_iov; iov_count = iov_iter_count(&iter); @@ -2594,7 +2596,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - if (!(req->flags & REQ_F_NOWAIT)) + if (!(req->flags & REQ_F_NOWAIT) && + !file_can_poll(req->file)) req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } @@ -2662,7 +2665,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, WRITE)) goto copy_iov; /* file path doesn't support NOWAIT for non-direct_IO */ @@ -2716,7 +2719,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - req->flags |= REQ_F_MUST_PUNT; + if (!file_can_poll(req->file)) + req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } } @@ -2756,15 +2760,6 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_splice_punt(struct file *file) -{ - if (get_pipe_info(file)) - return false; - if (!io_file_supports_async(file)) - return true; - return !(file->f_flags & O_NONBLOCK); -} - static int io_splice(struct io_kiocb *req, bool force_nonblock) { struct io_splice *sp = &req->splice; @@ -2774,11 +2769,8 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock) loff_t *poff_in, *poff_out; long ret; - if (force_nonblock) { - if (io_splice_punt(in) || io_splice_punt(out)) - return -EAGAIN; - flags |= SPLICE_F_NONBLOCK; - } + if (force_nonblock) + return -EAGAIN; poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; @@ -3355,8 +3347,12 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) struct kstat stat; int ret; - if (force_nonblock) + if (force_nonblock) { + /* only need file table for an actual valid fd */ + if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD) + req->flags |= REQ_F_NO_FILE_TABLE; return -EAGAIN; + } if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->how.flags)) return -EINVAL; @@ -3502,7 +3498,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr) if (io_req_cancelled(req)) return; __io_sync_file_range(req); - io_put_req(req); /* put submission ref */ + io_steal_work(req, workptr); } static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) @@ -5015,7 +5011,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; /* Still need defer if there is pending req in defer list. */ - if (!req_need_defer(req) && list_empty(&ctx->defer_list)) + if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list)) return 0; if (!req->io && io_alloc_async_ctx(req)) @@ -5364,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_steal_work(req, workptr); } -static int io_req_needs_file(struct io_kiocb *req, int fd) -{ - if (!io_op_defs[req->opcode].needs_file) - return 0; - if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg) - return 0; - return 1; -} - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { @@ -5410,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd, unsigned int flags) + int fd) { bool fixed; - if (!io_req_needs_file(req, fd)) - return 0; - - fixed = (flags & IOSQE_FIXED_FILE); + fixed = (req->flags & REQ_F_FIXED_FILE) != 0; if (unlikely(!fixed && req->needs_fixed_file)) return -EBADF; @@ -5429,7 +5413,7 @@ static int io_grab_files(struct io_kiocb *req) int ret = -EBADF; struct io_ring_ctx *ctx = req->ctx; - if (req->work.files) + if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE)) return 0; if (!ctx->ring_file) return -EBADF; @@ -5794,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_state *state, bool async) { unsigned int sqe_flags; - int id, fd; + int id; /* * All io need record the previous position, if LINK vs DARIN, @@ -5846,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - fd = READ_ONCE(sqe->fd); - return io_req_set_file(state, req, fd, sqe_flags); + if (!io_op_defs[req->opcode].needs_file) + return 0; + + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -7327,7 +7313,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * it could cause shutdown to hang. */ while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) - cpu_relax(); + cond_resched(); io_kill_timeouts(ctx); io_poll_remove_all(ctx); @@ -7356,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file) static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { - struct io_kiocb *req; - DEFINE_WAIT(wait); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL; + struct io_kiocb *cancel_req = NULL, *req; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { @@ -7400,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, */ if (refcount_sub_and_test(2, &cancel_req->refs)) { io_put_req(cancel_req); + finish_wait(&ctx->inflight_wait, &wait); continue; } } @@ -7407,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_work(ctx->io_wq, &cancel_req->work); io_put_req(cancel_req); schedule(); + finish_wait(&ctx->inflight_wait, &wait); } - finish_wait(&ctx->inflight_wait, &wait); } static int io_uring_flush(struct file *file, void *data) @@ -7757,7 +7742,8 @@ err: return ret; } -static int io_uring_create(unsigned entries, struct io_uring_params *p) +static int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) { struct user_struct *user = NULL; struct io_ring_ctx *ctx; @@ -7849,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup @@ -7857,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: @@ -7875,7 +7866,6 @@ err: static long io_uring_setup(u32 entries, struct io_uring_params __user *params) { struct io_uring_params p; - long ret; int i; if (copy_from_user(&p, params, sizeof(p))) @@ -7890,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL; - ret = io_uring_create(entries, &p); - if (ret < 0) - return ret; - - if (copy_to_user(params, &p, sizeof(p))) - return -EFAULT; - - return ret; + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, diff --git a/fs/ioctl.c b/fs/ioctl.c index 282d45be6f45..5e80b40bc1b5 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -55,6 +55,7 @@ EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { struct inode *inode = file_inode(filp); + struct super_block *sb = inode->i_sb; int error, ur_block; sector_t block; @@ -71,6 +72,13 @@ static int ioctl_fibmap(struct file *filp, int __user *p) block = ur_block; error = bmap(inode, &block); + if (block > INT_MAX) { + error = -ERANGE; + pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n", + current->comm, task_pid_nr(current), + sb->s_id, filp); + } + if (error) ur_block = 0; else diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index bccf305ea9ce..d55e8f491a5e 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -117,10 +117,7 @@ iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length, if (iomap->type == IOMAP_MAPPED) { addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits; - if (addr > INT_MAX) - WARN(1, "would truncate bmap result\n"); - else - *bno = addr; + *bno = addr; } return 0; } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index c5c3fc6e6c60..26c94b32d6f4 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -253,37 +253,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *alloc = NULL, *dfacl = NULL; + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status; if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; + dfacl = alloc; break; case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; + dfacl = acl; + acl = alloc; break; } } if (acl == NULL) { - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; + acl = alloc; } status = __nfs3_proc_setacls(inode, acl, dfacl); - posix_acl_release(alloc); +out: + if (acl != orig) + posix_acl_release(acl); + if (dfacl != orig) + posix_acl_release(dfacl); return status; fail: - return PTR_ERR(alloc); + status = PTR_ERR(alloc); + goto out; } const struct xattr_handler *nfs3_xattr_handlers[] = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 512afb1c7867..a0c1e653a935 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7891,6 +7891,7 @@ static void nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) { struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; + struct nfs41_bind_conn_to_session_res *res = task->tk_msg.rpc_resp; struct nfs_client *clp = args->client; switch (task->tk_status) { @@ -7899,6 +7900,12 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); } + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && + res->dir != NFS4_CDFS4_BOTH) { + rpc_task_close_connection(task); + if (args->retries++ < MAX_BIND_CONN_TO_SESSION_RETRIES) + rpc_restart_call(task); + } } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { @@ -7921,6 +7928,7 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, struct nfs41_bind_conn_to_session_args args = { .client = clp, .dir = NFS4_CDFC4_FORE_OR_BOTH, + .retries = 0, }; struct nfs41_bind_conn_to_session_res res; struct rpc_message msg = { @@ -9191,8 +9199,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return ERR_CAST(task); + status = rpc_wait_for_completion_task(task); if (status != 0) goto out; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8d78f393365..dd2e14f5875d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1332,13 +1332,15 @@ _pnfs_return_layout(struct inode *ino) !valid_layout) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); - goto out_put_layout_hdr; + goto out_wait_layoutreturn; } send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); spin_unlock(&ino->i_lock); if (send) status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); +out_wait_layoutreturn: + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); out_put_layout_hdr: pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); @@ -1456,18 +1458,15 @@ retry: /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn) + if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0) goto out_noroc; - if (cred_fscmp(cred, lc_cred) != 0) - goto out_noroc_put_cred; roc = layoutreturn; pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); res->lrs_present = 0; layoutreturn = false; - -out_noroc_put_cred: put_cred(lc_cred); + out_noroc: spin_unlock(&ino->i_lock); rcu_read_unlock(); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 59ef3b13ccca..bdb6d0c2e755 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -185,7 +185,7 @@ static int __nfs_list_for_each_server(struct list_head *head, rcu_read_lock(); list_for_each_entry_rcu(server, head, client_link) { - if (!nfs_sb_active(server->super)) + if (!(server->super && nfs_sb_active(server->super))) continue; rcu_read_unlock(); if (last) diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 8e4f1ace467c..1de77f1a600b 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -275,7 +275,6 @@ static ssize_t dlmfs_file_write(struct file *filp, loff_t *ppos) { int bytes_left; - ssize_t writelen; char *lvb_buf; struct inode *inode = file_inode(filp); @@ -285,32 +284,30 @@ static ssize_t dlmfs_file_write(struct file *filp, if (*ppos >= i_size_read(inode)) return -ENOSPC; + /* don't write past the lvb */ + if (count > i_size_read(inode) - *ppos) + count = i_size_read(inode) - *ppos; + if (!count) return 0; if (!access_ok(buf, count)) return -EFAULT; - /* don't write past the lvb */ - if ((count + *ppos) > i_size_read(inode)) - writelen = i_size_read(inode) - *ppos; - else - writelen = count - *ppos; - - lvb_buf = kmalloc(writelen, GFP_NOFS); + lvb_buf = kmalloc(count, GFP_NOFS); if (!lvb_buf) return -ENOMEM; - bytes_left = copy_from_user(lvb_buf, buf, writelen); - writelen -= bytes_left; - if (writelen) - user_dlm_write_lvb(inode, lvb_buf, writelen); + bytes_left = copy_from_user(lvb_buf, buf, count); + count -= bytes_left; + if (count) + user_dlm_write_lvb(inode, lvb_buf, count); kfree(lvb_buf); - *ppos = *ppos + writelen; - mlog(0, "wrote %zd bytes\n", writelen); - return writelen; + *ppos = *ppos + count; + mlog(0, "wrote %zu bytes\n", count); + return count; } static void dlmfs_init_once(void *foo) diff --git a/fs/pnode.c b/fs/pnode.c index 49f6d7ff2139..1106137c747a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -261,14 +261,13 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); + read_seqlock_excl(&mount_lock); mnt_set_mountpoint(m, mp, child); + if (m->mnt_master != dest_master) + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); last_dest = m; last_source = child; - if (m->mnt_master != dest_master) { - read_seqlock_excl(&mount_lock); - SET_MNT_MARK(m->mnt_master); - read_sequnlock_excl(&mount_lock); - } hlist_add_head(&child->mnt_hash, list); return count_mounts(m->mnt_ns, child); } diff --git a/fs/splice.c b/fs/splice.c index 4735defc46ee..fd0a1e7e5959 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in, loff_t offset; long ret; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + ipipe = get_pipe_info(in); opipe = get_pipe_info(out); @@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in, if (off_in || off_out) return -ESPIPE; - if (!(in->f_mode & FMODE_READ)) - return -EBADF; - - if (!(out->f_mode & FMODE_WRITE)) - return -EBADF; - /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; @@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in, offset = out->f_pos; } - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; - if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; @@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, error = -EBADF; in = fdget(fd_in); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - out = fdget(fd_out); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_splice(in.file, off_in, - out.file, off_out, - len, flags); - fdput(out); - } + out = fdget(fd_out); + if (out.file) { + error = do_splice(in.file, off_in, out.file, off_out, + len, flags); + fdput(out); } fdput(in); } @@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len, struct pipe_inode_info *opipe = get_pipe_info(out); int ret = -EINVAL; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + /* * Duplicate the contents of ipipe to opipe without actually * copying the data. @@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in; + struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) @@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) error = -EBADF; in = fdget(fdin); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - struct fd out = fdget(fdout); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_tee(in.file, out.file, - len, flags); - fdput(out); - } + out = fdget(fdout); + if (out.file) { + error = do_tee(in.file, out.file, len, flags); + fdput(out); } fdput(in); } diff --git a/fs/super.c b/fs/super.c index cd352530eca9..a288cd60d2ae 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1302,8 +1302,8 @@ int get_tree_bdev(struct fs_context *fc, mutex_lock(&bdev->bd_fsfreeze_mutex); if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); - blkdev_put(bdev, mode); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); + blkdev_put(bdev, mode); return -EBUSY; } diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 675e26989376..8fe03b4a0d2b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_free; } - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; |