diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-13 10:21:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-13 10:21:37 -0700 |
commit | 524d0c68826bc1adf9d1946e540eb4f7b16699a7 (patch) | |
tree | 2d2fcf87c05c43bf613da64b47b660e0759185b8 | |
parent | 66b834558537c623135241e1ea9ddf11b31596e4 (diff) | |
parent | 71cf0c1c4f9f8e42c84ca53a5ca7091e4eea7f6a (diff) |
Merge tag 'ceph-for-6.1-rc1' of https://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"A quiet round this time: several assorted filesystem fixes, the most
noteworthy one being some additional wakeups in cap handling code, and
a messenger cleanup"
* tag 'ceph-for-6.1-rc1' of https://github.com/ceph/ceph-client:
ceph: remove Sage's git tree from documentation
ceph: fix incorrectly showing the .snap size for stat
ceph: fail the open_by_handle_at() if the dentry is being unlinked
ceph: increment i_version when doing a setattr with caps
ceph: Use kcalloc for allocating multiple elements
ceph: no need to wait for transition RDCACHE|RD -> RD
ceph: fail the request if the peer MDS doesn't support getvxattr op
ceph: wake up the waiters if any new caps comes
libceph: drop last_piece flag from ceph_msg_data_cursor
-rw-r--r-- | Documentation/filesystems/ceph.rst | 1 | ||||
-rw-r--r-- | fs/ceph/caps.c | 14 | ||||
-rw-r--r-- | fs/ceph/export.c | 3 | ||||
-rw-r--r-- | fs/ceph/inode.c | 29 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 11 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 6 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
-rw-r--r-- | net/ceph/messenger.c | 40 | ||||
-rw-r--r-- | net/ceph/messenger_v1.c | 6 | ||||
-rw-r--r-- | net/ceph/messenger_v2.c | 2 |
10 files changed, 64 insertions, 52 deletions
diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst index 4942e018db85..76ce938e7024 100644 --- a/Documentation/filesystems/ceph.rst +++ b/Documentation/filesystems/ceph.rst @@ -203,7 +203,6 @@ For more information on Ceph, see the home page at The Linux kernel client source tree is available at - https://github.com/ceph/ceph-client.git - - git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git and the source for the full system is at https://github.com/ceph/ceph.git diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 53cfe026b3ea..fb023f9fafcb 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -754,6 +754,7 @@ void ceph_add_cap(struct inode *inode, cap->issue_seq = seq; cap->mseq = mseq; cap->cap_gen = gen; + wake_up_all(&ci->i_cap_wq); } /* @@ -2285,7 +2286,7 @@ retry: struct ceph_mds_request *req; int i; - sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { err = -ENOMEM; goto out; @@ -2759,13 +2760,17 @@ again: * on transition from wanted -> needed caps. This is needed * for WRBUFFER|WR -> WR to avoid a new WR sync write from * going before a prior buffered writeback happens. + * + * For RDCACHE|RD -> RD, there is not need to wait and we can + * just exclude the revoking caps and force to sync read. */ int not = want & ~(have & need); int revoking = implemented & ~have; + int exclude = revoking & not; dout("get_cap_refs %p have %s but not %s (revoking %s)\n", inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); - if ((revoking & not) == 0) { + if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) { if (!snap_rwsem_locked && !ci->i_head_snapc && (need & CEPH_CAP_FILE_WR)) { @@ -2787,7 +2792,7 @@ again: snap_rwsem_locked = true; } if ((have & want) == want) - *got = need | want; + *got = need | (want & ~exclude); else *got = need; ceph_take_cap_refs(ci, *got, true); @@ -3550,6 +3555,9 @@ static void handle_cap_grant(struct inode *inode, check_caps = 1; /* check auth cap only */ else check_caps = 2; /* check all caps */ + /* If there is new caps, try to wake up the waiters */ + if (~cap->issued & newcaps) + wake = true; cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e0fa66ac8b9f..f780e4e0d062 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -181,6 +181,7 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino) static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) { struct inode *inode = __lookup_inode(sb, ino); + struct ceph_inode_info *ci = ceph_inode(inode); int err; if (IS_ERR(inode)) @@ -192,7 +193,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) return ERR_PTR(err); } /* -ESTALE if inode as been unlinked and no file is open */ - if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) { + if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) { iput(inode); return ERR_PTR(-ESTALE); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 42351d7a0dd6..9ebb7cee7978 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2192,6 +2192,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, &prealloc_cf); inode->i_ctime = attr->ia_ctime; + inode_inc_iversion_raw(inode); } release &= issued; @@ -2356,6 +2357,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, goto out; } + req->r_feature_needed = CEPHFS_FEATURE_OP_GETVXATTR; req->r_path2 = kstrdup(name, GFP_NOFS); if (!req->r_path2) { err = -ENOMEM; @@ -2447,6 +2449,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); + struct super_block *sb = inode->i_sb; struct ceph_inode_info *ci = ceph_inode(inode); u32 valid_mask = STATX_BASIC_STATS; int err = 0; @@ -2476,16 +2479,34 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, } if (ceph_snap(inode) == CEPH_NOSNAP) - stat->dev = inode->i_sb->s_dev; + stat->dev = sb->s_dev; else stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; if (S_ISDIR(inode->i_mode)) { - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), - RBYTES)) + if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) { stat->size = ci->i_rbytes; - else + } else if (ceph_snap(inode) == CEPH_SNAPDIR) { + struct ceph_inode_info *pci; + struct ceph_snap_realm *realm; + struct inode *parent; + + parent = ceph_lookup_inode(sb, ceph_ino(inode)); + if (!parent) + return PTR_ERR(parent); + + pci = ceph_inode(parent); + spin_lock(&pci->i_ceph_lock); + realm = pci->i_snap_realm; + if (realm) + stat->size = realm->num_snaps; + else + stat->size = 0; + spin_unlock(&pci->i_ceph_lock); + iput(parent); + } else { stat->size = ci->i_files + ci->i_subdirs; + } stat->blocks = 0; stat->blksize = 65536; /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 80f8b9ec1a31..26a0a8b9975e 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2318,6 +2318,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) INIT_LIST_HEAD(&req->r_unsafe_dir_item); INIT_LIST_HEAD(&req->r_unsafe_target_item); req->r_fmode = -1; + req->r_feature_needed = -1; kref_init(&req->r_kref); RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_wait); @@ -2916,6 +2917,16 @@ static void __do_request(struct ceph_mds_client *mdsc, dout("do_request mds%d session %p state %s\n", mds, session, ceph_session_state_name(session->s_state)); + + /* + * The old ceph will crash the MDSs when see unknown OPs + */ + if (req->r_feature_needed > 0 && + !test_bit(req->r_feature_needed, &session->s_features)) { + err = -EOPNOTSUPP; + goto out_session; + } + if (session->s_state != CEPH_MDS_SESSION_OPEN && session->s_state != CEPH_MDS_SESSION_HUNG) { /* diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 256e3eada6c1..0598faa50e2e 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -31,8 +31,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_METRIC_COLLECT, CEPHFS_FEATURE_ALTERNATE_NAME, CEPHFS_FEATURE_NOTIFY_SESSION_STATE, + CEPHFS_FEATURE_OP_GETVXATTR, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_NOTIFY_SESSION_STATE, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -44,6 +45,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_DELEG_INO, \ CEPHFS_FEATURE_METRIC_COLLECT, \ CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ + CEPHFS_FEATURE_OP_GETVXATTR, \ } /* @@ -336,6 +338,8 @@ struct ceph_mds_request { long long r_dir_ordered_cnt; int r_readdir_cache_idx; + int r_feature_needed; + struct ceph_cap_reservation r_caps_reservation; }; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e7f2fb2fc207..99c1726be6ee 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -207,7 +207,6 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ - bool last_piece; /* current is last piece */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -498,8 +497,7 @@ void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, struct ceph_msg *msg, size_t length); struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece); + size_t *page_offset, size_t *length); void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d3bb656308b4..dfa237fbd5a3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -728,7 +728,6 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, it->iter.bi_size = cursor->resid; BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); - cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); } static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, @@ -754,10 +753,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, cursor->resid -= bytes; bio_advance_iter(it->bio, &it->iter, bytes); - if (!cursor->resid) { - BUG_ON(!cursor->last_piece); + if (!cursor->resid) return false; /* no more data */ - } if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && page == bio_iter_page(it->bio, it->iter))) @@ -770,9 +767,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, it->iter.bi_size = cursor->resid; } - BUG_ON(cursor->last_piece); BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); - cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); return true; } #endif /* CONFIG_BLOCK */ @@ -788,8 +783,6 @@ static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->bvec_iter.bi_size = cursor->resid; BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); - cursor->last_piece = - cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); } static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor, @@ -815,19 +808,14 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, cursor->resid -= bytes; bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes); - if (!cursor->resid) { - BUG_ON(!cursor->last_piece); + if (!cursor->resid) return false; /* no more data */ - } if (!bytes || (cursor->bvec_iter.bi_bvec_done && page == bvec_iter_page(bvecs, cursor->bvec_iter))) return false; /* more bytes to process in this segment */ - BUG_ON(cursor->last_piece); BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); - cursor->last_piece = - cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); return true; } @@ -853,7 +841,6 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, BUG_ON(page_count > (int)USHRT_MAX); cursor->page_count = (unsigned short)page_count; BUG_ON(length > SIZE_MAX - cursor->page_offset); - cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE; } static struct page * @@ -868,11 +855,7 @@ ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor, BUG_ON(cursor->page_offset >= PAGE_SIZE); *page_offset = cursor->page_offset; - if (cursor->last_piece) - *length = cursor->resid; - else - *length = PAGE_SIZE - *page_offset; - + *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset); return data->pages[cursor->page_index]; } @@ -897,8 +880,6 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, BUG_ON(cursor->page_index >= cursor->page_count); cursor->page_index++; - cursor->last_piece = cursor->resid <= PAGE_SIZE; - return true; } @@ -928,7 +909,6 @@ ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->resid = min(length, pagelist->length); cursor->page = page; cursor->offset = 0; - cursor->last_piece = cursor->resid <= PAGE_SIZE; } static struct page * @@ -948,11 +928,7 @@ ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor, /* offset of first page in pagelist is always 0 */ *page_offset = cursor->offset & ~PAGE_MASK; - if (cursor->last_piece) - *length = cursor->resid; - else - *length = PAGE_SIZE - *page_offset; - + *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset); return cursor->page; } @@ -985,8 +961,6 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); cursor->page = list_next_entry(cursor->page, lru); - cursor->last_piece = cursor->resid <= PAGE_SIZE; - return true; } @@ -1044,8 +1018,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, * Indicate whether this is the last piece in this data item. */ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece) + size_t *page_offset, size_t *length) { struct page *page; @@ -1074,8 +1047,6 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, BUG_ON(*page_offset + *length > PAGE_SIZE); BUG_ON(!*length); BUG_ON(*length > cursor->resid); - if (last_piece) - *last_piece = cursor->last_piece; return page; } @@ -1112,7 +1083,6 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) cursor->total_resid -= bytes; if (!cursor->resid && cursor->total_resid) { - WARN_ON(!cursor->last_piece); cursor->data++; __ceph_msg_data_cursor_init(cursor); new_piece = true; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 6b014eca3a13..3ddbde87e4d6 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -495,7 +495,7 @@ static int write_partial_message_data(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + page = ceph_msg_data_next(cursor, &page_offset, &length); if (length == cursor->total_resid) more = MSG_MORE; ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, @@ -1008,7 +1008,7 @@ static int read_partial_msg_data(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { if (do_datacrc) @@ -1050,7 +1050,7 @@ static int read_partial_msg_data_bounce(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &off, &len, NULL); + page = ceph_msg_data_next(cursor, &off, &len); ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); if (ret <= 0) { con->in_data_crc = crc; diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c6e5bfc717d5..cc8ff81a50b7 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -862,7 +862,7 @@ static void get_bvec_at(struct ceph_msg_data_cursor *cursor, ceph_msg_data_advance(cursor, 0); /* get a piece of data, cursor isn't advanced */ - page = ceph_msg_data_next(cursor, &off, &len, NULL); + page = ceph_msg_data_next(cursor, &off, &len); bv->bv_page = page; bv->bv_offset = off; |