diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2019-04-10 14:43:44 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2019-05-01 22:37:39 -0400 |
commit | fdb0da89f4ba0c74d7d3b9e6f471e96a5766820b (patch) | |
tree | 3c82eb1fe04568315a76d03ec86617e206b12a41 | |
parent | ad7999cd701e4e058765d35cf5274ee16801e986 (diff) |
new inode method: ->free_inode()
A lot of ->destroy_inode() instances end with call_rcu() of a callback
that does RCU-delayed part of freeing. Introduce a new method for
doing just that, with saner signature.
Rules:
->destroy_inode ->free_inode
f g immediate call of f(),
RCU-delayed call of g()
f NULL immediate call of f(),
no RCU-delayed calls
NULL g RCU-delayed call of g()
NULL NULL RCU-delayed default freeing
IOW, NULL ->free_inode gives the same behaviour as now.
Note that NULL, NULL is equivalent to NULL, free_inode_nonrcu; we could
mandate the latter form, but that would have very little benefit beyond
making rules a bit more symmetric. It would break backwards compatibility,
require extra boilerplate and expected semantics for (NULL, NULL) pair
would have no use whatsoever...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | Documentation/filesystems/Locking | 2 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 25 | ||||
-rw-r--r-- | fs/inode.c | 56 | ||||
-rw-r--r-- | include/linux/fs.h | 6 |
4 files changed, 66 insertions, 23 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index efea228ccd8a..7b20c385cc02 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -118,6 +118,7 @@ set: exclusive --------------------------- super_operations --------------------------- prototypes: struct inode *(*alloc_inode)(struct super_block *sb); + void (*free_inode)(struct inode *); void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); @@ -139,6 +140,7 @@ locking rules: All may block [not true, see below] s_umount alloc_inode: +free_inode: called from RCU callback destroy_inode: dirty_inode: write_inode: diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index cf43bc4dbf31..b8d3ddd8b8db 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -638,3 +638,28 @@ in your dentry operations instead. inode to d_splice_alias() will also do the right thing (equivalent of d_add(dentry, NULL); return NULL;), so that kind of special cases also doesn't need a separate treatment. +-- +[strongly recommended] + take the RCU-delayed parts of ->destroy_inode() into a new method - + ->free_inode(). If ->destroy_inode() becomes empty - all the better, + just get rid of it. Synchronous work (e.g. the stuff that can't + be done from an RCU callback, or any WARN_ON() where we want the + stack trace) *might* be movable to ->evict_inode(); however, + that goes only for the things that are not needed to balance something + done by ->alloc_inode(). IOW, if it's cleaning up the stuff that + might have accumulated over the life of in-core inode, ->evict_inode() + might be a fit. + + Rules for inode destruction: + * if ->destroy_inode() is non-NULL, it gets called + * if ->free_inode() is non-NULL, it gets scheduled by call_rcu() + * combination of NULL ->destroy_inode and NULL ->free_inode is + treated as NULL/free_inode_nonrcu, to preserve the compatibility. + + Note that the callback (be it via ->free_inode() or explicit call_rcu() + in ->destroy_inode()) is *NOT* ordered wrt superblock destruction; + as the matter of fact, the superblock and all associated structures + might be already gone. The filesystem driver is guaranteed to be still + there, but that's it. Freeing memory in the callback is fine; doing + more than that is possible, but requires a lot of care and is best + avoided. diff --git a/fs/inode.c b/fs/inode.c index e9d97add2b36..627e1766503a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -202,12 +202,28 @@ out: } EXPORT_SYMBOL(inode_init_always); +void free_inode_nonrcu(struct inode *inode) +{ + kmem_cache_free(inode_cachep, inode); +} +EXPORT_SYMBOL(free_inode_nonrcu); + +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + if (inode->free_inode) + inode->free_inode(inode); + else + free_inode_nonrcu(inode); +} + static struct inode *alloc_inode(struct super_block *sb) { + const struct super_operations *ops = sb->s_op; struct inode *inode; - if (sb->s_op->alloc_inode) - inode = sb->s_op->alloc_inode(sb); + if (ops->alloc_inode) + inode = ops->alloc_inode(sb); else inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); @@ -215,22 +231,19 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; if (unlikely(inode_init_always(sb, inode))) { - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, inode); + if (ops->destroy_inode) { + ops->destroy_inode(inode); + if (!ops->free_inode) + return NULL; + } + inode->free_inode = ops->free_inode; + i_callback(&inode->i_rcu); return NULL; } return inode; } -void free_inode_nonrcu(struct inode *inode) -{ - kmem_cache_free(inode_cachep, inode); -} -EXPORT_SYMBOL(free_inode_nonrcu); - void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); @@ -253,20 +266,19 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); -static void i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(inode_cachep, inode); -} - static void destroy_inode(struct inode *inode) { + const struct super_operations *ops = inode->i_sb->s_op; + BUG_ON(!list_empty(&inode->i_lru)); __destroy_inode(inode); - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - call_rcu(&inode->i_rcu, i_callback); + if (ops->destroy_inode) { + ops->destroy_inode(inode); + if (!ops->free_inode) + return; + } + inode->free_inode = ops->free_inode; + call_rcu(&inode->i_rcu, i_callback); } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index dd28e7679089..92732286b748 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -694,7 +694,10 @@ struct inode { #ifdef CONFIG_IMA atomic_t i_readcount; /* struct files open RO */ #endif - const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + union { + const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + void (*free_inode)(struct inode *); + }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; @@ -1903,6 +1906,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); + void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); |