summaryrefslogtreecommitdiff
path: root/fs/super.c
diff options
context:
space:
mode:
authorAlex Elder <aelder@sgi.com>2011-08-08 07:06:24 -0500
committerAlex Elder <aelder@sgi.com>2011-08-08 07:06:24 -0500
commit2ddb4e94065470828e131351566102274ea9e83f (patch)
tree3eb237d28e10d7735d57c051880e8173113acef8 /fs/super.c
parentc35a549c8b9e85bdff7e531a410d10e36b4b4f32 (diff)
parent322a8b034003c0d46d39af85bf24fee27b902f48 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Diffstat (limited to 'fs/super.c')
-rw-r--r--fs/super.c182
1 files changed, 167 insertions, 15 deletions
diff --git a/fs/super.c b/fs/super.c
index c75593953c52..3f56a269a4f4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,69 @@
LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);
+/*
+ * One thing we have to be careful of with a per-sb shrinker is that we don't
+ * drop the last active reference to the superblock from within the shrinker.
+ * If that happens we could trigger unregistering the shrinker from within the
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
+ * take a passive reference to the superblock to avoid this from occurring.
+ */
+static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct super_block *sb;
+ int fs_objects = 0;
+ int total_objects;
+
+ sb = container_of(shrink, struct super_block, s_shrink);
+
+ /*
+ * Deadlock avoidance. We may hold various FS locks, and we don't want
+ * to recurse into the FS that called us in clear_inode() and friends..
+ */
+ if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
+ return -1;
+
+ if (!grab_super_passive(sb))
+ return -1;
+
+ if (sb->s_op && sb->s_op->nr_cached_objects)
+ fs_objects = sb->s_op->nr_cached_objects(sb);
+
+ total_objects = sb->s_nr_dentry_unused +
+ sb->s_nr_inodes_unused + fs_objects + 1;
+
+ if (sc->nr_to_scan) {
+ int dentries;
+ int inodes;
+
+ /* proportion the scan between the caches */
+ dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
+ total_objects;
+ inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
+ total_objects;
+ if (fs_objects)
+ fs_objects = (sc->nr_to_scan * fs_objects) /
+ total_objects;
+ /*
+ * prune the dcache first as the icache is pinned by it, then
+ * prune the icache, followed by the filesystem specific caches
+ */
+ prune_dcache_sb(sb, dentries);
+ prune_icache_sb(sb, inodes);
+
+ if (fs_objects && sb->s_op->free_cached_objects) {
+ sb->s_op->free_cached_objects(sb, fs_objects);
+ fs_objects = sb->s_op->nr_cached_objects(sb);
+ }
+ total_objects = sb->s_nr_dentry_unused +
+ sb->s_nr_inodes_unused + fs_objects;
+ }
+
+ total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
+ drop_super(sb);
+ return total_objects;
+}
+
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
@@ -77,6 +140,8 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_inode_lru);
+ spin_lock_init(&s->s_inode_lru_lock);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -114,6 +179,10 @@ static struct super_block *alloc_super(struct file_system_type *type)
s->s_op = &default_op;
s->s_time_gran = 1000000000;
s->cleancache_poolid = -1;
+
+ s->s_shrink.seeks = DEFAULT_SEEKS;
+ s->s_shrink.shrink = prune_super;
+ s->s_shrink.batch = 1024;
}
out:
return s;
@@ -181,6 +250,10 @@ void deactivate_locked_super(struct super_block *s)
if (atomic_dec_and_test(&s->s_active)) {
cleancache_flush_fs(s);
fs->kill_sb(s);
+
+ /* caches are now gone, we can safely kill the shrinker now */
+ unregister_shrinker(&s->s_shrink);
+
/*
* We need to call rcu_barrier so all the delayed rcu free
* inodes are flushed before we release the fs module.
@@ -241,17 +314,48 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
}
/*
+ * grab_super_passive - acquire a passive reference
+ * @s: reference we are trying to grab
+ *
+ * Tries to acquire a passive reference. This is used in places where we
+ * cannot take an active reference but we need to ensure that the
+ * superblock does not go away while we are working on it. It returns
+ * false if a reference was not gained, and returns true with the s_umount
+ * lock held in read mode if a reference is gained. On successful return,
+ * the caller must drop the s_umount lock and the passive reference when
+ * done.
+ */
+bool grab_super_passive(struct super_block *sb)
+{
+ spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ if (down_read_trylock(&sb->s_umount)) {
+ if (sb->s_root)
+ return true;
+ up_read(&sb->s_umount);
+ }
+
+ put_super(sb);
+ return false;
+}
+
+/*
* Superblock locking. We really ought to get rid of these two.
*/
void lock_super(struct super_block * sb)
{
- get_fs_excl();
mutex_lock(&sb->s_lock);
}
void unlock_super(struct super_block * sb)
{
- put_fs_excl();
mutex_unlock(&sb->s_lock);
}
@@ -276,11 +380,9 @@ void generic_shutdown_super(struct super_block *sb)
{
const struct super_operations *sop = sb->s_op;
-
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
- get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;
fsnotify_unmount_inodes(&sb->s_inodes);
@@ -295,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb)
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
}
- put_fs_excl();
}
spin_lock(&sb_lock);
/* should be initialized for __put_super_and_need_restart() */
@@ -364,6 +465,7 @@ retry:
list_add(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
+ register_shrinker(&s->s_shrink);
return s;
}
@@ -452,6 +554,42 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
}
/**
+ * iterate_supers_type - call function for superblocks of given type
+ * @type: fs type
+ * @f: function to call
+ * @arg: argument to pass to it
+ *
+ * Scans the superblock list and calls given function, passing it
+ * locked superblock and given argument.
+ */
+void iterate_supers_type(struct file_system_type *type,
+ void (*f)(struct super_block *, void *), void *arg)
+{
+ struct super_block *sb, *p = NULL;
+
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &type->fs_supers, s_instances) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ down_read(&sb->s_umount);
+ if (sb->s_root)
+ f(sb, arg);
+ up_read(&sb->s_umount);
+
+ spin_lock(&sb_lock);
+ if (p)
+ __put_super(p);
+ p = sb;
+ }
+ if (p)
+ __put_super(p);
+ spin_unlock(&sb_lock);
+}
+
+EXPORT_SYMBOL(iterate_supers_type);
+
+/**
* get_super - get the superblock of a device
* @bdev: device to get the superblock for
*
@@ -657,7 +795,7 @@ static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
static int unnamed_dev_start = 0; /* don't bother trying below it */
-int set_anon_super(struct super_block *s, void *data)
+int get_anon_bdev(dev_t *p)
{
int dev;
int error;
@@ -684,24 +822,38 @@ int set_anon_super(struct super_block *s, void *data)
spin_unlock(&unnamed_dev_lock);
return -EMFILE;
}
- s->s_dev = MKDEV(0, dev & MINORMASK);
- s->s_bdi = &noop_backing_dev_info;
+ *p = MKDEV(0, dev & MINORMASK);
return 0;
}
+EXPORT_SYMBOL(get_anon_bdev);
-EXPORT_SYMBOL(set_anon_super);
-
-void kill_anon_super(struct super_block *sb)
+void free_anon_bdev(dev_t dev)
{
- int slot = MINOR(sb->s_dev);
-
- generic_shutdown_super(sb);
+ int slot = MINOR(dev);
spin_lock(&unnamed_dev_lock);
ida_remove(&unnamed_dev_ida, slot);
if (slot < unnamed_dev_start)
unnamed_dev_start = slot;
spin_unlock(&unnamed_dev_lock);
}
+EXPORT_SYMBOL(free_anon_bdev);
+
+int set_anon_super(struct super_block *s, void *data)
+{
+ int error = get_anon_bdev(&s->s_dev);
+ if (!error)
+ s->s_bdi = &noop_backing_dev_info;
+ return error;
+}
+
+EXPORT_SYMBOL(set_anon_super);
+
+void kill_anon_super(struct super_block *sb)
+{
+ dev_t dev = sb->s_dev;
+ generic_shutdown_super(sb);
+ free_anon_bdev(dev);
+}
EXPORT_SYMBOL(kill_anon_super);
@@ -822,7 +974,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
} else {
char b[BDEVNAME_SIZE];
- s->s_flags = flags;
+ s->s_flags = flags | MS_NOSEC;
s->s_mode = mode;
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
sb_set_blocksize(s, block_size(bdev));