From d6cb125b9983e1ea9444f794b2d3ed5e3ad737b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 24 Dec 2014 22:47:00 -0500 Subject: kill d_validate() no users left Signed-off-by: Al Viro --- include/linux/dcache.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5a813988e6d4..92c08cf7670e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -319,9 +319,6 @@ static inline unsigned d_count(const struct dentry *dentry) return dentry->d_lockref.count; } -/* validate "insecure" dentry pointer */ -extern int d_validate(struct dentry *, struct dentry *); - /* * helper function for dentry_operations.d_dname() members */ -- cgit v1.2.3-58-ga151 From 9e251d02041432487d89cb340e72490c4bbc198a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Jan 2015 20:40:02 -0500 Subject: kill pin_put() Signed-off-by: Al Viro --- fs/fs_pin.c | 11 ----------- include/linux/fs_pin.h | 1 - kernel/acct.c | 14 ++++++++++---- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 9368236ca100..f173313760b8 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -4,19 +4,8 @@ #include "internal.h" #include "mount.h" -static void pin_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct fs_pin, rcu)); -} - static DEFINE_SPINLOCK(pin_lock); -void pin_put(struct fs_pin *p) -{ - if (atomic_long_dec_and_test(&p->count)) - call_rcu(&p->rcu, pin_free_rcu); -} - void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index f66525e72ccf..68a54b7741aa 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -12,6 +12,5 @@ struct fs_pin { void (*kill)(struct fs_pin *); }; -void pin_put(struct fs_pin *); void pin_remove(struct fs_pin *); void pin_insert(struct fs_pin *, struct vfsmount *); diff --git a/kernel/acct.c b/kernel/acct.c index 33738ef972f3..7bb9e659a7da 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -124,6 +124,12 @@ out: return acct->active; } +static void acct_put(struct bsd_acct_struct *p) +{ + if (atomic_long_dec_and_test(&p->pin.count)) + kfree_rcu(p, pin.rcu); +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; @@ -144,7 +150,7 @@ again: mutex_lock(&res->lock); if (!res->ns) { mutex_unlock(&res->lock); - pin_put(&res->pin); + acct_put(res); goto again; } return res; @@ -175,7 +181,7 @@ static void acct_kill(struct bsd_acct_struct *acct, acct->ns = NULL; atomic_long_dec(&acct->pin.count); mutex_unlock(&acct->lock); - pin_put(&acct->pin); + acct_put(acct); } } @@ -186,7 +192,7 @@ static void acct_pin_kill(struct fs_pin *pin) mutex_lock(&acct->lock); if (!acct->ns) { mutex_unlock(&acct->lock); - pin_put(pin); + acct_put(acct); acct = NULL; } acct_kill(acct, NULL); @@ -576,7 +582,7 @@ static void slow_acct_process(struct pid_namespace *ns) if (acct) { do_acct_process(acct); mutex_unlock(&acct->lock); - pin_put(&acct->pin); + acct_put(acct); } } } -- cgit v1.2.3-58-ga151 From 360f54796ed65939093ae373b92ebd5ef3341776 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Jan 2015 15:19:03 -0800 Subject: dcache: let the dentry count go down to zero without taking d_lock We can be more aggressive about this, if we are clever and careful. This is subtle. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- fs/dcache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/lockref.h | 3 +- lib/lockref.c | 36 +++++++++++---- 3 files changed, 144 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 40432e59d72e..a14d00e9839e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -508,7 +508,7 @@ static void __dentry_kill(struct dentry *dentry) * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. */ - BUG_ON((int)dentry->d_lockref.count > 0); + BUG_ON(dentry->d_lockref.count > 0); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -561,7 +561,7 @@ static inline struct dentry *lock_parent(struct dentry *dentry) struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; - if (unlikely((int)dentry->d_lockref.count < 0)) + if (unlikely(dentry->d_lockref.count < 0)) return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; @@ -590,6 +590,110 @@ again: return parent; } +/* + * Try to do a lockless dput(), and return whether that was successful. + * + * If unsuccessful, we return false, having already taken the dentry lock. + * + * The caller needs to hold the RCU read lock, so that the dentry is + * guaranteed to stay around even if the refcount goes down to zero! + */ +static inline bool fast_dput(struct dentry *dentry) +{ + int ret; + unsigned int d_flags; + + /* + * If we have a d_op->d_delete() operation, we sould not + * let the dentry count go to zero, so use "put__or_lock". + */ + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) + return lockref_put_or_lock(&dentry->d_lockref); + + /* + * .. otherwise, we can try to just decrement the + * lockref optimistically. + */ + ret = lockref_put_return(&dentry->d_lockref); + + /* + * If the lockref_put_return() failed due to the lock being held + * by somebody else, the fast path has failed. We will need to + * get the lock, and then check the count again. + */ + if (unlikely(ret < 0)) { + spin_lock(&dentry->d_lock); + if (dentry->d_lockref.count > 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + return 1; + } + return 0; + } + + /* + * If we weren't the last ref, we're done. + */ + if (ret) + return 1; + + /* + * Careful, careful. The reference count went down + * to zero, but we don't hold the dentry lock, so + * somebody else could get it again, and do another + * dput(), and we need to not race with that. + * + * However, there is a very special and common case + * where we don't care, because there is nothing to + * do: the dentry is still hashed, it does not have + * a 'delete' op, and it's referenced and already on + * the LRU list. + * + * NOTE! Since we aren't locked, these values are + * not "stable". However, it is sufficient that at + * some point after we dropped the reference the + * dentry was hashed and the flags had the proper + * value. Other dentry users may have re-gotten + * a reference to the dentry and change that, but + * our work is done - we can leave the dentry + * around with a zero refcount. + */ + smp_rmb(); + d_flags = ACCESS_ONCE(dentry->d_flags); + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + + /* Nothing to do? Dropping the reference was all we needed? */ + if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) + return 1; + + /* + * Not the fast normal case? Get the lock. We've already decremented + * the refcount, but we'll need to re-check the situation after + * getting the lock. + */ + spin_lock(&dentry->d_lock); + + /* + * Did somebody else grab a reference to it in the meantime, and + * we're no longer the last user after all? Alternatively, somebody + * else could have killed it and marked it dead. Either way, we + * don't need to do anything else. + */ + if (dentry->d_lockref.count) { + spin_unlock(&dentry->d_lock); + return 1; + } + + /* + * Re-get the reference we optimistically dropped. We hold the + * lock, and we just tested that it was zero, so we can just + * set it to 1. + */ + dentry->d_lockref.count = 1; + return 0; +} + + /* * This is dput * @@ -622,8 +726,14 @@ void dput(struct dentry *dentry) return; repeat: - if (lockref_put_or_lock(&dentry->d_lockref)) + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); return; + } + + /* Slow case: now with the dentry lock held */ + rcu_read_unlock(); /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) @@ -810,7 +920,7 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if ((int)dentry->d_lockref.count > 0) { + if (dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 4bfde0e99ed5..b10b122dd099 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -28,12 +28,13 @@ struct lockref { #endif struct { spinlock_t lock; - unsigned int count; + int count; }; }; }; extern void lockref_get(struct lockref *); +extern int lockref_put_return(struct lockref *); extern int lockref_get_not_zero(struct lockref *); extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); diff --git a/lib/lockref.c b/lib/lockref.c index d2233de9a86e..ecb9a665ec19 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -60,7 +60,7 @@ void lockref_get(struct lockref *lockref) EXPORT_SYMBOL(lockref_get); /** - * lockref_get_not_zero - Increments count unless the count is 0 + * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ @@ -70,7 +70,7 @@ int lockref_get_not_zero(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) return 0; , return 1; @@ -78,7 +78,7 @@ int lockref_get_not_zero(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if (lockref->count) { + if (lockref->count > 0) { lockref->count++; retval = 1; } @@ -88,7 +88,7 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_get_or_lock - Increments count unless the count is 0 + * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. @@ -97,14 +97,14 @@ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); - if (!lockref->count) + if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); @@ -112,6 +112,26 @@ int lockref_get_or_lock(struct lockref *lockref) } EXPORT_SYMBOL(lockref_get_or_lock); +/** + * lockref_put_return - Decrement reference count if possible + * @lockref: pointer to lockref structure + * + * Decrement the reference count and return the new value. + * If the lockref was dead or locked, return an error. + */ +int lockref_put_return(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 0) + return -1; + , + return new.count; + ); + return -1; +} +EXPORT_SYMBOL(lockref_put_return); + /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure @@ -158,7 +178,7 @@ int lockref_get_not_dead(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if ((int)old.count < 0) + if (old.count < 0) return 0; , return 1; @@ -166,7 +186,7 @@ int lockref_get_not_dead(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if ((int) lockref->count >= 0) { + if (lockref->count >= 0) { lockref->count++; retval = 1; } -- cgit v1.2.3-58-ga151 From 34cece2e8a1d2b66f00e153a19b80b4d4cec4eb8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 12:47:38 -0500 Subject: take count and rcu_head out of fs_pin Signed-off-by: Al Viro --- include/linux/fs_pin.h | 10 ++-------- kernel/acct.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 68a54b7741aa..a2e71912e758 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,14 +1,8 @@ #include struct fs_pin { - atomic_long_t count; - union { - struct { - struct hlist_node s_list; - struct hlist_node m_list; - }; - struct rcu_head rcu; - }; + struct hlist_node s_list; + struct hlist_node m_list; void (*kill)(struct fs_pin *); }; diff --git a/kernel/acct.c b/kernel/acct.c index a74f3d1a0c26..b8fbefb8678f 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -80,6 +80,8 @@ static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; + atomic_long_t count; + struct rcu_head rcu; struct mutex lock; int active; unsigned long needcheck; @@ -126,8 +128,8 @@ out: static void acct_put(struct bsd_acct_struct *p) { - if (atomic_long_dec_and_test(&p->pin.count)) - kfree_rcu(p, pin.rcu); + if (atomic_long_dec_and_test(&p->count)) + kfree_rcu(p, rcu); } static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) @@ -141,7 +143,7 @@ again: rcu_read_unlock(); return NULL; } - if (!atomic_long_inc_not_zero(&res->pin.count)) { + if (!atomic_long_inc_not_zero(&res->count)) { rcu_read_unlock(); cpu_relax(); goto again; @@ -179,7 +181,7 @@ static void acct_kill(struct bsd_acct_struct *acct, pin_remove(&acct->pin); ns->bacct = new; acct->ns = NULL; - atomic_long_dec(&acct->pin.count); + atomic_long_dec(&acct->count); mutex_unlock(&acct->lock); acct_put(acct); } @@ -189,7 +191,7 @@ static void acct_pin_kill(struct fs_pin *pin) { struct bsd_acct_struct *acct; acct = container_of(pin, struct bsd_acct_struct, pin); - if (!atomic_long_inc_not_zero(&pin->count)) { + if (!atomic_long_inc_not_zero(&acct->count)) { rcu_read_unlock(); cpu_relax(); return; @@ -250,7 +252,7 @@ static int acct_on(struct filename *pathname) mnt = file->f_path.mnt; file->f_path.mnt = internal; - atomic_long_set(&acct->pin.count, 1); + atomic_long_set(&acct->count, 1); acct->pin.kill = acct_pin_kill; acct->file = file; acct->needcheck = jiffies; -- cgit v1.2.3-58-ga151 From fdab684d7202774bfd8762d4a656a553b787c8ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 11 Jan 2015 10:57:27 -0500 Subject: allow attaching fs_pin to a group not associated with some superblock Signed-off-by: Al Viro --- fs/fs_pin.c | 20 +++++++++++++------- fs/internal.h | 2 +- fs/super.c | 4 ++-- include/linux/fs_pin.h | 1 + 4 files changed, 17 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 5eb39a93a560..50ef7d2ef03c 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -14,14 +14,20 @@ void pin_remove(struct fs_pin *pin) spin_unlock(&pin_lock); } -void pin_insert(struct fs_pin *pin, struct vfsmount *m) +void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) { spin_lock(&pin_lock); - hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + if (p) + hlist_add_head(&pin->s_list, p); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + pin_insert_group(pin, m, &m->mnt_sb->s_pins); +} + void mnt_pin_kill(struct mount *m) { while (1) { @@ -38,18 +44,18 @@ void mnt_pin_kill(struct mount *m) } } -void sb_pin_kill(struct super_block *sb) +void group_pin_kill(struct hlist_head *p) { while (1) { - struct hlist_node *p; + struct hlist_node *q; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(sb->s_pins.first); - if (!p) { + q = ACCESS_ONCE(p->first); + if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, s_list); + pin = hlist_entry(q, struct fs_pin, s_list); pin->kill(pin); } } diff --git a/fs/internal.h b/fs/internal.h index e9a61fe67575..a6efd2f09ba3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -145,7 +145,7 @@ extern const struct file_operations pipefifo_fops; /* * fs_pin.c */ -extern void sb_pin_kill(struct super_block *sb); +extern void group_pin_kill(struct hlist_head *p); extern void mnt_pin_kill(struct mount *m); /* diff --git a/fs/super.c b/fs/super.c index eae088f6aaae..2d822459bc3d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -706,9 +706,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); if (remount_ro) { - if (sb->s_pins.first) { + if (!hlist_empty(&sb->s_pins)) { up_write(&sb->s_umount); - sb_pin_kill(sb); + group_pin_kill(&sb->s_pins); down_write(&sb->s_umount); if (!sb->s_root) return 0; diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index a2e71912e758..2be38d1464ae 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -7,4 +7,5 @@ struct fs_pin { }; void pin_remove(struct fs_pin *); +void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); -- cgit v1.2.3-58-ga151 From 59eda0e07f43c950d31756213b607af673e551f0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 17:53:21 -0500 Subject: new fs_pin killing logics Signed-off-by: Al Viro --- fs/fs_pin.c | 54 +++++++++++++++++++++++++---- include/linux/fs_pin.h | 13 ++++++- include/linux/pid_namespace.h | 4 +-- kernel/acct.c | 81 ++++++++++++++++++------------------------- 4 files changed, 96 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 50ef7d2ef03c..0c77bdc238b2 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "internal.h" @@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin) hlist_del(&pin->m_list); hlist_del(&pin->s_list); spin_unlock(&pin_lock); + spin_lock_irq(&pin->wait.lock); + pin->done = 1; + wake_up_locked(&pin->wait); + spin_unlock_irq(&pin->wait.lock); } void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) @@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) pin_insert_group(pin, m, &m->mnt_sb->s_pins); } +void pin_kill(struct fs_pin *p) +{ + wait_queue_t wait; + + if (!p) { + rcu_read_unlock(); + return; + } + init_wait(&wait); + spin_lock_irq(&p->wait.lock); + if (likely(!p->done)) { + p->done = -1; + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + p->kill(p); + return; + } + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return; + } + __add_wait_queue(&p->wait, &wait); + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + if (likely(list_empty(&wait.task_list))) + break; + /* OK, we know p couldn't have been freed yet */ + spin_lock_irq(&p->wait.lock); + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + break; + } + } + rcu_read_unlock(); +} + void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; - struct fs_pin *pin; rcu_read_lock(); p = ACCESS_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, m_list); - pin->kill(pin); + pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } @@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; - struct fs_pin *pin; rcu_read_lock(); q = ACCESS_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(q, struct fs_pin, s_list); - pin->kill(pin); + pin_kill(hlist_entry(q, struct fs_pin, s_list)); } } diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 2be38d1464ae..9dc4e0384bfb 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,11 +1,22 @@ -#include +#include struct fs_pin { + wait_queue_head_t wait; + int done; struct hlist_node s_list; struct hlist_node m_list; void (*kill)(struct fs_pin *); }; +struct vfsmount; + +static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) +{ + init_waitqueue_head(&p->wait); + p->kill = kill; +} + void pin_remove(struct fs_pin *); void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); +void pin_kill(struct fs_pin *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9cf6c51b181..918b117a7cd3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -19,7 +19,7 @@ struct pidmap { #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) -struct bsd_acct_struct; +struct fs_pin; struct pid_namespace { struct kref kref; @@ -37,7 +37,7 @@ struct pid_namespace { struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT - struct bsd_acct_struct *bacct; + struct fs_pin *bacct; #endif struct user_namespace *user_ns; struct work_struct proc_work; diff --git a/kernel/acct.c b/kernel/acct.c index cf6588ab517b..e6c10d1a4058 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; @@ -91,6 +90,8 @@ struct bsd_acct_struct { struct completion done; }; +static void do_acct_process(struct bsd_acct_struct *acct); + /* * Check the amount of free space and suspend/resume accordingly. */ @@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p) kfree_rcu(p, rcu); } +static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) +{ + return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; again: smp_rmb(); rcu_read_lock(); - res = ACCESS_ONCE(ns->bacct); + res = to_acct(ACCESS_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; @@ -150,7 +156,7 @@ again: } rcu_read_unlock(); mutex_lock(&res->lock); - if (!res->ns) { + if (res != to_acct(ACCESS_ONCE(ns->bacct))) { mutex_unlock(&res->lock); acct_put(res); goto again; @@ -158,6 +164,19 @@ again: return res; } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct = to_acct(pin); + mutex_lock(&acct->lock); + do_acct_process(acct); + schedule_work(&acct->work); + wait_for_completion(&acct->done); + cmpxchg(&acct->ns->bacct, pin, NULL); + mutex_unlock(&acct->lock); + pin_remove(pin); + acct_put(acct); +} + static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); @@ -168,49 +187,13 @@ static void close_work(struct work_struct *work) complete(&acct->done); } -static void acct_kill(struct bsd_acct_struct *acct) -{ - if (acct) { - struct pid_namespace *ns = acct->ns; - do_acct_process(acct); - INIT_WORK(&acct->work, close_work); - init_completion(&acct->done); - schedule_work(&acct->work); - wait_for_completion(&acct->done); - pin_remove(&acct->pin); - cmpxchg(&ns->bacct, acct, NULL); - acct->ns = NULL; - atomic_long_dec(&acct->count); - mutex_unlock(&acct->lock); - acct_put(acct); - } -} - -static void acct_pin_kill(struct fs_pin *pin) -{ - struct bsd_acct_struct *acct; - acct = container_of(pin, struct bsd_acct_struct, pin); - if (!atomic_long_inc_not_zero(&acct->count)) { - rcu_read_unlock(); - cpu_relax(); - return; - } - rcu_read_unlock(); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - acct_put(acct); - acct = NULL; - } - acct_kill(acct); -} - static int acct_on(struct filename *pathname) { struct file *file; struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); - struct bsd_acct_struct *acct, *old; + struct bsd_acct_struct *acct; + struct fs_pin *old; int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); @@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname) file->f_path.mnt = internal; atomic_long_set(&acct->count, 1); - acct->pin.kill = acct_pin_kill; + init_fs_pin(&acct->pin, acct_pin_kill); acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); + INIT_WORK(&acct->work, close_work); + init_completion(&acct->done); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); - old = acct_get(ns); - ns->bacct = acct; - acct_kill(old); + rcu_read_lock(); + old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); + pin_kill(old); mnt_drop_write(mnt); mntput(mnt); return 0; @@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(task_active_pid_ns(current))); + rcu_read_lock(); + pin_kill(task_active_pid_ns(current)->bacct); } return error; @@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(ns)); + rcu_read_lock(); + pin_kill(ns->bacct); } /* -- cgit v1.2.3-58-ga151