diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2009-05-05 19:21:40 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2009-05-15 15:24:18 +0200 |
commit | f1a11e0576c7a73d759d05d776692b2b2d37172b (patch) | |
tree | 3cb845c4cfd48cdbe0755d057c2698657fb965b5 | |
parent | b30505c81a9d4adea8b70ecff512b0216929b797 (diff) |
futex: remove the wait queue
The waitqueue which is used in struct futex_q is a leftover from the
futexfd implementation. There is no need to use a waitqueue at all, as
the waiting task is the only user of it. The waitqueue just adds
additional locking and a loop in the wake up path which both can be
avoided.
We have already a task reference in struct futex_q which is used for
PI futexes. Use it for normal futexes as well and just wake up the
task directly.
The logic of signalling the futex wakeup via setting q->lock_ptr to
NULL is kept with the difference that we set it NULL before doing the
wakeup. This opens an exit race window vs. a non futex wake up of the
to be woken up task, which we prevent with get_task_struct /
put_task_struct on the waiter.
[ Impact: simplification ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | kernel/futex.c | 58 |
1 files changed, 25 insertions, 33 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index aec8bf89bf4e..157bfcd725b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -100,8 +100,8 @@ struct futex_pi_state { */ struct futex_q { struct plist_node list; - /* There can only be a single waiter */ - wait_queue_head_t waiter; + /* Waiter reference */ + struct task_struct *task; /* Which hash list lock to use: */ spinlock_t *lock_ptr; @@ -111,7 +111,6 @@ struct futex_q { /* Optional priority inheritance state: */ struct futex_pi_state *pi_state; - struct task_struct *task; /* rt_waiter storage for requeue_pi: */ struct rt_mutex_waiter *rt_waiter; @@ -694,22 +693,29 @@ retry: */ static void wake_futex(struct futex_q *q) { - plist_del(&q->list, &q->list.plist); + struct task_struct *p = q->task; + /* - * The lock in wake_up_all() is a crucial memory barrier after the - * plist_del() and also before assigning to q->lock_ptr. + * We set q->lock_ptr = NULL _before_ we wake up the task. If + * a non futex wake up happens on another CPU then the task + * might exit and p would dereference a non existing task + * struct. Prevent this by holding a reference on p across the + * wake up. */ - wake_up(&q->waiter); + get_task_struct(p); + + plist_del(&q->list, &q->list.plist); /* - * The waiting task can free the futex_q as soon as this is written, - * without taking any locks. This must come last. - * - * A memory barrier is required here to prevent the following store to - * lock_ptr from getting ahead of the wakeup. Clearing the lock at the - * end of wake_up() does not prevent this store from moving. + * The waiting task can free the futex_q as soon as + * q->lock_ptr = NULL is written, without taking any locks. A + * memory barrier is required here to prevent the following + * store to lock_ptr from getting ahead of the plist_del. */ smp_wmb(); q->lock_ptr = NULL; + + wake_up_state(p, TASK_NORMAL); + put_task_struct(p); } static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) @@ -1003,7 +1009,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) WARN_ON(!q->rt_waiter); q->rt_waiter = NULL; - wake_up(&q->waiter); + wake_up_state(q->task, TASK_NORMAL); } /** @@ -1280,8 +1286,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) { struct futex_hash_bucket *hb; - init_waitqueue_head(&q->waiter); - get_futex_key_refs(&q->key); hb = hash_futex(&q->key); q->lock_ptr = &hb->lock; @@ -1575,11 +1579,9 @@ out: * @hb: the futex hash bucket, must be locked by the caller * @q: the futex_q to queue up on * @timeout: the prepared hrtimer_sleeper, or null for no timeout - * @wait: the wait_queue to add to the futex_q after queueing in the hb */ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - struct hrtimer_sleeper *timeout, - wait_queue_t *wait) + struct hrtimer_sleeper *timeout) { queue_me(q, hb); @@ -1587,19 +1589,11 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * There might have been scheduling since the queue_me(), as we * cannot hold a spinlock across the get_user() in case it * faults, and we cannot just set TASK_INTERRUPTIBLE state when - * queueing ourselves into the futex hash. This code thus has to + * queueing ourselves into the futex hash. This code thus has to * rely on the futex_wake() code removing us from hash when it * wakes us up. */ - - /* add_wait_queue is the barrier after __set_current_state. */ - __set_current_state(TASK_INTERRUPTIBLE); - - /* - * Add current as the futex_q waiter. We don't remove ourselves from - * the wait_queue because we are the only user of it. - */ - add_wait_queue(&q->waiter, wait); + set_current_state(TASK_INTERRUPTIBLE); /* Arm the timer */ if (timeout) { @@ -1704,7 +1698,6 @@ static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct hrtimer_sleeper timeout, *to = NULL; - DECLARE_WAITQUEUE(wait, current); struct restart_block *restart; struct futex_hash_bucket *hb; struct futex_q q; @@ -1733,7 +1726,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, goto out; /* queue_me and wait for wakeup, timeout, or a signal. */ - futex_wait_queue_me(hb, &q, to, &wait); + futex_wait_queue_me(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ ret = 0; @@ -2147,7 +2140,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; struct rt_mutex *pi_mutex = NULL; - DECLARE_WAITQUEUE(wait, current); struct restart_block *restart; struct futex_hash_bucket *hb; union futex_key key2; @@ -2191,7 +2183,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, } /* Queue the futex_q, drop the hb lock, wait for wakeup. */ - futex_wait_queue_me(hb, &q, to, &wait); + futex_wait_queue_me(hb, &q, to); spin_lock(&hb->lock); ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); |