diff options
-rw-r--r-- | fs/eventpoll.c | 65 |
1 files changed, 29 insertions, 36 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 67db22fe99c5..2737ef591b3e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -50,10 +50,10 @@ * * 1) epmutex (mutex) * 2) ep->mtx (mutex) - * 3) ep->lock (spinlock) + * 3) ep->wq.lock (spinlock) * * The acquire order is the one listed above, from 1 to 3. - * We need a spinlock (ep->lock) because we manipulate objects + * We need a spinlock (ep->wq.lock) because we manipulate objects * from inside the poll callback, that might be triggered from * a wake_up() that in turn might be called from IRQ context. * So we can't sleep inside the poll callback and hence we need @@ -85,7 +85,7 @@ * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global - * mutex "epmutex" (together with "ep->lock") to have it working, + * mutex "epmutex" (together with "ep->wq.lock") to have it working, * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epmutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee @@ -182,11 +182,10 @@ struct epitem { * This structure is stored inside the "private_data" member of the file * structure and represents the main data structure for the eventpoll * interface. + * + * Access to it is protected by the lock inside wq. */ struct eventpoll { - /* Protect the access to this structure */ - spinlock_t lock; - /* * This mutex is used to ensure that files are not removed * while epoll is using them. This is held during the event @@ -210,7 +209,7 @@ struct eventpoll { /* * This is a single linked list that chains all the "struct epitem" that * happened while transferring ready events to userspace w/out - * holding ->lock. + * holding ->wq.lock. */ struct epitem *ovflist; @@ -688,17 +687,17 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * because we want the "sproc" callback to be able to do it * in a lockless way. */ - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); list_splice_init(&ep->rdllist, &txlist); ep->ovflist = NULL; - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); /* * Now call the callback function. */ res = (*sproc)(ep, &txlist, priv); - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); /* * During the time we spent inside the "sproc" callback, some * other events might have been queued by the poll callback. @@ -740,7 +739,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, if (waitqueue_active(&ep->poll_wait)) pwake++; } - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); if (!ep_locked) mutex_unlock(&ep->mtx); @@ -768,12 +767,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) struct file *file = epi->ffd.file; /* - * Removes poll wait queue hooks. We _have_ to do this without holding - * the "ep->lock" otherwise a deadlock might occur. This because of the - * sequence of the lock acquisition. Here we do "ep->lock" then the wait - * queue head lock when unregistering the wait queue. The wakeup callback - * will run by holding the wait queue head lock and will call our callback - * that will try to get "ep->lock". + * Removes poll wait queue hooks. */ ep_unregister_pollwait(ep, epi); @@ -784,10 +778,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) rb_erase_cached(&epi->rbn, &ep->rbr); - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); if (ep_is_linked(&epi->rdllink)) list_del_init(&epi->rdllink); - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); wakeup_source_unregister(ep_wakeup_source(epi)); /* @@ -837,7 +831,7 @@ static void ep_free(struct eventpoll *ep) * Walks through the whole tree by freeing each "struct epitem". At this * point we are sure no poll callbacks will be lingering around, and also by * holding "epmutex" we can be sure that no file cleanup code will hit - * us during this operation. So we can avoid the lock on "ep->lock". + * us during this operation. So we can avoid the lock on "ep->wq.lock". * We do not need to lock ep->mtx, either, we only do it to prevent * a lockdep warning. */ @@ -1017,7 +1011,6 @@ static int ep_alloc(struct eventpoll **pep) if (unlikely(!ep)) goto free_uid; - spin_lock_init(&ep->lock); mutex_init(&ep->mtx); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); @@ -1122,7 +1115,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v __poll_t pollflags = key_to_poll(key); int ewake = 0; - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); ep_set_busy_poll_napi_id(epi); @@ -1199,7 +1192,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v pwake++; out_unlock: - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -1484,7 +1477,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, goto error_remove_epi; /* We have to drop the new item inside our item list to keep track of it */ - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); @@ -1501,7 +1494,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, pwake++; } - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); atomic_long_inc(&ep->user->epoll_watches); @@ -1527,10 +1520,10 @@ error_unregister: * list, since that is used/cleaned only inside a section bound by "mtx". * And ep_insert() is called with "mtx" held. */ - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); if (ep_is_linked(&epi->rdllink)) list_del_init(&epi->rdllink); - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); wakeup_source_unregister(ep_wakeup_source(epi)); @@ -1572,9 +1565,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * 1) Flush epi changes above to other CPUs. This ensures * we do not miss events from ep_poll_callback if an * event occurs immediately after we call f_op->poll(). - * We need this because we did not take ep->lock while + * We need this because we did not take ep->wq.lock while * changing epi above (but ep_poll_callback does take - * ep->lock). + * ep->wq.lock). * * 2) We also need to ensure we do not miss _past_ events * when calling f_op->poll(). This barrier also @@ -1593,7 +1586,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { - spin_lock_irq(&ep->lock); + spin_lock_irq(&ep->wq.lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); @@ -1604,7 +1597,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, if (waitqueue_active(&ep->poll_wait)) pwake++; } - spin_unlock_irq(&ep->lock); + spin_unlock_irq(&ep->wq.lock); } /* We have to call this outside the lock */ @@ -1756,7 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * caller specified a non blocking operation. */ timed_out = 1; - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); goto check_events; } @@ -1765,7 +1758,7 @@ fetch_events: if (!ep_events_available(ep)) ep_busy_loop(ep, timed_out); - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); if (!ep_events_available(ep)) { /* @@ -1807,11 +1800,11 @@ fetch_events: break; } - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; - spin_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->wq.lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1821,7 +1814,7 @@ check_events: /* Is it worth to try to dig for events ? */ eavail = ep_events_available(ep); - spin_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->wq.lock, flags); /* * Try to transfer events to user space. In case we get 0 events and |