author		Linus Torvalds <torvalds@linux-foundation.org>	2020-05-08 08:41:09 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-05-08 08:41:09 -0700
commit		af38553c661207f96464e15f3506bf788daee474
tree		5b58816c3cd8126736065c831dc3bc5a61cbea12 /fs
parent		79dede78c0573618e3137d3d8cbf78c84e25fabd
parent		14f69140ff9c92a0928547ceefb153a842e8492c
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"14 fixes and one selftest to verify the ipc fixes herein"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: limit boost_watermark on small zones
ubsan: disable UBSAN_ALIGNMENT under COMPILE_TEST
mm/vmscan: remove unnecessary argument description of isolate_lru_pages()
epoll: atomically remove wait entry on wake up
kselftests: introduce new epoll60 testcase for catching lost wakeups
percpu: make pcpu_alloc() aware of current gfp context
mm/slub: fix incorrect interpretation of s->offset
scripts/gdb: repair rb_first() and rb_last()
eventpoll: fix missing wakeup for ovflist in ep_poll_callback
arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()
scripts/decodecode: fix trapping instruction formatting
kernel/kcov.c: fix typos in kcov_remote_start documentation
mm/page_alloc: fix watchdog soft lockups during set_zone_contiguous()
mm, memcg: fix error return value of mem_cgroup_css_alloc()
ipc/mqueue.c: change __do_notify() to bypass check_kill_permission()
Diffstat (limited to 'fs')
-rw-r--r--	fs/eventpoll.c	61
1 file changed, 33 insertions(+), 28 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8c596641a72b..aba03ee749f8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
 {
 	struct eventpoll *ep = epi->ep;
 
+	/* Fast preliminary check */
+	if (epi->next != EP_UNACTIVE_PTR)
+		return false;
+
 	/* Check that the same epi has not been just chained from another CPU */
 	if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
 		return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
 	 * chained in ep->ovflist and requeued later on.
 	 */
 	if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
-		if (epi->next == EP_UNACTIVE_PTR &&
-		    chain_epi_lockless(epi))
+		if (chain_epi_lockless(epi))
+			ep_pm_stay_awake_rcu(epi);
+	} else if (!ep_is_linked(epi)) {
+		/* In the usual case, add event to ready list. */
+		if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
 			ep_pm_stay_awake_rcu(epi);
-		goto out_unlock;
-	}
-
-	/* If this file is already in the ready list we exit soon */
-	if (!ep_is_linked(epi) &&
-	    list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
-		ep_pm_stay_awake_rcu(epi);
 	}
 
 	/*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
 	int res = 0, eavail, timed_out = 0;
 	u64 slack = 0;
-	bool waiter = false;
 	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
@@ -1867,21 +1866,23 @@ fetch_events:
 	 */
 	ep_reset_busy_poll_napi_id(ep);
 
-	/*
-	 * We don't have any available event to return to the caller. We need
-	 * to sleep here, and we will be woken by ep_poll_callback() when events
-	 * become available.
-	 */
-	if (!waiter) {
-		waiter = true;
-		init_waitqueue_entry(&wait, current);
-
+	do {
+		/*
+		 * Internally init_wait() uses autoremove_wake_function(),
+		 * thus wait entry is removed from the wait queue on each
+		 * wakeup. Why it is important? In case of several waiters
+		 * each new wakeup will hit the next waiter, giving it the
+		 * chance to harvest new event. Otherwise wakeup can be
+		 * lost. This is also good performance-wise, because on
+		 * normal wakeup path no need to call __remove_wait_queue()
+		 * explicitly, thus ep->lock is not taken, which halts the
+		 * event delivery.
+		 */
+		init_wait(&wait);
 		write_lock_irq(&ep->lock);
 		__add_wait_queue_exclusive(&ep->wq, &wait);
 		write_unlock_irq(&ep->lock);
-	}
 
-	for (;;) {
 		/*
 		 * We don't want to sleep if the ep_poll_callback() sends us
 		 * a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ fetch_events:
 			timed_out = 1;
 			break;
 		}
-	}
+
+		/* We were woken up, thus go and try to harvest some events */
+		eavail = 1;
+
+	} while (0);
 
 	__set_current_state(TASK_RUNNING);
 
+	if (!list_empty_careful(&wait.entry)) {
+		write_lock_irq(&ep->lock);
+		__remove_wait_queue(&ep->wq, &wait);
+		write_unlock_irq(&ep->lock);
+	}
+
 send_events:
 	/*
 	 * Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ send_events:
 	    !(res = ep_send_events(ep, events, maxevents)) &&
 	    !timed_out)
 		goto fetch_events;
 
-	if (waiter) {
-		write_lock_irq(&ep->lock);
-		__remove_wait_queue(&ep->wq, &wait);
-		write_unlock_irq(&ep->lock);
-	}
-
 	return res;
 }
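
The "fast preliminary check" added to chain_epi_lockless() is the classic
test-and-test-and-set idea: a plain read filters out entries that are already
claimed, so contended CPUs do not pull the cache line in exclusive mode just to
lose the cmpxchg. A minimal user-space sketch of the same pattern, using C11
atomics and made-up names (UNCLAIMED, try_claim), might look like this:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define UNCLAIMED ((void *)-1L)	/* stand-in for EP_UNACTIVE_PTR */

/*
 * Sketch of the shape of chain_epi_lockless(): only one caller may
 * claim the slot; everyone else must back off without retrying.
 */
static bool try_claim(_Atomic(void *) *slot)
{
	void *expected = UNCLAIMED;

	/* Fast preliminary check: a cheap relaxed load that skips the
	 * expensive atomic when the slot is already taken. */
	if (atomic_load_explicit(slot, memory_order_relaxed) != UNCLAIMED)
		return false;

	/* Slow path: the compare-and-swap; exactly one caller wins. */
	return atomic_compare_exchange_strong(slot, &expected, NULL);
}

The preliminary check is purely an optimization; the cmpxchg alone is what
guarantees that a single CPU chains the epitem.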
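
The long comment above init_wait() is the heart of the fix: init_wait()
installs autoremove_wake_function() as the wake callback, so a successful
wakeup also unlinks the entry from ep->wq and the next wakeup reaches the next
waiter instead of being absorbed. The helper lives in kernel/sched/wait.c and
is roughly (quoted from memory, not from this diff):

int autoremove_wake_function(struct wait_queue_entry *wq_entry,
			     unsigned mode, int sync, void *key)
{
	/* Wake the task as usual; on success, take the entry off the
	 * wait queue so later wakeups are routed to other waiters. */
	int ret = default_wake_function(wq_entry, mode, sync, key);

	if (ret)
		list_del_init(&wq_entry->entry);

	return ret;
}

Because the entry may or may not still be queued by the time ep_poll() resumes,
the new code checks list_empty_careful(&wait.entry) and only then takes
ep->lock to unlink it by hand; on the common wakeup path the lock is never
touched.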
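
Reproducing the lost wakeup needs several threads blocked in epoll_wait() on
one epoll instance while events stream in. A rough user-space probe in the
spirit of the epoll60 selftest mentioned above (a simplified sketch, not the
actual testcase; NWAITERS and waiter() are invented here) could look like:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>

#define NWAITERS 4

static int epfd, evfd;

/* Each waiter blocks in epoll_wait(); exclusive wakeups on the epoll
 * wait queue should hand every event to exactly one thread. */
static void *waiter(void *arg)
{
	struct epoll_event ev;
	uint64_t v;

	for (;;) {
		if (epoll_wait(epfd, &ev, 1, -1) != 1)
			continue;
		/* With EFD_SEMAPHORE each successful read consumes one
		 * event; EAGAIN means another waiter got there first. */
		if (read(evfd, &v, sizeof(v)) == sizeof(v))
			fprintf(stderr, "thread %ld got an event\n",
				(long)(intptr_t)arg);
	}
	return NULL;
}

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN };
	pthread_t tid[NWAITERS];
	uint64_t one = 1;
	long i;

	epfd = epoll_create1(0);
	evfd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE);
	epoll_ctl(epfd, EPOLL_CTL_ADD, evfd, &ev);

	for (i = 0; i < NWAITERS; i++)
		pthread_create(&tid[i], NULL, waiter, (void *)i);

	/* Fire a stream of events; with the bug, a wakeup could be lost
	 * and an event would sit unharvested until the next one. */
	for (i = 0; i < 1000; i++)
		write(evfd, &one, sizeof(one));

	sleep(1);	/* crude: give the waiters time to drain */
	return 0;
}

Build with -pthread. On a fixed kernel every written event is eventually
harvested by some thread; the real epoll60 selftest in the series checks this
far more rigorously.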