summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2020-04-28 16:15:09 -0500
committerEric W. Biederman <ebiederm@xmission.com>2020-04-28 16:19:04 -0500
commit507122805edd25fcda8e959b870cd897b27a05c1 (patch)
tree440123d6108568366d2cd81c40e7b638ec57bc68
parent3147d8aaa03eac0e68742621e8f05323459aa37d (diff)
parent6b03d1304a32dc8450c7516000a0fe07bef7c446 (diff)
proc: Ensure we see the exit of each process tid exactly
In the work to remove proc_mnt I noticed that we were calling proc_flush_task now proc_flush_pid possibly multiple times for the same pid because of how de_thread works. This is a bare minimal patchset to sort out de_thread, by introducing exchange_tids and the helper of exchange_tids hlists_swap_heads_rcu. The actual call of exchange_tids should be slowpath so I have prioritized readability over getting every last drop of performance. I have also read through a bunch of the code to see if I could find anything that would be affected by this change. Users of has_group_leader_pid were a good canidates. But I also looked at other cases that might have a pid->task->pid transition. I ignored other sources of races with de_thread and exec as those are preexisting. I found a close call with send_signals user of task_active_pid_ns, but all pids of a thread group are guaranteeds to be in the same pid namespace so there is not a problem. I found a few pieces of debugging code that do: task = pid_task(pid, PIDTYPE_PID); if (task) { printk("%u\n", task->pid); } But I can't see how we care if it happens at the wrong moment that task->pid might not match pid_nr(pid); Similarly because the code in posix-cpu-timers goes pid->task->pid it feels like there should be a problem. But as the code that works with PIDTYPE_PID is only available within the thread group, and as de_thread kills all of the other threads before it makes any changes of this kind the race can not happen. In short I don't think this change will introduce any regressions. Eric W. Biederman (2): rculist: Add hlists_swap_heads_rcu proc: Ensure we see the exit of each process tid exactly once fs/exec.c | 5 +---- include/linux/pid.h | 1 + include/linux/rculist.h | 21 +++++++++++++++++++++ kernel/pid.c | 19 +++++++++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) Link: https://lore.kernel.org/lkml/87sggnajpv.fsf_-_@x220.int.ebiederm.org/ Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r--fs/exec.c5
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/rculist.h21
-rw-r--r--kernel/pid.c19
4 files changed, 42 insertions, 4 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 06b4c550af5d..9b60f927afd7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1186,11 +1186,8 @@ static int de_thread(struct task_struct *tsk)
/* Become a process group leader with the old leader's pid.
* The old leader becomes a thread of the this thread group.
- * Note: The old leader also uses this pid until release_task
- * is called. Odd but simple and correct.
*/
- tsk->pid = leader->pid;
- change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ exchange_tids(tsk, leader);
transfer_pid(leader, tsk, PIDTYPE_TGID);
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index cc896f0fc4e3..2159ffca63fc 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -102,6 +102,7 @@ extern void attach_pid(struct task_struct *task, enum pid_type);
extern void detach_pid(struct task_struct *task, enum pid_type);
extern void change_pid(struct task_struct *task, enum pid_type,
struct pid *pid);
+extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8214cdc715f2..67867e0d4cec 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -506,6 +506,27 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
WRITE_ONCE(old->pprev, LIST_POISON2);
}
+/**
+ * hlists_swap_heads_rcu - swap the lists the hlist heads point to
+ * @left: The hlist head on the left
+ * @right: The hlist head on the right
+ *
+ * The lists start out as [@left ][node1 ... ] and
+ [@right ][node2 ... ]
+ * The lists end up as [@left ][node2 ... ]
+ * [@right ][node1 ... ]
+ */
+static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right)
+{
+ struct hlist_node *node1 = left->first;
+ struct hlist_node *node2 = right->first;
+
+ rcu_assign_pointer(left->first, node2);
+ rcu_assign_pointer(right->first, node1);
+ WRITE_ONCE(node2->pprev, &left->first);
+ WRITE_ONCE(node1->pprev, &right->first);
+}
+
/*
* return the first or the next element in an RCU protected hlist
*/
diff --git a/kernel/pid.c b/kernel/pid.c
index c835b844aca7..6d5d0a5bda82 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -363,6 +363,25 @@ void change_pid(struct task_struct *task, enum pid_type type,
attach_pid(task, type);
}
+void exchange_tids(struct task_struct *left, struct task_struct *right)
+{
+ struct pid *pid1 = left->thread_pid;
+ struct pid *pid2 = right->thread_pid;
+ struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
+ struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
+
+ /* Swap the single entry tid lists */
+ hlists_swap_heads_rcu(head1, head2);
+
+ /* Swap the per task_struct pid */
+ rcu_assign_pointer(left->thread_pid, pid2);
+ rcu_assign_pointer(right->thread_pid, pid1);
+
+ /* Swap the cached value */
+ WRITE_ONCE(left->pid, pid_nr(pid2));
+ WRITE_ONCE(right->pid, pid_nr(pid1));
+}
+
/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type type)