diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-06-03 16:13:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-06-03 16:13:25 -0700 |
commit | 67850b7bdcd2803e10d019f0da5673a92139b43a (patch) | |
tree | 3f8914ce44edd9b5dd3a55ca4fb47efe0a5a2552 | |
parent | 1ec6574a3c0a22c130c08e8c36c825cb87d68f8e (diff) | |
parent | 31cae1eaae4fd65095ad6a3659db467bc3c2599e (diff) |
Merge tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ptrace_stop cleanups from Eric Biederman:
"While looking at the ptrace problems with PREEMPT_RT and the problems
Peter Zijlstra was encountering with ptrace in his freezer rewrite I
identified some cleanups to ptrace_stop that make sense on their own
and move make resolving the other problems much simpler.
The biggest issue is the habit of the ptrace code to change
task->__state from the tracer to suppress TASK_WAKEKILL from waking up
the tracee. No other code in the kernel does that and it is straight
forward to update signal_wake_up and friends to make that unnecessary.
Peter's task freezer sets frozen tasks to a new state TASK_FROZEN and
then it stores them by calling "wake_up_state(t, TASK_FROZEN)" relying
on the fact that all stopped states except the special stop states can
tolerate spurious wake up and recover their state.
The state of stopped and traced tasked is changed to be stored in
task->jobctl as well as in task->__state. This makes it possible for
the freezer to recover tasks in these special states, as well as
serving as a general cleanup. With a little more work in that
direction I believe TASK_STOPPED can learn to tolerate spurious wake
ups and become an ordinary stop state.
The TASK_TRACED state has to remain a special state as the registers
for a process are only reliably available when the process is stopped
in the scheduler. Fundamentally ptrace needs acess to the saved
register values of a task.
There are bunch of semi-random ptrace related cleanups that were found
while looking at these issues.
One cleanup that deserves to be called out is from commit 57b6de08b5f6
("ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs"). This
makes a change that is technically user space visible, in the handling
of what happens to a tracee when a tracer dies unexpectedly. According
to our testing and our understanding of userspace nothing cares that
spurious SIGTRAPs can be generated in that case"
* tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state
ptrace: Always take siglock in ptrace_resume
ptrace: Don't change __state
ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs
ptrace: Document that wait_task_inactive can't fail
ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
signal: Use lockdep_assert_held instead of assert_spin_locked
ptrace: Remove arch_ptrace_attach
ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
signal: Replace __group_send_sig_info with send_signal_locked
signal: Rename send_signal send_signal_locked
-rw-r--r-- | arch/ia64/include/asm/ptrace.h | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 57 | ||||
-rw-r--r-- | arch/um/include/asm/thread_info.h | 2 | ||||
-rw-r--r-- | arch/um/kernel/exec.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/ptrace.c | 8 | ||||
-rw-r--r-- | arch/um/kernel/signal.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 3 | ||||
-rw-r--r-- | arch/xtensa/kernel/ptrace.c | 4 | ||||
-rw-r--r-- | arch/xtensa/kernel/signal.c | 4 | ||||
-rw-r--r-- | drivers/tty/tty_jobctrl.c | 4 | ||||
-rw-r--r-- | include/linux/ptrace.h | 7 | ||||
-rw-r--r-- | include/linux/sched.h | 10 | ||||
-rw-r--r-- | include/linux/sched/jobctl.h | 8 | ||||
-rw-r--r-- | include/linux/sched/signal.h | 20 | ||||
-rw-r--r-- | include/linux/signal.h | 3 | ||||
-rw-r--r-- | kernel/ptrace.c | 87 | ||||
-rw-r--r-- | kernel/sched/core.c | 5 | ||||
-rw-r--r-- | kernel/signal.c | 140 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 6 |
20 files changed, 140 insertions, 240 deletions
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index a10a498eede1..402874489890 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -139,10 +139,6 @@ static inline long regs_return_value(struct pt_regs *regs) #define arch_ptrace_stop_needed() \ (!test_thread_flag(TIF_RESTORE_RSE)) - extern void ptrace_attach_sync_user_rbs (struct task_struct *); - #define arch_ptrace_attach(child) \ - ptrace_attach_sync_user_rbs(child) - #define arch_has_single_step() (1) #define arch_has_block_step() (1) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 4fc6e38a8459..ab8aeb34d1d9 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -618,63 +618,6 @@ void ia64_sync_krbs(void) } /* - * After PTRACE_ATTACH, a thread's register backing store area in user - * space is assumed to contain correct data whenever the thread is - * stopped. arch_ptrace_stop takes care of this on tracing stops. - * But if the child was already stopped for job control when we attach - * to it, then it might not ever get into ptrace_stop by the time we - * want to examine the user memory containing the RBS. - */ -void -ptrace_attach_sync_user_rbs (struct task_struct *child) -{ - int stopped = 0; - struct unw_frame_info info; - - /* - * If the child is in TASK_STOPPED, we need to change that to - * TASK_TRACED momentarily while we operate on it. This ensures - * that the child won't be woken up and return to user mode while - * we are doing the sync. (It can only be woken up for SIGKILL.) - */ - - read_lock(&tasklist_lock); - if (child->sighand) { - spin_lock_irq(&child->sighand->siglock); - if (READ_ONCE(child->__state) == TASK_STOPPED && - !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { - set_notify_resume(child); - - WRITE_ONCE(child->__state, TASK_TRACED); - stopped = 1; - } - spin_unlock_irq(&child->sighand->siglock); - } - read_unlock(&tasklist_lock); - - if (!stopped) - return; - - unw_init_from_blocked_task(&info, child); - do_sync_rbs(&info, ia64_sync_user_rbs); - - /* - * Now move the child back into TASK_STOPPED if it should be in a - * job control stop, so that SIGCONT can be used to wake it up. - */ - read_lock(&tasklist_lock); - if (child->sighand) { - spin_lock_irq(&child->sighand->siglock); - if (READ_ONCE(child->__state) == TASK_TRACED && - (child->signal->flags & SIGNAL_STOP_STOPPED)) { - WRITE_ONCE(child->__state, TASK_STOPPED); - } - spin_unlock_irq(&child->sighand->siglock); - } - read_unlock(&tasklist_lock); -} - -/* * Write f32-f127 back to task->thread.fph if it has been modified. */ inline void diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 1395cbd7e340..c7b4b49826a2 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -60,6 +60,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -68,5 +69,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index c85e40c72779..58938d75871a 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -43,7 +43,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 181cc9aafb25..80b90b1276a1 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -336,7 +336,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index bfaf6ab1ac03..5154b27de580 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -11,7 +11,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -21,7 +21,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -120,7 +120,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -144,7 +144,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 88cd9b5c1b74..ae4658f576ab 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0f3c307b37b3..8e2b2552b5ee 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -180,8 +180,7 @@ void set_task_blockstep(struct task_struct *task, bool on) * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but - * PTRACE_KILL is not safe. + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). */ local_irq_disable(); debugctl = get_debugctlmsr(); diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 22cdaa6729d3..f29477162ede 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -224,12 +224,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_SINGLESTEP; + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } /* diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c9ffd42db873..876d5df157ed 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -472,7 +472,7 @@ static void do_signal(struct pt_regs *regs) /* Set up the stack frame */ ret = setup_frame(&ksig, sigmask_to_save(), regs); signal_setup_done(ret, &ksig, 0); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; @@ -498,7 +498,7 @@ static void do_signal(struct pt_regs *regs) /* If there's no signal to deliver, we just restore the saved mask. */ restore_saved_sigmask(); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; } diff --git a/drivers/tty/tty_jobctrl.c b/drivers/tty/tty_jobctrl.c index 80b86a7992b5..0d04287da098 100644 --- a/drivers/tty/tty_jobctrl.c +++ b/drivers/tty/tty_jobctrl.c @@ -215,8 +215,8 @@ int tty_signal_session_leader(struct tty_struct *tty, int exit_session) spin_unlock_irq(&p->sighand->siglock); continue; } - __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); - __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); + send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); + send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); put_pid(p->signal->tty_old_pgrp); /* A noop */ spin_lock(&tty->ctrl.lock); tty_pgrp = get_pid(tty->ctrl.pgrp); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index db4509587d2c..c952c5ba8fab 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 -#define PT_DTRACE 0x00000002 /* delayed trace (used on um) */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ @@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) -/* single stepping state bits (used on ARM and PA-RISC) */ -#define PT_SINGLESTEP_BIT 31 -#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) -#define PT_BLOCKSTEP_BIT 30 -#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) - extern long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); diff --git a/include/linux/sched.h b/include/linux/sched.h index b89c8571187b..c46f3a63b758 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -103,7 +103,7 @@ struct task_group; /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) +#define TASK_TRACED __TASK_TRACED #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) @@ -118,11 +118,9 @@ struct task_group; #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) -#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) - -#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) - -#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) +#define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) +#define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) +#define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) /* * Special states are those that do not use the normal wait-loop pattern. See diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index fa067de9f1a9..68876d0a7ef9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,6 +19,10 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ +#define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ + +#define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */ +#define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -28,6 +32,10 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) +#define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) + +#define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT) +#define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 20ed5ba2bde4..cafbe03eed01 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -294,8 +294,10 @@ static inline int kernel_dequeue_signal(void) static inline void kernel_signal_stop(void) { spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) + if (current->jobctl & JOBCTL_STOP_DEQUEUED) { + current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); + } spin_unlock_irq(¤t->sighand->siglock); schedule(); @@ -444,13 +446,23 @@ extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); -static inline void signal_wake_up(struct task_struct *t, bool resume) +static inline void signal_wake_up(struct task_struct *t, bool fatal) { - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); + unsigned int state = 0; + if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { + t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); + state = TASK_WAKEKILL | __TASK_TRACED; + } + signal_wake_up_state(t, state); } static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) { - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); + unsigned int state = 0; + if (resume) { + t->jobctl &= ~JOBCTL_TRACED; + state = __TASK_TRACED; + } + signal_wake_up_state(t, state); } void task_join_group_stop(struct task_struct *task); diff --git a/include/linux/signal.h b/include/linux/signal.h index a6db6f2ae113..3b98e7a28538 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -282,7 +282,8 @@ extern int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); -extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); +extern int send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *p, enum pid_type type); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49c29baf9907..156a99283b11 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(struct task_struct *task) return true; } -/* Ensure that nothing can wake it up, even SIGKILL */ +/* + * Ensure that nothing can wake it up, even SIGKILL + * + * A task is switched to this state while a ptrace operation is in progress; + * such that the ptrace operation is uninterruptible. + */ static bool ptrace_freeze_traced(struct task_struct *task) { bool ret = false; @@ -197,7 +202,7 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { - WRITE_ONCE(task->__state, __TASK_TRACED); + task->jobctl |= JOBCTL_PTRACE_FROZEN; ret = true; } spin_unlock_irq(&task->sighand->siglock); @@ -207,23 +212,21 @@ static bool ptrace_freeze_traced(struct task_struct *task) static void ptrace_unfreeze_traced(struct task_struct *task) { - if (READ_ONCE(task->__state) != __TASK_TRACED) - return; - - WARN_ON(!task->ptrace || task->parent != current); + unsigned long flags; /* - * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. - * Recheck state under the lock to close this race. + * The child may be awake and may have cleared + * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will + * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. */ - spin_lock_irq(&task->sighand->siglock); - if (READ_ONCE(task->__state) == __TASK_TRACED) { - if (__fatal_signal_pending(task)) + if (lock_task_sighand(task, &flags)) { + task->jobctl &= ~JOBCTL_PTRACE_FROZEN; + if (__fatal_signal_pending(task)) { + task->jobctl &= ~TASK_TRACED; wake_up_state(task, __TASK_TRACED); - else - WRITE_ONCE(task->__state, TASK_TRACED); + } + unlock_task_sighand(task, &flags); } - spin_unlock_irq(&task->sighand->siglock); } /** @@ -256,7 +259,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). @@ -266,17 +268,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) { - if (!wait_task_inactive(child, __TASK_TRACED)) { - /* - * This can only happen if may_ptrace_stop() fails and - * ptrace_stop() changes ->state back to TASK_RUNNING, - * so we should not worry about leaking __TASK_TRACED. - */ - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); - ret = -ESRCH; - } - } + if (!ret && !ignore_state && + WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED))) + ret = -ESRCH; return ret; } @@ -475,8 +469,10 @@ static int ptrace_attach(struct task_struct *task, long request, * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task) && - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { + task->jobctl &= ~JOBCTL_STOPPED; signal_wake_up_state(task, __TASK_STOPPED); + } spin_unlock(&task->sighand->siglock); @@ -846,8 +842,6 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { - bool need_siglock; - if (!valid_signal(data)) return -EIO; @@ -883,18 +877,12 @@ static int ptrace_resume(struct task_struct *child, long request, * Note that we need siglock even if ->exit_code == data and/or this * status was not reported yet, the new status must not be cleared by * wait_task_stopped() after resume. - * - * If data == 0 we do not care if wait_task_stopped() reports the old - * status and clears the code too; this can't race with the tracee, it - * takes siglock after resume. */ - need_siglock = data && !thread_group_empty(current); - if (need_siglock) - spin_lock_irq(&child->sighand->siglock); + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; + child->jobctl &= ~JOBCTL_TRACED; wake_up_state(child, __TASK_TRACED); - if (need_siglock) - spin_unlock_irq(&child->sighand->siglock); + spin_unlock_irq(&child->sighand->siglock); return 0; } @@ -1230,9 +1218,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: @@ -1279,10 +1266,6 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -#ifndef arch_ptrace_attach -#define arch_ptrace_attach(child) do { } while (0) -#endif - SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, unsigned long, data) { @@ -1291,8 +1274,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); - if (!ret) - arch_ptrace_attach(current); goto out; } @@ -1304,12 +1285,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } @@ -1449,12 +1424,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 696c6490bd5b..bfa7452ca92e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6353,10 +6353,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) /* * We must load prev->state once (task_struct::state is volatile), such - * that: - * - * - we form a control dependency vs deactivate_task() below. - * - ptrace_{,un}freeze_traced() can change ->state underneath us. + * that we form a control dependency vs deactivate_task() below. */ prev_state = READ_ONCE(prev->__state); if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { diff --git a/kernel/signal.c b/kernel/signal.c index e43bc2a692f5..edb1dc9b00dc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -762,7 +762,10 @@ still_pending: */ void signal_wake_up_state(struct task_struct *t, unsigned int state) { + lockdep_assert_held(&t->sighand->siglock); + set_tsk_thread_flag(t, TIF_SIGPENDING); + /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it @@ -884,7 +887,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, static void ptrace_trap_notify(struct task_struct *t) { WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); - assert_spin_locked(&t->sighand->siglock); + lockdep_assert_held(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); @@ -930,9 +933,10 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) for_each_thread(p, t) { flush_sigqueue_mask(&flush, &t->pending); task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); - if (likely(!(t->ptrace & PT_SEIZED))) + if (likely(!(t->ptrace & PT_SEIZED))) { + t->jobctl &= ~JOBCTL_STOPPED; wake_up_state(t, __TASK_STOPPED); - else + } else ptrace_trap_notify(t); } @@ -1071,15 +1075,15 @@ static inline bool legacy_queue(struct sigpending *signals, int sig) return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } -static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, - enum pid_type type, bool force) +static int __send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *t, enum pid_type type, bool force) { struct sigpending *pending; struct sigqueue *q; int override_rlimit; int ret = 0, result; - assert_spin_locked(&t->sighand->siglock); + lockdep_assert_held(&t->sighand->siglock); result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, force)) @@ -1212,8 +1216,8 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) return ret; } -static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, - enum pid_type type) +int send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *t, enum pid_type type) { /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ bool force = false; @@ -1245,7 +1249,7 @@ static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct force = true; } } - return __send_signal(sig, info, t, type, force); + return __send_signal_locked(sig, info, t, type, force); } static void print_fatal_signal(int signr) @@ -1281,12 +1285,6 @@ static int __init setup_print_fatal_signals(char *str) __setup("print-fatal-signals=", setup_print_fatal_signals); -int -__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) -{ - return send_signal(sig, info, p, PIDTYPE_TGID); -} - int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { @@ -1294,7 +1292,7 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p int ret = -ESRCH; if (lock_task_sighand(p, &flags)) { - ret = send_signal(sig, info, p, type); + ret = send_signal_locked(sig, info, p, type); unlock_task_sighand(p, &flags); } @@ -1347,7 +1345,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, if (action->sa.sa_handler == SIG_DFL && (!t->ptrace || (handler == HANDLER_EXIT))) t->signal->flags &= ~SIGNAL_UNKILLABLE; - ret = send_signal(sig, info, t, PIDTYPE_PID); + ret = send_signal_locked(sig, info, t, PIDTYPE_PID); spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; @@ -1567,7 +1565,7 @@ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, if (sig) { if (lock_task_sighand(p, &flags)) { - ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false); + ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); unlock_task_sighand(p, &flags); } else ret = -ESRCH; @@ -2114,7 +2112,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * parent's namespaces. */ if (valid_signal(sig) && sig) - __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false); + __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); @@ -2184,7 +2182,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, spin_lock_irqsave(&sighand->siglock, flags); if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - __group_send_sig_info(SIGCHLD, &info, parent); + send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); /* * Even if SIGCHLD is not generated, we must wake up wait4 calls. */ @@ -2204,13 +2202,12 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, * with. If the code did not stop because the tracer is gone, * the stop signal remains unchanged unless clear_code. */ -static int ptrace_stop(int exit_code, int why, int clear_code, - unsigned long message, kernel_siginfo_t *info) +static int ptrace_stop(int exit_code, int why, unsigned long message, + kernel_siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { bool gstop_done = false; - bool read_code = true; if (arch_ptrace_stop_needed()) { /* @@ -2227,10 +2224,16 @@ static int ptrace_stop(int exit_code, int why, int clear_code, } /* - * schedule() will not sleep if there is a pending signal that - * can awaken the task. + * After this point ptrace_signal_wake_up or signal_wake_up + * will clear TASK_TRACED if ptrace_unlink happens or a fatal + * signal comes in. Handle previous ptrace_unlinks and fatal + * signals here to prevent ptrace_stop sleeping in schedule. */ + if (!current->ptrace || __fatal_signal_pending(current)) + return exit_code; + set_special_state(TASK_TRACED); + current->jobctl |= JOBCTL_TRACED; /* * We're committing to trapping. TRACED should be visible before @@ -2276,54 +2279,33 @@ static int ptrace_stop(int exit_code, int why, int clear_code, spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); - if (likely(current->ptrace)) { - /* - * Notify parents of the stop. - * - * While ptraced, there are two parents - the ptracer and - * the real_parent of the group_leader. The ptracer should - * know about every stop while the real parent is only - * interested in the completion of group stop. The states - * for the two don't interact with each other. Notify - * separately unless they're gonna be duplicates. - */ + /* + * Notify parents of the stop. + * + * While ptraced, there are two parents - the ptracer and + * the real_parent of the group_leader. The ptracer should + * know about every stop while the real parent is only + * interested in the completion of group stop. The states + * for the two don't interact with each other. Notify + * separately unless they're gonna be duplicates. + */ + if (current->ptrace) do_notify_parent_cldstop(current, true, why); - if (gstop_done && ptrace_reparented(current)) - do_notify_parent_cldstop(current, false, why); + if (gstop_done && (!current->ptrace || ptrace_reparented(current))) + do_notify_parent_cldstop(current, false, why); - /* - * Don't want to allow preemption here, because - * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). - */ - preempt_disable(); - read_unlock(&tasklist_lock); - cgroup_enter_frozen(); - preempt_enable_no_resched(); - freezable_schedule(); - cgroup_leave_frozen(true); - } else { - /* - * By the time we got the lock, our tracer went away. - * Don't drop the lock yet, another tracer may come. - * - * If @gstop_done, the ptracer went away between group stop - * completion and here. During detach, it would have set - * JOBCTL_STOP_PENDING on us and we'll re-enter - * TASK_STOPPED in do_signal_stop() on return, so notifying - * the real parent of the group stop completion is enough. - */ - if (gstop_done) - do_notify_parent_cldstop(current, false, why); - - /* tasklist protects us from ptrace_freeze_traced() */ - __set_current_state(TASK_RUNNING); - read_code = false; - if (clear_code) - exit_code = 0; - read_unlock(&tasklist_lock); - } + /* + * Don't want to allow preemption here, because + * sys_ptrace() needs this task to be inactive. + * + * XXX: implement read_unlock_no_resched(). + */ + preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); + preempt_enable_no_resched(); + freezable_schedule(); + cgroup_leave_frozen(true); /* * We are back. Now reacquire the siglock before touching @@ -2331,14 +2313,13 @@ static int ptrace_stop(int exit_code, int why, int clear_code, * any signal-sending on another CPU that wants to examine it. */ spin_lock_irq(¤t->sighand->siglock); - if (read_code) - exit_code = current->exit_code; + exit_code = current->exit_code; current->last_siginfo = NULL; current->ptrace_message = 0; current->exit_code = 0; /* LISTENING can be set only during STOP traps, clear it */ - current->jobctl &= ~JOBCTL_LISTENING; + current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); /* * Queued signals ignored us while we were stopped for tracing. @@ -2360,7 +2341,7 @@ static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long mes info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ - return ptrace_stop(exit_code, why, 1, message, &info); + return ptrace_stop(exit_code, why, message, &info); } int ptrace_notify(int exit_code, unsigned long message) @@ -2471,6 +2452,7 @@ static bool do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; + current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); @@ -2532,7 +2514,7 @@ static void do_jobctl_trap(void) CLD_STOPPED, 0); } else { WARN_ON_ONCE(!signr); - ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); + ptrace_stop(signr, CLD_STOPPED, 0, NULL); } } @@ -2585,7 +2567,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; - signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); + signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ if (signr == 0) @@ -2612,7 +2594,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) /* If the (new) signal is now blocked, requeue it. */ if (sigismember(¤t->blocked, signr) || fatal_signal_pending(current)) { - send_signal(signr, info, current, type); + send_signal_locked(signr, info, current, type); signr = 0; } @@ -4807,7 +4789,7 @@ void kdb_send_sig(struct task_struct *t, int sig) "the deadlock.\n"); return; } - ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); + ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); spin_unlock(&t->sighand->siglock); if (ret) kdb_printf("Fail to deliver Signal %d to process %d.\n", diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0a97193984db..cb925e8ef9a8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -870,7 +870,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { tsk->dl.dl_overrun = 0; - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } } @@ -884,7 +884,7 @@ static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) rt ? "RT" : "CPU", hard ? "hard" : "soft", current->comm, task_pid_nr(current)); } - __group_send_sig_info(signo, SEND_SIG_PRIV, current); + send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); return true; } @@ -958,7 +958,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, task_tgid(tsk), cur_time); - __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } if (it->expires && it->expires < *expires) |