summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 13:12:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 13:12:16 -0700
commit1ca995edf838a70c7c0aba2de7fc6da57e22cbf3 (patch)
tree9c353521b749b26625bd04d69933efcd71939d19
parent72fda6c8e553699f6ba8d3ddc34f0bbe7a5898df (diff)
parentf0c508faea645da58d6ae6b644a1b68020d5a9d2 (diff)
Merge tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook: - interrupt SECCOMP_IOCTL_NOTIF_RECV when all users exit (Andrei Vagin) - Update selftests to check for expected NOTIF_RECV exits (Andrei Vagin) * tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: selftests/seccomp: check that a zombie leader doesn't affect others selftests/seccomp: add test for NOTIF_RECV and unused filters seccomp: release task filters when the task exits seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV when all users have exited
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/seccomp.c30
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c131
3 files changed, 157 insertions, 7 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 81fcee45d630..be81342caf1b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -277,7 +277,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
- seccomp_filter_release(p);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
@@ -834,6 +833,8 @@ void __noreturn do_exit(long code)
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
+ seccomp_filter_release(tsk);
+
acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index e30b60b57614..dc51e521bc1d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -502,6 +502,9 @@ static inline pid_t seccomp_can_sync_threads(void)
/* Skip current, since it is initiating the sync. */
if (thread == caller)
continue;
+ /* Skip exited threads. */
+ if (thread->flags & PF_EXITING)
+ continue;
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
@@ -563,18 +566,21 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
* @tsk: task the filter should be released from.
*
* This function should only be called when the task is exiting as
- * it detaches it from its filter tree. As such, READ_ONCE() and
- * barriers are not needed here, as would normally be needed.
+ * it detaches it from its filter tree. PF_EXITING has to be set
+ * for the task.
*/
void seccomp_filter_release(struct task_struct *tsk)
{
- struct seccomp_filter *orig = tsk->seccomp.filter;
+ struct seccomp_filter *orig;
- /* We are effectively holding the siglock by not having any sighand. */
- WARN_ON(tsk->sighand != NULL);
+ if (WARN_ON((tsk->flags & PF_EXITING) == 0))
+ return;
+ spin_lock_irq(&tsk->sighand->siglock);
+ orig = tsk->seccomp.filter;
/* Detach task from its filter tree. */
tsk->seccomp.filter = NULL;
+ spin_unlock_irq(&tsk->sighand->siglock);
__seccomp_filter_release(orig);
}
@@ -602,6 +608,13 @@ static inline void seccomp_sync_threads(unsigned long flags)
if (thread == caller)
continue;
+ /*
+ * Skip exited threads. seccomp_filter_release could have
+ * been already called for this task.
+ */
+ if (thread->flags & PF_EXITING)
+ continue;
+
/* Get a task reference for the new leaf node. */
get_seccomp_filter(caller);
@@ -1466,7 +1479,7 @@ static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int s
void *key)
{
/* Avoid a wakeup if event not interesting for us. */
- if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+ if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR | EPOLLHUP)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -1476,6 +1489,9 @@ static int recv_wait_event(struct seccomp_filter *filter)
DEFINE_WAIT_FUNC(wait, recv_wake_function);
int ret;
+ if (refcount_read(&filter->users) == 0)
+ return 0;
+
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
return 0;
@@ -1484,6 +1500,8 @@ static int recv_wait_event(struct seccomp_filter *filter)
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
break;
+ if (refcount_read(&filter->users) == 0)
+ break;
if (ret)
return ret;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 783ebce8c4de..e3f97f90d8db 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3954,6 +3954,60 @@ TEST(user_notification_filter_empty)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+TEST(user_ioctl_notification_filter_empty)
+{
+ pid_t pid;
+ long ret;
+ int status, p[2];
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+ struct seccomp_notif req = {};
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
+ ASSERT_EQ(0, pipe(p));
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int listener;
+
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+ close(p[1]);
+ close(listener);
+ sleep(1);
+
+ _exit(EXIT_SUCCESS);
+ }
+ if (read(p[0], &status, 1) != 0)
+ _exit(EXIT_SUCCESS);
+ close(p[0]);
+ /*
+ * The seccomp filter has become unused so we should be notified once
+ * the kernel gets around to cleaning up task struct.
+ */
+ EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
+ EXPECT_EQ(errno, ENOENT);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
static void *do_thread(void *data)
{
return NULL;
@@ -4755,6 +4809,83 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
+struct tsync_vs_thread_leader_args {
+ pthread_t leader;
+};
+
+static void *tsync_vs_dead_thread_leader_sibling(void *_args)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args = _args;
+ void *retval;
+ long ret;
+
+ ret = pthread_join(args->leader, &retval);
+ if (ret)
+ exit(1);
+ if (retval != _args)
+ exit(2);
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
+ if (ret)
+ exit(3);
+
+ exit(0);
+}
+
+/*
+ * Ensure that a dead thread leader doesn't prevent installing new filters with
+ * SECCOMP_FILTER_FLAG_TSYNC from other threads.
+ */
+TEST(tsync_vs_dead_thread_leader)
+{
+ int status;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args;
+ pthread_t sibling;
+
+ args = malloc(sizeof(*args));
+ ASSERT_NE(NULL, args);
+ args->leader = pthread_self();
+
+ ret = pthread_create(&sibling, NULL,
+ tsync_vs_dead_thread_leader_sibling, args);
+ ASSERT_EQ(0, ret);
+
+ /* Install a new filter just to the leader thread. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ ASSERT_EQ(0, ret);
+ pthread_exit(args);
+ exit(1);
+ }
+
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_EQ(0, status);
+}
+
/*
* TODO:
* - expand NNP testing