summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 12:37:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 12:37:24 -0700
commitdc8af1ffd657c90733088e0093c7990305b2b4e9 (patch)
treebc48bacb34bb92f3b31146f07a3dfd0c2eb2cbb6
parent0bf13a84362e750a90008af259b098d7c0e0755b (diff)
parent5e91d2a4146946ea0abc984ca957f12b70632901 (diff)
Merge tag 'seccomp-v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook: - Rework USER_NOTIF notification ordering and kill logic (Sargun Dhillon) - Improved PTRACE_O_SUSPEND_SECCOMP selftest (Jann Horn) - Gracefully handle failed unshare() in selftests (Yang Guang) - Spelling fix (Colin Ian King) * tag 'seccomp-v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: selftests/seccomp: Fix spelling mistake "Coud" -> "Could" selftests/seccomp: Add test for wait killable notifier selftests/seccomp: Refactor get_proc_stat to split out file reading code seccomp: Add wait_killable semantic to seccomp user notifier selftests/seccomp: Ensure that notifications come in FIFO order seccomp: Use FIFO semantics to order notifications selftests/seccomp: Add SKIP for failed unshare() selftests/seccomp: Test PTRACE_O_SUSPEND_SECCOMP without CAP_SYS_ADMIN
-rw-r--r--Documentation/userspace-api/seccomp_filter.rst10
-rw-r--r--include/linux/seccomp.h3
-rw-r--r--include/uapi/linux/seccomp.h2
-rw-r--r--kernel/seccomp.c44
-rw-r--r--tools/testing/selftests/seccomp/Makefile1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c427
6 files changed, 482 insertions, 5 deletions
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 539e9d4a4860..d1e2b9193f09 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -271,6 +271,16 @@ notifying process it will be replaced. The supervisor can also add an FD, and
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
value will be the injected file descriptor number.
+The notifying process can be preempted, resulting in the notification being
+aborted. This can be problematic when trying to take actions on behalf of the
+notifying process that are long-running and typically retryable (mounting a
+filesytem). Alternatively, at filter installation time, the
+``SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV`` flag can be set. This flag makes it
+such that when a user notification is received by the supervisor, the notifying
+process will ignore non-fatal signals until the response is sent. Signals that
+are sent prior to the notification being received by userspace are handled
+normally.
+
It is worth noting that ``struct seccomp_data`` contains the values of register
arguments to the syscall, but does not contain pointers to memory. The task's
memory is accessible to suitably privileged traces via ``ptrace()`` or
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 0c564e5d40ff..d31d76be4982 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -8,7 +8,8 @@
SECCOMP_FILTER_FLAG_LOG | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
SECCOMP_FILTER_FLAG_NEW_LISTENER | \
- SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
/* sizeof() the first published struct seccomp_notif_addfd */
#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 78074254ab98..0fdc6ef02b94 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -23,6 +23,8 @@
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+/* Received notifications wait in killable state (only respond to fatal signals) */
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
/*
* All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b5ac87f6dbd4..e9852d1b4a5e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -200,6 +200,8 @@ static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
* the filter can be freed.
* @cache: cache of arch/syscall mappings to actions
* @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
+ * @wait_killable_recv: Put notifying process in killable state once the
+ * notification is received by the userspace listener.
* @prev: points to a previously installed, or inherited, filter
* @prog: the BPF program to evaluate
* @notif: the struct that holds all notification related information
@@ -220,6 +222,7 @@ struct seccomp_filter {
refcount_t refs;
refcount_t users;
bool log;
+ bool wait_killable_recv;
struct action_cache cache;
struct seccomp_filter *prev;
struct bpf_prog *prog;
@@ -893,6 +896,10 @@ static long seccomp_attach_filter(unsigned int flags,
if (flags & SECCOMP_FILTER_FLAG_LOG)
filter->log = true;
+ /* Set wait killable flag, if present. */
+ if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
+ filter->wait_killable_recv = true;
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
@@ -1080,6 +1087,12 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn
complete(&addfd->completion);
}
+static bool should_sleep_killable(struct seccomp_filter *match,
+ struct seccomp_knotif *n)
+{
+ return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
+}
+
static int seccomp_do_user_notification(int this_syscall,
struct seccomp_filter *match,
const struct seccomp_data *sd)
@@ -1100,7 +1113,7 @@ static int seccomp_do_user_notification(int this_syscall,
n.data = sd;
n.id = seccomp_next_notify_id(match);
init_completion(&n.ready);
- list_add(&n.list, &match->notif->notifications);
+ list_add_tail(&n.list, &match->notif->notifications);
INIT_LIST_HEAD(&n.addfd);
up(&match->notif->request);
@@ -1110,11 +1123,25 @@ static int seccomp_do_user_notification(int this_syscall,
* This is where we wait for a reply from userspace.
*/
do {
+ bool wait_killable = should_sleep_killable(match, &n);
+
mutex_unlock(&match->notify_lock);
- err = wait_for_completion_interruptible(&n.ready);
+ if (wait_killable)
+ err = wait_for_completion_killable(&n.ready);
+ else
+ err = wait_for_completion_interruptible(&n.ready);
mutex_lock(&match->notify_lock);
- if (err != 0)
+
+ if (err != 0) {
+ /*
+ * Check to see if the notifcation got picked up and
+ * whether we should switch to wait killable.
+ */
+ if (!wait_killable && should_sleep_killable(match, &n))
+ continue;
+
goto interrupted;
+ }
addfd = list_first_entry_or_null(&n.addfd,
struct seccomp_kaddfd, list);
@@ -1484,6 +1511,9 @@ out:
mutex_lock(&filter->notify_lock);
knotif = find_notification(filter, unotif.id);
if (knotif) {
+ /* Reset the process to make sure it's not stuck */
+ if (should_sleep_killable(filter, knotif))
+ complete(&knotif->ready);
knotif->state = SECCOMP_NOTIFY_INIT;
up(&filter->notif->request);
}
@@ -1829,6 +1859,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
return -EINVAL;
+ /*
+ * The SECCOMP_FILTER_FLAG_WAIT_KILLABLE_SENT flag doesn't make sense
+ * without the SECCOMP_FILTER_FLAG_NEW_LISTENER flag.
+ */
+ if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) &&
+ ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0))
+ return -EINVAL;
+
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 585f7a0c10cb..f017c382c036 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
LDFLAGS += -lpthread
+LDLIBS += -lcap
TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 313bb0cbfb1e..136df5b76319 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -46,6 +46,7 @@
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>
+#include <sys/capability.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -59,6 +60,8 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif
@@ -268,6 +271,10 @@ struct seccomp_notif_addfd_big {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif
+#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -3742,7 +3749,10 @@ TEST(user_notification_fault_recv)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
- ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
+ if (errno == EINVAL)
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
+ }
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
@@ -4231,6 +4241,421 @@ TEST(user_notification_addfd_rlimit)
close(memfd);
}
+/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
+FIXTURE(O_SUSPEND_SECCOMP) {
+ pid_t pid;
+};
+
+FIXTURE_SETUP(O_SUSPEND_SECCOMP)
+{
+ ERRNO_FILTER(block_read, E2BIG);
+ cap_value_t cap_list[] = { CAP_SYS_ADMIN };
+ cap_t caps;
+
+ self->pid = 0;
+
+ /* make sure we don't have CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ ASSERT_NE(NULL, caps);
+ ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+ ASSERT_EQ(0, cap_set_proc(caps));
+ cap_free(caps);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
+
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0);
+
+ if (self->pid == 0) {
+ while (1)
+ pause();
+ _exit(127);
+ }
+}
+
+FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
+{
+ if (self->pid)
+ kill(self->pid, SIGKILL);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, setoptions)
+{
+ int wstatus;
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
+ ASSERT_EQ(self->pid, wait(&wstatus));
+ ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, seize)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
+ ASSERT_EQ(-1, ret);
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+/*
+ * get_nth - Get the nth, space separated entry in a file.
+ *
+ * Returns the length of the read field.
+ * Throws error if field is zero-lengthed.
+ */
+static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
+ const unsigned int position, char **entry)
+{
+ char *line = NULL;
+ unsigned int i;
+ ssize_t nread;
+ size_t len = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ ASSERT_NE(f, NULL) {
+ TH_LOG("Could not open %s: %s", path, strerror(errno));
+ }
+
+ for (i = 0; i < position; i++) {
+ nread = getdelim(&line, &len, ' ', f);
+ ASSERT_GE(nread, 0) {
+ TH_LOG("Failed to read %d entry in file %s", i, path);
+ }
+ }
+ fclose(f);
+
+ ASSERT_GT(nread, 0) {
+ TH_LOG("Entry in file %s had zero length", path);
+ }
+
+ *entry = line;
+ return nread - 1;
+}
+
+/* For a given PID, get the task state (D, R, etc...) */
+static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
+{
+ char proc_path[100] = {0};
+ char status;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+ ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
+
+ status = *line;
+ free(line);
+
+ return status;
+}
+
+TEST(user_notification_fifo)
+{
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int i, status, listener;
+ pid_t pid, pids[3];
+ __u64 baseid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Setup a listener */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ baseid = req.id + 1;
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ /* check that we make sure flags == 0 */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /* Start children, and generate notifications */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ pid = fork();
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+ pids[i] = pid;
+ }
+
+ /* This spins until all of the children are sleeping */
+restart_wait:
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ if (get_proc_stat(_metadata, pids[i]) != 'S') {
+ nanosleep(&delay, NULL);
+ goto restart_wait;
+ }
+ }
+
+ /* Read the notifications in order (and respond) */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ EXPECT_EQ(req.id, baseid + i);
+ resp.id = req.id;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+ }
+
+ /* Make sure notifications were received */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ }
+}
+
+/* get_proc_syscall - Get the syscall in progress for a given pid
+ *
+ * Returns the current syscall number for a given process
+ * Returns -1 if not in syscall (running or blocked)
+ */
+static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
+{
+ char proc_path[100] = {0};
+ long ret = -1;
+ ssize_t nread;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
+ nread = get_nth(_metadata, proc_path, 1, &line);
+ ASSERT_GT(nread, 0);
+
+ if (!strncmp("running", line, MIN(7, nread)))
+ ret = strtol(line, NULL, 16);
+
+ free(line);
+ return ret;
+}
+
+/* Ensure non-fatal signals prior to receive are unmodified */
+TEST(user_notification_wait_killable_pre_notification)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ /*
+ * Check that we can kill the process with SIGUSR1 prior to receiving
+ * the notification. SIGUSR1 is wired up to a custom signal handler,
+ * and make sure it gets called.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the non-fatal sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ ret = syscall(__NR_getppid);
+ /* Make sure we got a return from a signal interruption */
+ exit(ret != -1 || errno != EINTR);
+ }
+
+ /*
+ * Make sure we've gotten to the seccomp user notification wait
+ * from getppid prior to sending any signals
+ */
+ while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
+ get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /* wait for process to exit (exit checks for EINTR) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+}
+
+/* Ensure non-fatal signals after receive are blocked */
+TEST(user_notification_wait_killable)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ /* Make sure that the syscall is completed (no EINTR) */
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /*
+ * Make sure the task enters moves to TASK_KILLABLE by waiting for
+ * D (Disk Sleep) state after receiving non-fatal signal.
+ */
+ while (get_proc_stat(_metadata, pid) != 'D')
+ nanosleep(&delay, NULL);
+
+ resp.id = req.id;
+ resp.val = USER_NOTIF_MAGIC;
+ /* Make sure the notification is found and able to be replied to */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * Make sure that the signal handler does get called once we're back in
+ * userspace.
+ */
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+ /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+/* Ensure fatal signals after receive are not blocked */
+TEST(user_notification_wait_killable_fatal)
+{
+ struct seccomp_notif req = {};
+ int listener, status;
+ pid_t pid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* This should never complete as it should get a SIGTERM */
+ syscall(__NR_getppid);
+ exit(1);
+ }
+
+ while (get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Kill the process with a fatal signal */
+ EXPECT_EQ(kill(pid, SIGTERM), 0);
+
+ /*
+ * Wait for the process to exit, and make sure the process terminated
+ * due to the SIGTERM signal.
+ */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+}
+
/*
* TODO:
* - expand NNP testing