summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/audit.h2
-rw-r--r--kernel/auditsc.c12
-rw-r--r--kernel/bpf/devmap.c6
-rw-r--r--kernel/bpf/syscall.c6
-rw-r--r--kernel/bpf/verifier.c10
-rw-r--r--kernel/compat.c23
-rw-r--r--kernel/events/core.c3
-rw-r--r--kernel/extable.c45
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/locking/test-ww_mutex.c2
-rw-r--r--kernel/rcu/tree.c10
-rw-r--r--kernel/sched/wait.c85
-rw-r--r--kernel/seccomp.c321
-rw-r--r--kernel/sysctl_binary.c21
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/trace/trace_mmiotrace.c1
-rw-r--r--kernel/trace/trace_stack.c15
-rw-r--r--kernel/trace/trace_syscalls.c2
21 files changed, 484 insertions, 107 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 5b1284370367..5e72af29ab73 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -516,7 +516,7 @@ static void do_acct_process(struct bsd_acct_struct *acct)
if (file_start_write_trylock(file)) {
/* it's been opened O_APPEND, so position is irrelevant */
loff_t pos = 0;
- __kernel_write(file, (char *)&ac, sizeof(acct_t), &pos);
+ __kernel_write(file, &ac, sizeof(acct_t), &pos);
file_end_write(file);
}
out:
diff --git a/kernel/audit.h b/kernel/audit.h
index b331d9b83f63..9b110ae17ee3 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -182,7 +182,7 @@ struct audit_context {
mqd_t mqdes;
size_t msg_len;
unsigned int msg_prio;
- struct timespec abs_timeout;
+ struct timespec64 abs_timeout;
} mq_sendrecv;
struct {
int oflag;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index aac1a41f82bd..ecc23e25c9eb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1235,11 +1235,11 @@ static void show_special(struct audit_context *context, int *call_panic)
case AUDIT_MQ_SENDRECV:
audit_log_format(ab,
"mqdes=%d msg_len=%zd msg_prio=%u "
- "abs_timeout_sec=%ld abs_timeout_nsec=%ld",
+ "abs_timeout_sec=%lld abs_timeout_nsec=%ld",
context->mq_sendrecv.mqdes,
context->mq_sendrecv.msg_len,
context->mq_sendrecv.msg_prio,
- context->mq_sendrecv.abs_timeout.tv_sec,
+ (long long) context->mq_sendrecv.abs_timeout.tv_sec,
context->mq_sendrecv.abs_timeout.tv_nsec);
break;
case AUDIT_MQ_NOTIFY:
@@ -2083,15 +2083,15 @@ void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
*
*/
void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
- const struct timespec *abs_timeout)
+ const struct timespec64 *abs_timeout)
{
struct audit_context *context = current->audit_context;
- struct timespec *p = &context->mq_sendrecv.abs_timeout;
+ struct timespec64 *p = &context->mq_sendrecv.abs_timeout;
if (abs_timeout)
- memcpy(p, abs_timeout, sizeof(struct timespec));
+ memcpy(p, abs_timeout, sizeof(*p));
else
- memset(p, 0, sizeof(struct timespec));
+ memset(p, 0, sizeof(*p));
context->mq_sendrecv.mqdes = mqdes;
context->mq_sendrecv.msg_len = msg_len;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 959c9a07f318..e093d9a2c4dd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
+ int err = -EINVAL;
u64 cost;
- int err;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (err)
goto free_dtab;
+ err = -ENOMEM;
+
/* A per cpu bitfield with a bit per possible net device */
dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
__alignof__(unsigned long));
@@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
}
static void dev_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb17e1cd1d43..25d074920a00 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map)
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
+ unsigned long flags;
+
if (do_idr_lock)
- spin_lock_bh(&map_idr_lock);
+ spin_lock_irqsave(&map_idr_lock, flags);
else
__acquire(&map_idr_lock);
idr_remove(&map_idr, map->id);
if (do_idr_lock)
- spin_unlock_bh(&map_idr_lock);
+ spin_unlock_irqrestore(&map_idr_lock, flags);
else
__release(&map_idr_lock);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 477b6932c3c1..b914fbe1383e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2292,7 +2292,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
+ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
+ BPF_CLASS(insn->code) == BPF_ALU64) {
verbose("BPF_END uses reserved fields\n");
return -EINVAL;
}
@@ -4204,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
}
if (insn->imm == BPF_FUNC_redirect_map) {
- u64 addr = (unsigned long)prog;
+ /* Note, we cannot use prog directly as imm as subsequent
+ * rewrites would still change the prog pointer. The only
+ * stable address we can use is aux, which also works with
+ * prog clones during blinding.
+ */
+ u64 addr = (unsigned long)prog->aux;
struct bpf_insn r4_ld[] = {
BPF_LD_IMM64(BPF_REG_4, addr),
*insn,
diff --git a/kernel/compat.c b/kernel/compat.c
index 6f0a0e723a06..772e038d04d9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -200,29 +200,6 @@ int compat_put_timespec(const struct timespec *ts, void __user *uts)
}
EXPORT_SYMBOL_GPL(compat_put_timespec);
-int compat_convert_timespec(struct timespec __user **kts,
- const void __user *cts)
-{
- struct timespec ts;
- struct timespec __user *uts;
-
- if (!cts || COMPAT_USE_64BIT_TIME) {
- *kts = (struct timespec __user *)cts;
- return 0;
- }
-
- uts = compat_alloc_user_space(sizeof(ts));
- if (!uts)
- return -EFAULT;
- if (compat_get_timespec(&ts, cts))
- return -EFAULT;
- if (copy_to_user(uts, &ts, sizeof(ts)))
- return -EFAULT;
-
- *kts = uts;
- return 0;
-}
-
int get_compat_itimerval(struct itimerval *o, const struct compat_itimerval __user *i)
{
struct compat_itimerval v32;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b75b2db..6bc21e202ae4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
}
}
event->tp_event->prog = prog;
+ event->tp_event->bpf_prog_owner = event;
return 0;
}
@@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
return;
prog = event->tp_event->prog;
- if (prog) {
+ if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
bpf_prog_put(prog);
}
diff --git a/kernel/extable.c b/kernel/extable.c
index 38c2412401a1..9aa1cc41ecf7 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr)
int __kernel_text_address(unsigned long addr)
{
- if (core_kernel_text(addr))
- return 1;
- if (is_module_text_address(addr))
- return 1;
- if (is_ftrace_trampoline(addr))
- return 1;
- if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
- return 1;
- if (is_bpf_text_address(addr))
+ if (kernel_text_address(addr))
return 1;
/*
* There might be init symbols in saved stacktraces.
@@ -127,17 +119,42 @@ int __kernel_text_address(unsigned long addr)
int kernel_text_address(unsigned long addr)
{
+ bool no_rcu;
+ int ret = 1;
+
if (core_kernel_text(addr))
return 1;
+
+ /*
+ * If a stack dump happens while RCU is not watching, then
+ * RCU needs to be notified that it requires to start
+ * watching again. This can happen either by tracing that
+ * triggers a stack trace, or a WARN() that happens during
+ * coming back from idle, or cpu on or offlining.
+ *
+ * is_module_text_address() as well as the kprobe slots
+ * and is_bpf_text_address() require RCU to be watching.
+ */
+ no_rcu = !rcu_is_watching();
+
+ /* Treat this like an NMI as it can happen anywhere */
+ if (no_rcu)
+ rcu_nmi_enter();
+
if (is_module_text_address(addr))
- return 1;
+ goto out;
if (is_ftrace_trampoline(addr))
- return 1;
+ goto out;
if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
- return 1;
+ goto out;
if (is_bpf_text_address(addr))
- return 1;
- return 0;
+ goto out;
+ ret = 0;
+out:
+ if (no_rcu)
+ rcu_nmi_exit();
+
+ return ret;
}
/*
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f51b7b6d2451..6fc89fd93824 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -202,7 +202,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
irqd_clr_managed_shutdown(d);
- if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+ if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) {
/*
* Catch code which fiddles with enable_irq() on a managed
* and potentially shutdown IRQ. Chained interrupt
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 39f56c870051..0e4cd64ad2c0 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -362,7 +362,7 @@ static int *get_random_order(int count)
int *order;
int n, r, tmp;
- order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY);
+ order = kmalloc_array(count, sizeof(*order), GFP_KERNEL);
if (!order)
return order;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1250e4bd4b85..0c44c7b42e6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -882,6 +882,11 @@ void rcu_irq_exit(void)
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting < 1);
if (rdtp->dynticks_nesting <= 1) {
@@ -1015,6 +1020,11 @@ void rcu_irq_enter(void)
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index d6afed6d0752..98feab7933c7 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -53,6 +53,12 @@ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry
}
EXPORT_SYMBOL(remove_wait_queue);
+/*
+ * Scan threshold to break wait queue walk.
+ * This allows a waker to take a break from holding the
+ * wait queue lock during the wait queue walk.
+ */
+#define WAITQUEUE_WALK_BREAK_CNT 64
/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
@@ -63,18 +69,67 @@ EXPORT_SYMBOL(remove_wait_queue);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
- int nr_exclusive, int wake_flags, void *key)
+static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
+ int nr_exclusive, int wake_flags, void *key,
+ wait_queue_entry_t *bookmark)
{
wait_queue_entry_t *curr, *next;
+ int cnt = 0;
+
+ if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
+ curr = list_next_entry(bookmark, entry);
+
+ list_del(&bookmark->entry);
+ bookmark->flags = 0;
+ } else
+ curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
- list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
+ if (&curr->entry == &wq_head->head)
+ return nr_exclusive;
+
+ list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
- int ret = curr->func(curr, mode, wake_flags, key);
+ int ret;
+
+ if (flags & WQ_FLAG_BOOKMARK)
+ continue;
+
+ ret = curr->func(curr, mode, wake_flags, key);
if (ret < 0)
break;
if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
+
+ if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
+ (&next->entry != &wq_head->head)) {
+ bookmark->flags = WQ_FLAG_BOOKMARK;
+ list_add_tail(&bookmark->entry, &next->entry);
+ break;
+ }
+ }
+ return nr_exclusive;
+}
+
+static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
+ int nr_exclusive, int wake_flags, void *key)
+{
+ unsigned long flags;
+ wait_queue_entry_t bookmark;
+
+ bookmark.flags = 0;
+ bookmark.private = NULL;
+ bookmark.func = NULL;
+ INIT_LIST_HEAD(&bookmark.entry);
+
+ spin_lock_irqsave(&wq_head->lock, flags);
+ nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+
+ while (bookmark.flags & WQ_FLAG_BOOKMARK) {
+ spin_lock_irqsave(&wq_head->lock, flags);
+ nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
+ wake_flags, key, &bookmark);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
}
}
@@ -91,11 +146,7 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
- unsigned long flags;
-
- spin_lock_irqsave(&wq_head->lock, flags);
- __wake_up_common(wq_head, mode, nr_exclusive, 0, key);
- spin_unlock_irqrestore(&wq_head->lock, flags);
+ __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
@@ -104,16 +155,23 @@ EXPORT_SYMBOL(__wake_up);
*/
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
- __wake_up_common(wq_head, mode, nr, 0, NULL);
+ __wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
- __wake_up_common(wq_head, mode, 1, 0, key);
+ __wake_up_common(wq_head, mode, 1, 0, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);
+void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
+ unsigned int mode, void *key, wait_queue_entry_t *bookmark)
+{
+ __wake_up_common(wq_head, mode, 1, 0, key, bookmark);
+}
+EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
+
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
@@ -134,7 +192,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
- unsigned long flags;
int wake_flags = 1; /* XXX WF_SYNC */
if (unlikely(!wq_head))
@@ -143,9 +200,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
if (unlikely(nr_exclusive != 1))
wake_flags = 0;
- spin_lock_irqsave(&wq_head->lock, flags);
- __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
- spin_unlock_irqrestore(&wq_head->lock, flags);
+ __wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 98b59b5db90b..c24579dfa7a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,11 +17,13 @@
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
+#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
+#include <linux/sysctl.h>
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
@@ -42,6 +44,7 @@
* get/put helpers should be used when accessing an instance
* outside of a lifetime-guarded section. In general, this
* is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
* @prev: points to a previously installed, or inherited, filter
* @prog: the BPF program to evaluate
*
@@ -57,6 +60,7 @@
*/
struct seccomp_filter {
refcount_t usage;
+ bool log;
struct seccomp_filter *prev;
struct bpf_prog *prog;
};
@@ -171,10 +175,15 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
/**
* seccomp_run_filters - evaluates all seccomp filters against @sd
* @sd: optional seccomp data to be passed to filters
+ * @match: stores struct seccomp_filter that resulted in the return value,
+ * unless filter returned SECCOMP_RET_ALLOW, in which case it will
+ * be unchanged.
*
* Returns valid seccomp BPF response codes.
*/
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+ struct seccomp_filter **match)
{
struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;
@@ -184,7 +193,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
/* Ensure unexpected behavior doesn't result in failing open. */
if (unlikely(WARN_ON(f == NULL)))
- return SECCOMP_RET_KILL;
+ return SECCOMP_RET_KILL_PROCESS;
if (!sd) {
populate_seccomp_data(&sd_local);
@@ -198,8 +207,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
for (; f; f = f->prev) {
u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
- if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+ if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
ret = cur_ret;
+ *match = f;
+ }
}
return ret;
}
@@ -444,6 +455,10 @@ static long seccomp_attach_filter(unsigned int flags,
return ret;
}
+ /* Set log flag, if present. */
+ if (flags & SECCOMP_FILTER_FLAG_LOG)
+ filter->log = true;
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
@@ -514,6 +529,65 @@ static void seccomp_send_sigsys(int syscall, int reason)
}
#endif /* CONFIG_SECCOMP_FILTER */
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
+#define SECCOMP_LOG_KILL_THREAD (1 << 1)
+#define SECCOMP_LOG_TRAP (1 << 2)
+#define SECCOMP_LOG_ERRNO (1 << 3)
+#define SECCOMP_LOG_TRACE (1 << 4)
+#define SECCOMP_LOG_LOG (1 << 5)
+#define SECCOMP_LOG_ALLOW (1 << 6)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+ SECCOMP_LOG_KILL_THREAD |
+ SECCOMP_LOG_TRAP |
+ SECCOMP_LOG_ERRNO |
+ SECCOMP_LOG_TRACE |
+ SECCOMP_LOG_LOG;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+ bool requested)
+{
+ bool log = false;
+
+ switch (action) {
+ case SECCOMP_RET_ALLOW:
+ break;
+ case SECCOMP_RET_TRAP:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+ break;
+ case SECCOMP_RET_ERRNO:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+ break;
+ case SECCOMP_RET_TRACE:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
+ break;
+ case SECCOMP_RET_LOG:
+ log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+ break;
+ case SECCOMP_RET_KILL_THREAD:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+ break;
+ case SECCOMP_RET_KILL_PROCESS:
+ default:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
+ }
+
+ /*
+ * Force an audit message to be emitted when the action is RET_KILL_*,
+ * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+ * allowed to be logged by the admin.
+ */
+ if (log)
+ return __audit_seccomp(syscall, signr, action);
+
+ /*
+ * Let the audit subsystem decide if the action should be audited based
+ * on whether the current task itself is being audited.
+ */
+ return audit_seccomp(syscall, signr, action);
+}
+
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
* To be fully secure this must be combined with rlimit
@@ -539,7 +613,7 @@ static void __secure_computing_strict(int this_syscall)
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
- audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+ seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}
@@ -566,6 +640,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
const bool recheck_after_trace)
{
u32 filter_ret, action;
+ struct seccomp_filter *match = NULL;
int data;
/*
@@ -574,9 +649,9 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
*/
rmb();
- filter_ret = seccomp_run_filters(sd);
+ filter_ret = seccomp_run_filters(sd, &match);
data = filter_ret & SECCOMP_RET_DATA;
- action = filter_ret & SECCOMP_RET_ACTION;
+ action = filter_ret & SECCOMP_RET_ACTION_FULL;
switch (action) {
case SECCOMP_RET_ERRNO:
@@ -637,14 +712,25 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
return 0;
+ case SECCOMP_RET_LOG:
+ seccomp_log(this_syscall, 0, action, true);
+ return 0;
+
case SECCOMP_RET_ALLOW:
+ /*
+ * Note that the "match" filter will always be NULL for
+ * this action since SECCOMP_RET_ALLOW is the starting
+ * state in seccomp_run_filters().
+ */
return 0;
- case SECCOMP_RET_KILL:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_KILL_PROCESS:
default:
- audit_seccomp(this_syscall, SIGSYS, action);
+ seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
- if (get_nr_threads(current) == 1) {
+ if (action == SECCOMP_RET_KILL_PROCESS ||
+ get_nr_threads(current) == 1) {
siginfo_t info;
/* Show the original registers in the dump. */
@@ -653,13 +739,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
seccomp_init_siginfo(&info, this_syscall, data);
do_coredump(&info);
}
- do_exit(SIGSYS);
+ if (action == SECCOMP_RET_KILL_PROCESS)
+ do_group_exit(SIGSYS);
+ else
+ do_exit(SIGSYS);
}
unreachable();
skip:
- audit_seccomp(this_syscall, 0, action);
+ seccomp_log(this_syscall, 0, action, match ? match->log : false);
return -1;
}
#else
@@ -794,6 +883,29 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
}
#endif
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+ u32 action;
+
+ if (copy_from_user(&action, uaction, sizeof(action)))
+ return -EFAULT;
+
+ switch (action) {
+ case SECCOMP_RET_KILL_PROCESS:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_TRAP:
+ case SECCOMP_RET_ERRNO:
+ case SECCOMP_RET_TRACE:
+ case SECCOMP_RET_LOG:
+ case SECCOMP_RET_ALLOW:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs)
@@ -805,6 +917,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
return seccomp_set_mode_strict();
case SECCOMP_SET_MODE_FILTER:
return seccomp_set_mode_filter(flags, uargs);
+ case SECCOMP_GET_ACTION_AVAIL:
+ if (flags != 0)
+ return -EINVAL;
+
+ return seccomp_get_action_avail(uargs);
default:
return -EINVAL;
}
@@ -922,3 +1039,185 @@ out:
return ret;
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
+#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
+#define SECCOMP_RET_TRAP_NAME "trap"
+#define SECCOMP_RET_ERRNO_NAME "errno"
+#define SECCOMP_RET_TRACE_NAME "trace"
+#define SECCOMP_RET_LOG_NAME "log"
+#define SECCOMP_RET_ALLOW_NAME "allow"
+
+static const char seccomp_actions_avail[] =
+ SECCOMP_RET_KILL_PROCESS_NAME " "
+ SECCOMP_RET_KILL_THREAD_NAME " "
+ SECCOMP_RET_TRAP_NAME " "
+ SECCOMP_RET_ERRNO_NAME " "
+ SECCOMP_RET_TRACE_NAME " "
+ SECCOMP_RET_LOG_NAME " "
+ SECCOMP_RET_ALLOW_NAME;
+
+struct seccomp_log_name {
+ u32 log;
+ const char *name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+ { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
+ { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
+ { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+ { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+ { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+ { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
+ { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+ { }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+ u32 actions_logged)
+{
+ const struct seccomp_log_name *cur;
+ bool append_space = false;
+
+ for (cur = seccomp_log_names; cur->name && size; cur++) {
+ ssize_t ret;
+
+ if (!(actions_logged & cur->log))
+ continue;
+
+ if (append_space) {
+ ret = strscpy(names, " ", size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ } else
+ append_space = true;
+
+ ret = strscpy(names, cur->name, size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ }
+
+ return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+ const char *name)
+{
+ const struct seccomp_log_name *cur;
+
+ for (cur = seccomp_log_names; cur->name; cur++) {
+ if (!strcmp(cur->name, name)) {
+ *action_logged = cur->log;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+ char *name;
+
+ *actions_logged = 0;
+ while ((name = strsep(&names, " ")) && *name) {
+ u32 action_logged = 0;
+
+ if (!seccomp_action_logged_from_name(&action_logged, name))
+ return false;
+
+ *actions_logged |= action_logged;
+ }
+
+ return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ char names[sizeof(seccomp_actions_avail)];
+ struct ctl_table table;
+ int ret;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ memset(names, 0, sizeof(names));
+
+ if (!write) {
+ if (!seccomp_names_from_actions_logged(names, sizeof(names),
+ seccomp_actions_logged))
+ return -EINVAL;
+ }
+
+ table = *ro_table;
+ table.data = names;
+ table.maxlen = sizeof(names);
+ ret = proc_dostring(&table, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (write) {
+ u32 actions_logged;
+
+ if (!seccomp_actions_logged_from_names(&actions_logged,
+ table.data))
+ return -EINVAL;
+
+ if (actions_logged & SECCOMP_LOG_ALLOW)
+ return -EINVAL;
+
+ seccomp_actions_logged = actions_logged;
+ }
+
+ return 0;
+}
+
+static struct ctl_path seccomp_sysctl_path[] = {
+ { .procname = "kernel", },
+ { .procname = "seccomp", },
+ { }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+ {
+ .procname = "actions_avail",
+ .data = (void *) &seccomp_actions_avail,
+ .maxlen = sizeof(seccomp_actions_avail),
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "actions_logged",
+ .mode = 0644,
+ .proc_handler = seccomp_actions_logged_handler,
+ },
+ { }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+ if (!hdr)
+ pr_warn("seccomp: sysctl registration failed\n");
+ else
+ kmemleak_not_leak(hdr);
+
+ return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 02e1859f2ca8..58ea8c03662e 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -986,8 +986,9 @@ static ssize_t bin_intvec(struct file *file,
size_t length = oldlen / sizeof(*vec);
char *str, *end;
int i;
+ loff_t pos = 0;
- result = kernel_read(file, 0, buffer, BUFSZ - 1);
+ result = kernel_read(file, buffer, BUFSZ - 1, &pos);
if (result < 0)
goto out_kfree;
@@ -1016,6 +1017,7 @@ static ssize_t bin_intvec(struct file *file,
size_t length = newlen / sizeof(*vec);
char *str, *end;
int i;
+ loff_t pos = 0;
str = buffer;
end = str + BUFSZ;
@@ -1029,7 +1031,7 @@ static ssize_t bin_intvec(struct file *file,
str += scnprintf(str, end - str, "%lu\t", value);
}
- result = kernel_write(file, buffer, str - buffer, 0);
+ result = kernel_write(file, buffer, str - buffer, &pos);
if (result < 0)
goto out_kfree;
}
@@ -1057,8 +1059,9 @@ static ssize_t bin_ulongvec(struct file *file,
size_t length = oldlen / sizeof(*vec);
char *str, *end;
int i;
+ loff_t pos = 0;
- result = kernel_read(file, 0, buffer, BUFSZ - 1);
+ result = kernel_read(file, buffer, BUFSZ - 1, &pos);
if (result < 0)
goto out_kfree;
@@ -1087,6 +1090,7 @@ static ssize_t bin_ulongvec(struct file *file,
size_t length = newlen / sizeof(*vec);
char *str, *end;
int i;
+ loff_t pos = 0;
str = buffer;
end = str + BUFSZ;
@@ -1100,7 +1104,7 @@ static ssize_t bin_ulongvec(struct file *file,
str += scnprintf(str, end - str, "%lu\t", value);
}
- result = kernel_write(file, buffer, str - buffer, 0);
+ result = kernel_write(file, buffer, str - buffer, &pos);
if (result < 0)
goto out_kfree;
}
@@ -1120,8 +1124,9 @@ static ssize_t bin_uuid(struct file *file,
if (oldval && oldlen) {
char buf[UUID_STRING_LEN + 1];
uuid_t uuid;
+ loff_t pos = 0;
- result = kernel_read(file, 0, buf, sizeof(buf) - 1);
+ result = kernel_read(file, buf, sizeof(buf) - 1, &pos);
if (result < 0)
goto out;
@@ -1154,8 +1159,9 @@ static ssize_t bin_dn_node_address(struct file *file,
char buf[15], *nodep;
unsigned long area, node;
__le16 dnaddr;
+ loff_t pos = 0;
- result = kernel_read(file, 0, buf, sizeof(buf) - 1);
+ result = kernel_read(file, buf, sizeof(buf) - 1, &pos);
if (result < 0)
goto out;
@@ -1188,6 +1194,7 @@ static ssize_t bin_dn_node_address(struct file *file,
__le16 dnaddr;
char buf[15];
int len;
+ loff_t pos = 0;
result = -EINVAL;
if (newlen != sizeof(dnaddr))
@@ -1201,7 +1208,7 @@ static ssize_t bin_dn_node_address(struct file *file,
le16_to_cpu(dnaddr) >> 10,
le16_to_cpu(dnaddr) & 0x3ff);
- result = kernel_write(file, buf, len, 0);
+ result = kernel_write(file, buf, len, &pos);
if (result < 0)
goto out;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5360b7aec57a..752e5daf0896 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file)
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
int cpu = tracing_get_cpu(inode);
+ struct trace_buffer *trace_buf = &tr->trace_buffer;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->current_trace->print_max)
+ trace_buf = &tr->max_buffer;
+#endif
if (cpu == RING_BUFFER_ALL_CPUS)
- tracing_reset_online_cpus(&tr->trace_buffer);
+ tracing_reset_online_cpus(trace_buf);
else
- tracing_reset(&tr->trace_buffer, cpu);
+ tracing_reset(trace_buf, cpu);
}
if (file->f_mode & FMODE_READ) {
@@ -5358,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (t == tr->current_trace)
goto out;
+ /* Some tracers won't work on kernel command line */
+ if (system_state < SYSTEM_RUNNING && t->noboot) {
+ pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
+ t->name);
+ goto out;
+ }
+
/* Some tracers are only allowed for the top level buffer */
if (!trace_ok_for_array(t, tr)) {
ret = -EINVAL;
@@ -5667,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_on() && iter->pos)
+ if (!tracer_tracing_is_on(iter->tr) && iter->pos)
break;
mutex_unlock(&iter->mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fb5d54d0d1b3..652c682707cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -444,6 +444,8 @@ struct tracer {
#ifdef CONFIG_TRACER_MAX_TRACE
bool use_max_tr;
#endif
+ /* True if tracer cannot be enabled in kernel param */
+ bool noboot;
};
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 181e139a8057..61e7f0678d33 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -702,7 +702,7 @@ static void append_filter_err(struct filter_parse_state *ps,
int pos = ps->lasterr_pos;
char *buf, *pbuf;
- buf = (char *)__get_free_page(GFP_TEMPORARY);
+ buf = (char *)__get_free_page(GFP_KERNEL);
if (!buf)
return;
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index cd7480d0a201..dca78fc48439 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly =
.close = mmio_close,
.read = mmio_read,
.print_line = mmio_print_line,
+ .noboot = true,
};
__init static int init_mmio_trace(void)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a4df67cbc711..49cb41412eec 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack)
if (in_nmi())
return;
- /*
- * There's a slight chance that we are tracing inside the
- * RCU infrastructure, and rcu_irq_enter() will not work
- * as expected.
- */
- if (unlikely(rcu_irq_enter_disabled()))
- return;
-
local_irq_save(flags);
arch_spin_lock(&stack_trace_max_lock);
- /*
- * RCU may not be watching, make it see us.
- * The stack trace code uses rcu_sched.
- */
- rcu_irq_enter();
-
/* In case another CPU set the tracer_frame on us */
if (unlikely(!frame_size))
this_size -= tracer_frame;
@@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack)
}
out:
- rcu_irq_exit();
arch_spin_unlock(&stack_trace_max_lock);
local_irq_restore(flags);
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 9c4eef20301c..696afe72d3b1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -565,7 +565,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
struct syscall_tp_t {
unsigned long long regs;
unsigned long syscall_nr;
- unsigned long args[sys_data->nb_args];
+ unsigned long args[SYSCALL_DEFINE_MAXARGS];
} param;
int i;