8 files changed, 92 insertions, 36 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d6709eb70970..0d302a87f21b 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
 	unsigned long ino;
 	dev_t dev;
 
-	rcu_read_lock();
-	exe_file = rcu_dereference(tsk->mm->exe_file);
+	exe_file = get_task_exe_file(tsk);
+	if (!exe_file)
+		return 0;
 	ino = exe_file->f_inode->i_ino;
 	dev = exe_file->f_inode->i_sb->s_dev;
-	rcu_read_unlock();
+	fput(exe_file);
 	return audit_mark_compare(mark, ino, dev);
 }
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index c2de56ab0fce..7fa0c4ae6394 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
 CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
 CONFIG_SLOB=y
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c7fd2778ed50..c27e53326bef 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+void cpuset_fork(struct task_struct *task)
+{
+	if (task_css_is_root(task, cpuset_cgrp_id))
+		return;
+
+	set_cpus_allowed_ptr(task, &current->cpus_allowed);
+	task->mems_allowed = current->mems_allowed;
+}
+
 struct cgroup_subsys cpuset_cgrp_subsys = {
 	.css_alloc	= cpuset_css_alloc,
 	.css_online	= cpuset_css_online,
@@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.attach		= cpuset_attach,
 	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
+	.fork		= cpuset_fork,
 	.legacy_cftypes	= files,
 	.early_init	= true,
 };
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae042a6..091a78be3b09 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -848,12 +848,7 @@ void do_exit(long code)
 	TASKS_RCU(preempt_enable());
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
-#ifdef CONFIG_NUMA
-	task_lock(tsk);
-	mpol_put(tsk->mempolicy);
-	tsk->mempolicy = NULL;
-	task_unlock(tsk);
-#endif
+	mpol_put_task_policy(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4a866..beb31725f7e2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -799,6 +799,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
 EXPORT_SYMBOL(get_mm_exe_file);
 
 /**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+	struct file *exe_file = NULL;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm) {
+		if (!(task->flags & PF_KTHREAD))
+			exe_file = get_mm_exe_file(mm);
+	}
+	task_unlock(task);
+	return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
  * get_task_mm - acquire a reference to the task's mm
  *
  * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	deactivate_mm(tsk, mm);
 
 	/*
-	 * If we're exiting normally, clear a user-space tid field if
-	 * requested.  We leave this alone when dying by signal, to leave
-	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble, say, a killed vfork parent shouldn't touch this mm.
-	 * Userland only wants this done for a sys_exit.
+	 * Signal userspace if we're not exiting with a core dump
+	 * because we want to leave the value intact for debugging
+	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->flags & PF_SIGNALED) &&
+		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
 		    atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
@@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
 
+	threadgroup_change_begin(current);
 	/*
 	 * Ensure that the cgroup subsystem policies allow the new process to be
 	 * forked. It should be noted the the new process's css_set can be changed
@@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_cancel_cgroup:
 	cgroup_cancel_fork(p);
 bad_fork_free_pid:
+	threadgroup_change_end(current);
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_thread:
@@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 503bc2d348e5..037c321c5618 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
 	return 0;
 out:
 	vfree(pi->sechdrs);
+	pi->sechdrs = NULL;
+
 	vfree(pi->purgatory_buf);
+	pi->purgatory_buf = NULL;
 	return ret;
 }
 
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index b69eb8a2876f..16bab471c7e2 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -99,27 +99,33 @@ again:
 	return add;
 }
 
-/*
- * printk one line from the temporary buffer from @start index until
- * and including the @end index.
- */
-static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
+static void printk_nmi_flush_line(const char *text, int len)
 {
-	const char *buf = s->buffer + start;
-
 	/*
 	 * The buffers are flushed in NMI only on panic.  The messages must
 	 * go only into the ring buffer at this stage.  Consoles will get
 	 * explicitly called later when a crashdump is not generated.
 	 */
 	if (in_nmi())
-		printk_deferred("%.*s", (end - start) + 1, buf);
+		printk_deferred("%.*s", len, text);
 	else
-		printk("%.*s", (end - start) + 1, buf);
+		printk("%.*s", len, text);
 
 }
 
 /*
+ * printk one line from the temporary buffer from @start index until
+ * and including the @end index.
+ */
+static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
+					int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk_nmi_flush_line(buf, (end - start) + 1);
+}
+
+/*
  * Flush data from the associated per_CPU buffer. The function
  * can be called either via IRQ work or independently.
  */
@@ -150,9 +156,11 @@ more:
 	 * the buffer an unexpected way. If we printed something then
 	 * @len must only increase.
 	 */
-	if (i && i >= len)
-		pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
-		       i, len);
+	if (i && i >= len) {
+		const char *msg = "printk_nmi_flush: internal error\n";
+
+		printk_nmi_flush_line(msg, strlen(msg));
+	}
 
 	if (!len)
 		goto out; /* Someone else has already flushed the buffer. */
@@ -166,14 +174,14 @@ more:
 	/* Print line by line. */
 	for (; i < size; i++) {
 		if (s->buffer[i] == '\n') {
-			print_nmi_seq_line(s, last_i, i);
+			printk_nmi_flush_seq_line(s, last_i, i);
 			last_i = i + 1;
 		}
 	}
 	/* Check if there was a partial line. */
 	if (last_i < size) {
-		print_nmi_seq_line(s, last_i, size - 1);
-		pr_cont("\n");
+		printk_nmi_flush_seq_line(s, last_i, size - 1);
+		printk_nmi_flush_line("\n", strlen("\n"));
 	}
 
 	/*
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3f9d8a..0db7c8a2afe2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		ptrace_event(PTRACE_EVENT_SECCOMP, data);
 		/*
 		 * The delivery of a fatal signal during event
-		 * notification may silently skip tracer notification.
-		 * Terminating the task now avoids executing a system
-		 * call that may not be intended.
+		 * notification may silently skip tracer notification,
+		 * which could leave us with a potentially unmodified
+		 * syscall that the tracer would have liked to have
+		 * changed. Since the process is about to die, we just
+		 * force the syscall to be skipped and let the signal
+		 * kill the process and correctly handle any tracer exit
+		 * notifications.
 		 */
 		if (fatal_signal_pending(current))
-			do_exit(SIGSYS);
+			goto skip;
 		/* Check if the tracer forced the syscall to be skipped. */
 		this_syscall = syscall_get_nr(current, task_pt_regs(current));
 		if (this_syscall < 0)