From 1ed0cc5a01a4d868d9907ce96468c4b4c6709556 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Tue, 4 Sep 2018 15:45:41 -0700
Subject: mm: respect arch_dup_mmap() return value

Commit d70f2a14b72a ("include/linux/sched/mm.h: uninline mmdrop_async(),
etc") ignored the return value of arch_dup_mmap(). As a result, on x86,
a failure to duplicate the LDT (e.g. due to memory allocation error)
would leave the duplicated memory mapping in an inconsistent state.

Fix by using the return value, as it was before the change.

Link: http://lkml.kernel.org/r/20180823051229.211856-1-namit@vmware.com
Fixes: d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc")
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0..f0b58479534f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -550,8 +550,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			goto out;
 	}
 	/* a new mm has just been created */
-	arch_dup_mmap(oldmm, mm);
-	retval = 0;
+	retval = arch_dup_mmap(oldmm, mm);
 out:
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);
-- 
cgit v1.2.3-58-ga151


From f8b7530aa0a1def79c93101216b5b17cf408a70a Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <neeraju@codeaurora.org>
Date: Wed, 5 Sep 2018 11:22:07 +0530
Subject: cpu/hotplug: Adjust misplaced smb() in cpuhp_thread_fun()

The smp_mb() in cpuhp_thread_fun() is misplaced. It needs to be after the
load of st->should_run to prevent reordering of the later load/stores
w.r.t. the load of st->should_run.

Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core")
Signed-off-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infraded.org>
Cc: josh@joshtriplett.org
Cc: peterz@infradead.org
Cc: jiangshanlai@gmail.com
Cc: dzickus@redhat.com
Cc: brendan.jackman@arm.com
Cc: malat@debian.org
Cc: mojha@codeaurora.org
Cc: sramana@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1536126727-11629-1-git-send-email-neeraju@codeaurora.org
---
 kernel/cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index aa7fe85ad62e..eb4041f78073 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -607,15 +607,15 @@ static void cpuhp_thread_fun(unsigned int cpu)
 	bool bringup = st->bringup;
 	enum cpuhp_state state;
 
+	if (WARN_ON_ONCE(!st->should_run))
+		return;
+
 	/*
 	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
 	 * that if we see ->should_run we also see the rest of the state.
 	 */
 	smp_mb();
 
-	if (WARN_ON_ONCE(!st->should_run))
-		return;
-
 	cpuhp_lock_acquire(bringup);
 
 	if (st->single) {
-- 
cgit v1.2.3-58-ga151


From 69fa6eb7d6a64801ea261025cce9723d9442d773 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 6 Sep 2018 15:21:38 +0200
Subject: cpu/hotplug: Prevent state corruption on error rollback

When a teardown callback fails, the CPU hotplug code brings the CPU back to
the previous state. The previous state becomes the new target state. The
rollback happens in undo_cpu_down() which increments the state
unconditionally even if the state is already the same as the target.

As a consequence the next CPU hotplug operation will start at the wrong
state. This is easily to observe when __cpu_disable() fails.

Prevent the unconditional undo by checking the state vs. target before
incrementing state and fix up the consequently wrong conditional in the
unplug code which handles the failure of the final CPU take down on the
control CPU side.

Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core")
Reported-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: josh@joshtriplett.org
Cc: peterz@infradead.org
Cc: jiangshanlai@gmail.com
Cc: dzickus@redhat.com
Cc: brendan.jackman@arm.com
Cc: malat@debian.org
Cc: sramana@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1809051419580.1416@nanos.tec.linutronix.de

----
---
 kernel/cpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index eb4041f78073..0097acec1c71 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -916,7 +916,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 		if (ret) {
 			st->target = prev_state;
-			undo_cpu_down(cpu, st);
+			if (st->state < prev_state)
+				undo_cpu_down(cpu, st);
 			break;
 		}
 	}
@@ -969,7 +970,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	 * to do the further cleanups.
 	 */
 	ret = cpuhp_down_callbacks(cpu, st, target);
-	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
 		cpuhp_reset_state(st, prev_state);
 		__cpuhp_kick_ap(st);
 	}
-- 
cgit v1.2.3-58-ga151


From d1c392c9e2a301f38998a353f467f76414e38725 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 5 Sep 2018 16:29:49 -0400
Subject: printk/tracing: Do not trace printk_nmi_enter()

I hit the following splat in my tests:

------------[ cut here ]------------
IRQs not enabled as expected
WARNING: CPU: 3 PID: 0 at kernel/time/tick-sched.c:982 tick_nohz_idle_enter+0x44/0x8c
Modules linked in: ip6t_REJECT nf_reject_ipv6 ip6table_filter ip6_tables ipv6
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.19.0-rc2-test+ #2
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
EIP: tick_nohz_idle_enter+0x44/0x8c
Code: ec 05 00 00 00 75 26 83 b8 c0 05 00 00 00 75 1d 80 3d d0 36 3e c1 00
75 14 68 94 63 12 c1 c6 05 d0 36 3e c1 01 e8 04 ee f8 ff <0f> 0b 58 fa bb a0
e5 66 c1 e8 25 0f 04 00 64 03 1d 28 31 52 c1 8b
EAX: 0000001c EBX: f26e7f8c ECX: 00000006 EDX: 00000007
ESI: f26dd1c0 EDI: 00000000 EBP: f26e7f40 ESP: f26e7f38
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010296
CR0: 80050033 CR2: 0813c6b0 CR3: 2f342000 CR4: 001406f0
Call Trace:
 do_idle+0x33/0x202
 cpu_startup_entry+0x61/0x63
 start_secondary+0x18e/0x1ed
 startup_32_smp+0x164/0x168
irq event stamp: 18773830
hardirqs last  enabled at (18773829): [<c040150c>] trace_hardirqs_on_thunk+0xc/0x10
hardirqs last disabled at (18773830): [<c040151c>] trace_hardirqs_off_thunk+0xc/0x10
softirqs last  enabled at (18773824): [<c0ddaa6f>] __do_softirq+0x25f/0x2bf
softirqs last disabled at (18773767): [<c0416bbe>] call_on_stack+0x45/0x4b
---[ end trace b7c64aa79e17954a ]---

After a bit of debugging, I found what was happening. This would trigger
when performing "perf" with a high NMI interrupt rate, while enabling and
disabling function tracer. Ftrace uses breakpoints to convert the nops at
the start of functions to calls to the function trampolines. The breakpoint
traps disable interrupts and this makes calls into lockdep via the
trace_hardirqs_off_thunk in the entry.S code. What happens is the following:

  do_idle {

    [interrupts enabled]

    <interrupt> [interrupts disabled]
	TRACE_IRQS_OFF [lockdep says irqs off]
	[...]
	TRACE_IRQS_IRET
	    test if pt_regs say return to interrupts enabled [yes]
	    TRACE_IRQS_ON [lockdep says irqs are on]

	    <nmi>
		nmi_enter() {
		    printk_nmi_enter() [traced by ftrace]
		    [ hit ftrace breakpoint ]
		    <breakpoint exception>
			TRACE_IRQS_OFF [lockdep says irqs off]
			[...]
			TRACE_IRQS_IRET [return from breakpoint]
			   test if pt_regs say interrupts enabled [no]
			   [iret back to interrupt]
	   [iret back to code]

    tick_nohz_idle_enter() {

	lockdep_assert_irqs_enabled() [lockdep say no!]

Although interrupts are indeed enabled, lockdep thinks it is not, and since
we now do asserts via lockdep, it gives a false warning. The issue here is
that printk_nmi_enter() is called before lockdep_off(), which disables
lockdep (for this reason) in NMIs. By simply not allowing ftrace to see
printk_nmi_enter() (via notrace annotation) we keep lockdep from getting
confused.

Cc: stable@vger.kernel.org
Fixes: 42a0bb3f71383 ("printk/nmi: generic solution for safe printk in NMI")
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/printk/printk_safe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0a74c533e4b..0913b4d385de 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -306,12 +306,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
 	return printk_safe_log_store(s, fmt, args);
 }
 
-void printk_nmi_enter(void)
+void notrace printk_nmi_enter(void)
 {
 	this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
 }
 
-void printk_nmi_exit(void)
+void notrace printk_nmi_exit(void)
 {
 	this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
 }
-- 
cgit v1.2.3-58-ga151


From e2c631ba75a7e727e8db0a9d30a06bfd434adb3a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 5 Sep 2018 10:41:58 +0200
Subject: clocksource: Revert "Remove kthread"

I turns out that the silly spawn kthread from worker was actually needed.

clocksource_watchdog_kthread() cannot be called directly from
clocksource_watchdog_work(), because clocksource_select() calls
timekeeping_notify() which uses stop_machine(). One cannot use
stop_machine() from a workqueue() due lock inversions wrt CPU hotplug.

Revert the patch but add a comment that explain why we jump through such
apparently silly hoops.

Fixes: 7197e77abcb6 ("clocksource: Remove kthread")
Reported-by: Siegfried Metz <frame@mailbox.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Tested-by: Kevin Shanahan <kevin@shanahan.id.au>
Tested-by: viktor_jaegerskuepper@freenet.de
Tested-by: Siegfried Metz <frame@mailbox.org>
Cc: rafael.j.wysocki@intel.com
Cc: len.brown@intel.com
Cc: diego.viola@gmail.com
Cc: rui.zhang@intel.com
Cc: bjorn.andersson@linaro.org
Link: https://lkml.kernel.org/r/20180905084158.GR24124@hirez.programming.kicks-ass.net
---
 kernel/time/clocksource.c | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f74fb00d8064..0e6e97a01942 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -133,19 +133,40 @@ static void inline clocksource_watchdog_unlock(unsigned long *flags)
 	spin_unlock_irqrestore(&watchdog_lock, *flags);
 }
 
+static int clocksource_watchdog_kthread(void *data);
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
+
 /*
  * Interval: 0.5sec Threshold: 0.0625s
  */
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+static void clocksource_watchdog_work(struct work_struct *work)
+{
+	/*
+	 * We cannot directly run clocksource_watchdog_kthread() here, because
+	 * clocksource_select() calls timekeeping_notify() which uses
+	 * stop_machine(). One cannot use stop_machine() from a workqueue() due
+	 * lock inversions wrt CPU hotplug.
+	 *
+	 * Also, we only ever run this work once or twice during the lifetime
+	 * of the kernel, so there is no point in creating a more permanent
+	 * kthread for this.
+	 *
+	 * If kthread_run fails the next watchdog scan over the
+	 * watchdog_list will find the unstable clock again.
+	 */
+	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
+}
+
 static void __clocksource_unstable(struct clocksource *cs)
 {
 	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
 	cs->flags |= CLOCK_SOURCE_UNSTABLE;
 
 	/*
-	 * If the clocksource is registered clocksource_watchdog_work() will
+	 * If the clocksource is registered clocksource_watchdog_kthread() will
 	 * re-rate and re-select.
 	 */
 	if (list_empty(&cs->list)) {
@@ -156,7 +177,7 @@ static void __clocksource_unstable(struct clocksource *cs)
 	if (cs->mark_unstable)
 		cs->mark_unstable(cs);
 
-	/* kick clocksource_watchdog_work() */
+	/* kick clocksource_watchdog_kthread() */
 	if (finished_booting)
 		schedule_work(&watchdog_work);
 }
@@ -166,7 +187,7 @@ static void __clocksource_unstable(struct clocksource *cs)
  * @cs:		clocksource to be marked unstable
  *
  * This function is called by the x86 TSC code to mark clocksources as unstable;
- * it defers demotion and re-selection to a work.
+ * it defers demotion and re-selection to a kthread.
  */
 void clocksource_mark_unstable(struct clocksource *cs)
 {
@@ -391,9 +412,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
 	}
 }
 
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
-
-static int __clocksource_watchdog_work(void)
+static int __clocksource_watchdog_kthread(void)
 {
 	struct clocksource *cs, *tmp;
 	unsigned long flags;
@@ -418,12 +437,13 @@ static int __clocksource_watchdog_work(void)
 	return select;
 }
 
-static void clocksource_watchdog_work(struct work_struct *work)
+static int clocksource_watchdog_kthread(void *data)
 {
 	mutex_lock(&clocksource_mutex);
-	if (__clocksource_watchdog_work())
+	if (__clocksource_watchdog_kthread())
 		clocksource_select();
 	mutex_unlock(&clocksource_mutex);
+	return 0;
 }
 
 static bool clocksource_is_watchdog(struct clocksource *cs)
@@ -442,7 +462,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 static void clocksource_select_watchdog(bool fallback) { }
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
-static inline int __clocksource_watchdog_work(void) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 void clocksource_mark_unstable(struct clocksource *cs) { }
 
@@ -810,7 +830,7 @@ static int __init clocksource_done_booting(void)
 	/*
 	 * Run the watchdog first to eliminate unstable clock sources
 	 */
-	__clocksource_watchdog_work();
+	__clocksource_watchdog_kthread();
 	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
-- 
cgit v1.2.3-58-ga151