diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/vmware.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/kvmclock.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 65 | ||||
-rw-r--r-- | arch/x86/kernel/pvclock.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 1 |
9 files changed, 70 insertions, 53 deletions
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bca0bd8f4846..85168740f76a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -86,7 +86,7 @@ void update_spec_ctrl_cond(u64 val) wrmsrl(MSR_IA32_SPEC_CTRL, val); } -u64 spec_ctrl_current(void) +noinstr u64 spec_ctrl_current(void) { return this_cpu_read(x86_spec_ctrl_current); } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 02039ec3597d..11f83d07925e 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -143,7 +143,7 @@ static __init int parse_no_stealacc(char *arg) } early_param("no-steal-acc", parse_no_stealacc); -static unsigned long long notrace vmware_sched_clock(void) +static noinstr u64 vmware_sched_clock(void) { unsigned long long ns; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9baa89a8877d..dccce58201b7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -853,12 +853,12 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * Initialize register state that may prevent from entering low-power idle. * This function will be invoked from the cpuidle driver only when needed. 
*/ -void fpu_idle_fpregs(void) +noinstr void fpu_idle_fpregs(void) { /* Note: AMX_TILE being enabled implies XGETBV1 support */ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) && (xfeatures_in_use() & XFEATURE_MASK_XTILE)) { tile_release(); - fpregs_deactivate(&current->thread.fpu); + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 16333ba1904b..0f35d44c56fe 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -71,12 +71,12 @@ static int kvm_set_wallclock(const struct timespec64 *now) return -ENODEV; } -static u64 kvm_clock_read(void) +static noinstr u64 kvm_clock_read(void) { u64 ret; preempt_disable_notrace(); - ret = pvclock_clocksource_read(this_cpu_pvti()); + ret = pvclock_clocksource_read_nowd(this_cpu_pvti()); preempt_enable_notrace(); return ret; } @@ -86,7 +86,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs) return kvm_clock_read(); } -static u64 kvm_sched_clock_read(void) +static noinstr u64 kvm_sched_clock_read(void) { return kvm_clock_read() - kvm_sched_clock_offset; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 327757afb027..5bf4f0b2f35d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -216,6 +216,11 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val) native_set_debugreg(regno, val); } +noinstr void pv_native_wbinvd(void) +{ + native_wbinvd(); +} + static noinstr void pv_native_irq_enable(void) { native_irq_enable(); @@ -225,6 +230,11 @@ static noinstr void pv_native_irq_disable(void) { native_irq_disable(); } + +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} #endif enum paravirt_lazy_mode paravirt_get_lazy_mode(void) @@ -256,7 +266,7 @@ struct paravirt_patch_template pv_ops = { .cpu.read_cr0 = native_read_cr0, .cpu.write_cr0 = native_write_cr0, .cpu.write_cr4 = native_write_cr4, - .cpu.wbinvd = native_wbinvd, + .cpu.wbinvd = pv_native_wbinvd, 
.cpu.read_msr = native_read_msr, .cpu.write_msr = native_write_msr, .cpu.read_msr_safe = native_read_msr_safe, @@ -290,7 +300,7 @@ struct paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), - .irq.safe_halt = native_safe_halt, + .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, #endif /* CONFIG_PARAVIRT_XXL */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 40d156a31676..e57cd31bfec4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -24,6 +24,7 @@ #include <linux/cpuidle.h> #include <linux/acpi.h> #include <linux/elf-randomize.h> +#include <linux/static_call.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> @@ -694,7 +695,24 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -static void (*x86_idle)(void); +/* + * We use this if we don't have any better idle routine.. + */ +void __cpuidle default_idle(void) +{ + raw_safe_halt(); + raw_local_irq_disable(); +} +#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) +EXPORT_SYMBOL(default_idle); +#endif + +DEFINE_STATIC_CALL_NULL(x86_idle, default_idle); + +static bool x86_idle_set(void) +{ + return !!static_call_query(x86_idle); +} #ifndef CONFIG_SMP static inline void play_dead(void) @@ -717,28 +735,17 @@ void arch_cpu_idle_dead(void) /* * Called from the generic idle code. */ -void arch_cpu_idle(void) -{ - x86_idle(); -} - -/* - * We use this if we don't have any better idle routine.. 
- */ -void __cpuidle default_idle(void) +void __cpuidle arch_cpu_idle(void) { - raw_safe_halt(); + static_call(x86_idle)(); } -#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) -EXPORT_SYMBOL(default_idle); -#endif #ifdef CONFIG_XEN bool xen_set_default_idle(void) { - bool ret = !!x86_idle; + bool ret = x86_idle_set(); - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); return ret; } @@ -800,13 +807,7 @@ static void amd_e400_idle(void) default_idle(); - /* - * The switch back from broadcast mode needs to be called with - * interrupts disabled. - */ - raw_local_irq_disable(); tick_broadcast_exit(); - raw_local_irq_enable(); } /* @@ -864,12 +865,10 @@ static __cpuidle void mwait_idle(void) } __monitor((void *)&current_thread_info()->flags, 0, 0); - if (!need_resched()) + if (!need_resched()) { __sti_mwait(0, 0); - else - raw_local_irq_enable(); - } else { - raw_local_irq_enable(); + raw_local_irq_disable(); + } } __current_clr_polling(); } @@ -880,20 +879,20 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif - if (x86_idle || boot_option_idle_override == IDLE_POLL) + if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) return; if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { pr_info("using AMD E400 aware idle routine\n"); - x86_idle = amd_e400_idle; + static_call_update(x86_idle, amd_e400_idle); } else if (prefer_mwait_c1_over_halt(c)) { pr_info("using mwait in idle threads\n"); - x86_idle = mwait_idle; + static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - x86_idle = tdx_safe_halt; + static_call_update(x86_idle, tdx_safe_halt); } else - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); } void amd_e400_c1e_apic_setup(void) @@ -946,7 +945,7 @@ static int 
__init idle_setup(char *str) * To continue to load the CPU idle driver, don't touch * the boot_option_idle_override. */ - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index eda37df016f0..56acf53a782a 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,7 +64,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) return flags & valid_flags; } -u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +static __always_inline +u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd) { unsigned version; u64 ret; @@ -77,7 +78,7 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) flags = src->flags; } while (pvclock_read_retry(src, version)); - if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { + if (dowd && unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; pvclock_touch_watchdogs(); } @@ -100,16 +101,25 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. 
*/ - last = atomic64_read(&last_value); + last = arch_atomic64_read(&last_value); do { - if (ret < last) + if (ret <= last) return last; - last = atomic64_cmpxchg(&last_value, last, ret); - } while (unlikely(last != ret)); + } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret)); return ret; } +u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + return __pvclock_clocksource_read(src, true); +} + +noinstr u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src) +{ + return __pvclock_clocksource_read(src, false); +} + void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, struct pvclock_vcpu_time_info *vcpu_time, struct timespec64 *ts) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a78e73da4a74..8c33936b017d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -215,7 +215,7 @@ static void __init cyc2ns_init_secondary_cpus(void) /* * Scheduler clock - returns current time in nanosec units. */ -u64 native_sched_clock(void) +noinstr u64 native_sched_clock(void) { if (static_branch_likely(&__use_tsc)) { u64 tsc_now = rdtsc(); @@ -248,7 +248,7 @@ u64 native_sched_clock_from_tsc(u64 tsc) /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT -unsigned long long sched_clock(void) +noinstr u64 sched_clock(void) { return paravirt_sched_clock(); } @@ -258,8 +258,7 @@ bool using_native_sched_clock(void) return static_call_query(pv_sched_clock) == native_sched_clock; } #else -unsigned long long -sched_clock(void) __attribute__((alias("native_sched_clock"))); +u64 sched_clock(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } #endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2e0ee14229bf..25f155205770 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -129,7 +129,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - 
CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT SOFTIRQENTRY_TEXT |