diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-12 19:56:15 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-12 19:56:15 -0800 |
commit | 9465d9cc31fa732089cd8bec9f1bdfcdc174a5ce (patch) | |
tree | fb31a0a6271b255ffe6e29b4f9eb4192253f8c7f /kernel | |
parent | e71c3978d6f97659f6c3ee942c3e581299e4adf2 (diff) | |
parent | c029a2bec66e42e57538cb65e28618baf6a4b311 (diff) |
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
"The time/timekeeping/timer folks deliver with this update:
- Fix a reintroduced signed/unsigned issue and cleanup the whole
signed/unsigned mess in the timekeeping core so this wont happen
accidentaly again.
- Add a new trace clock based on boot time
- Prevent injection of random sleep times when PM tracing abuses the
RTC for storage
- Make posix timers configurable for real tiny systems
- Add tracepoints for the alarm timer subsystem so timer based
suspend wakeups can be instrumented
- The usual pile of fixes and updates to core and drivers"
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
timekeeping: Use mul_u64_u32_shr() instead of open coding it
timekeeping: Get rid of pointless typecasts
timekeeping: Make the conversion call chain consistently unsigned
timekeeping_Force_unsigned_clocksource_to_nanoseconds_conversion
alarmtimer: Add tracepoints for alarm timers
trace: Update documentation for mono, mono_raw and boot clock
trace: Add an option for boot clock as trace clock
timekeeping: Add a fast and NMI safe boot clock
timekeeping/clocksource_cyc2ns: Document intended range limitation
timekeeping: Ignore the bogus sleep time if pm_trace is enabled
selftests/timers: Fix spelling mistake "Asyncrhonous" -> "Asynchronous"
clocksource/drivers/bcm2835_timer: Unmap region obtained by of_iomap
clocksource/drivers/arm_arch_timer: Map frame with of_io_request_and_map()
arm64: dts: rockchip: Arch counter doesn't tick in system suspend
clocksource/drivers/arm_arch_timer: Don't assume clock runs in suspend
posix-timers: Make them configurable
posix_cpu_timers: Move the add_device_randomness() call to a proper place
timer: Move sys_alarm from timer.c to itimer.c
ptp_clock: Allow for it to be optional
Kconfig: Regenerate *.c_shipped files after previous changes
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/compat.c | 8 | ||||
-rw-r--r-- | kernel/exit.c | 15 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/signal.c | 6 | ||||
-rw-r--r-- | kernel/sys.c | 3 | ||||
-rw-r--r-- | kernel/time/Makefile | 10 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 59 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 20 | ||||
-rw-r--r-- | kernel/time/itimer.c | 15 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 4 | ||||
-rw-r--r-- | kernel/time/posix-stubs.c | 123 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 90 | ||||
-rw-r--r-- | kernel/time/timer.c | 48 | ||||
-rw-r--r-- | kernel/trace/trace.c | 1 |
14 files changed, 307 insertions, 97 deletions
diff --git a/kernel/compat.c b/kernel/compat.c index 333d364be29d..b3a047f208a7 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -307,12 +307,17 @@ static inline long put_compat_itimerval(struct compat_itimerval __user *o, __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); } +asmlinkage long sys_ni_posix_timers(void); + COMPAT_SYSCALL_DEFINE2(getitimer, int, which, struct compat_itimerval __user *, it) { struct itimerval kit; int error; + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) + return sys_ni_posix_timers(); + error = do_getitimer(which, &kit); if (!error && put_compat_itimerval(it, &kit)) error = -EFAULT; @@ -326,6 +331,9 @@ COMPAT_SYSCALL_DEFINE3(setitimer, int, which, struct itimerval kin, kout; int error; + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) + return sys_ni_posix_timers(); + if (in) { if (get_compat_itimerval(&kin, in)) return -EFAULT; diff --git a/kernel/exit.c b/kernel/exit.c index 3076f3089919..aacff8e2aec0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,7 @@ #include <linux/writeback.h> #include <linux/shm.h> #include <linux/kcov.h> +#include <linux/random.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -91,11 +92,10 @@ static void __exit_signal(struct task_struct *tsk) lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); +#ifdef CONFIG_POSIX_TIMERS posix_cpu_timers_exit(tsk); if (group_dead) { posix_cpu_timers_exit_group(tsk); - tty = sig->tty; - sig->tty = NULL; } else { /* * This can only happen if the caller is de_thread(). @@ -104,7 +104,13 @@ static void __exit_signal(struct task_struct *tsk) */ if (unlikely(has_group_leader_pid(tsk))) posix_cpu_timers_exit_group(tsk); + } +#endif + if (group_dead) { + tty = sig->tty; + sig->tty = NULL; + } else { /* * If there is any task waiting for the group exit * then notify it: @@ -116,6 +122,9 @@ static void __exit_signal(struct task_struct *tsk) sig->curr_target = next_thread(tsk); } + add_device_randomness((const void*) &tsk->se.sum_exec_runtime, + sizeof(unsigned long long)); + /* * Accumulate here the counters for all threads as they die. We could * skip the group leader because it is the last user of signal_struct, @@ -799,8 +808,10 @@ void __noreturn do_exit(long code) acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { +#ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); +#endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); } diff --git a/kernel/fork.c b/kernel/fork.c index 7ffa16033ded..5957cf8b4c4b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1347,8 +1347,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); +#ifdef CONFIG_POSIX_TIMERS hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; +#endif task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); diff --git a/kernel/signal.c b/kernel/signal.c index 75761acc77cf..29a410780aa9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -427,6 +427,7 @@ void flush_signals(struct task_struct *t) spin_unlock_irqrestore(&t->sighand->siglock, flags); } +#ifdef CONFIG_POSIX_TIMERS static void __flush_itimer_signals(struct sigpending *pending) { sigset_t signal, retain; @@ -460,6 +461,7 @@ void flush_itimer_signals(void) __flush_itimer_signals(&tsk->signal->shared_pending); spin_unlock_irqrestore(&tsk->sighand->siglock, flags); } +#endif void ignore_signals(struct task_struct *t) { @@ -567,6 +569,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); +#ifdef CONFIG_POSIX_TIMERS /* * itimer signal ? * @@ -590,6 +593,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) hrtimer_restart(tmr); } } +#endif } recalc_sigpending(); @@ -611,6 +615,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) */ current->jobctl |= JOBCTL_STOP_DEQUEUED; } +#ifdef CONFIG_POSIX_TIMERS if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* * Release the siglock to ensure proper locking order @@ -622,6 +627,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) do_schedule_next_timer(info); spin_lock(&tsk->sighand->siglock); } +#endif return signr; } diff --git a/kernel/sys.c b/kernel/sys.c index 89d5be418157..78c9fb7dd680 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1416,7 +1416,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, * applications, so we live with it */ if (!retval && new_rlim && resource == RLIMIT_CPU && - new_rlim->rlim_cur != RLIM_INFINITY) + new_rlim->rlim_cur != RLIM_INFINITY && + IS_ENABLED(CONFIG_POSIX_TIMERS)) update_rlimit_cpu(tsk, new_rlim->rlim_cur); out: read_unlock(&tasklist_lock); diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 49eca0beed32..976840d29a71 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,6 +1,12 @@ -obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o +obj-y += time.o timer.o hrtimer.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o -obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o +obj-y += timeconv.o timecounter.o alarmtimer.o + +ifeq ($(CONFIG_POSIX_TIMERS),y) + obj-y += posix-timers.o posix-cpu-timers.o posix-clock.o itimer.o +else + obj-y += posix-stubs.o +endif obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o tick-common.o ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 12dd190634ab..9b08ca391aed 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,6 +26,9 @@ #include <linux/workqueue.h> #include <linux/freezer.h> +#define CREATE_TRACE_POINTS +#include <trace/events/alarmtimer.h> + /** * struct alarm_base - Alarm timer bases * @lock: Lock for syncrhonized access to the base @@ -40,7 +43,9 @@ static struct alarm_base { clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; -/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ +/* freezer information to handle clock_nanosleep triggered wakeups */ +static enum alarmtimer_type freezer_alarmtype; +static ktime_t freezer_expires; static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); @@ -194,6 +199,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } spin_unlock_irqrestore(&base->lock, flags); + trace_alarmtimer_fired(alarm, base->gettime()); return ret; } @@ -218,15 +224,16 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining); */ static int alarmtimer_suspend(struct device *dev) { - struct rtc_time tm; - ktime_t min, now; - unsigned long flags; + ktime_t min, now, expires; + int i, ret, type; struct rtc_device *rtc; - int i; - int ret; + unsigned long flags; + struct rtc_time tm; spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; + expires = freezer_expires; + type = freezer_alarmtype; freezer_delta = ktime_set(0, 0); spin_unlock_irqrestore(&freezer_delta_lock, flags); @@ -247,8 +254,11 @@ static int alarmtimer_suspend(struct device *dev) if (!next) continue; delta = ktime_sub(next->expires, base->gettime()); - if (!min.tv64 || (delta.tv64 < min.tv64)) + if (!min.tv64 || (delta.tv64 < min.tv64)) { + expires = next->expires; min = delta; + type = i; + } } if (min.tv64 == 0) return 0; @@ -258,6 +268,8 @@ static int alarmtimer_suspend(struct device *dev) return -EBUSY; } + trace_alarmtimer_suspend(expires, type); + /* Setup an rtc timer to fire that far in the future */ rtc_timer_cancel(rtc, &rtctimer); rtc_read_time(rtc, &tm); @@ -295,15 +307,32 @@ static int alarmtimer_resume(struct device *dev) static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) { - ktime_t delta; + struct alarm_base *base; unsigned long flags; - struct alarm_base *base = &alarm_bases[type]; + ktime_t delta; + + switch(type) { + case ALARM_REALTIME: + base = &alarm_bases[ALARM_REALTIME]; + type = ALARM_REALTIME_FREEZER; + break; + case ALARM_BOOTTIME: + base = &alarm_bases[ALARM_BOOTTIME]; + type = ALARM_BOOTTIME_FREEZER; + break; + default: + WARN_ONCE(1, "Invalid alarm type: %d\n", type); + return; + } delta = ktime_sub(absexp, base->gettime()); spin_lock_irqsave(&freezer_delta_lock, flags); - if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) + if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) { freezer_delta = delta; + freezer_expires = absexp; + freezer_alarmtype = type; + } spin_unlock_irqrestore(&freezer_delta_lock, flags); } @@ -342,6 +371,8 @@ void alarm_start(struct alarm *alarm, ktime_t start) alarmtimer_enqueue(base, alarm); hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS); spin_unlock_irqrestore(&base->lock, flags); + + trace_alarmtimer_start(alarm, base->gettime()); } EXPORT_SYMBOL_GPL(alarm_start); @@ -390,6 +421,8 @@ int alarm_try_to_cancel(struct alarm *alarm) if (ret >= 0) alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); + + trace_alarmtimer_cancel(alarm, base->gettime()); return ret; } EXPORT_SYMBOL_GPL(alarm_try_to_cancel); @@ -846,8 +879,10 @@ static int __init alarmtimer_init(void) alarmtimer_rtc_timer_init(); - posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); - posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); + if (IS_ENABLED(CONFIG_POSIX_TIMERS)) { + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); + } /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bb5ec425dfe0..08be5c99d26b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1742,15 +1742,19 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * - * Returns 0 when the timer has expired otherwise -EINTR + * Returns 0 when the timer has expired. If the task was woken before the + * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or + * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) @@ -1772,15 +1776,19 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * - * Returns 0 when the timer has expired otherwise -EINTR + * Returns 0 when the timer has expired. If the task was woken before the + * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or + * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 1d5c7204ddc9..2b9f45bc955d 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -238,6 +238,8 @@ again: return 0; } +#ifdef __ARCH_WANT_SYS_ALARM + /** * alarm_setitimer - set alarm in seconds * @@ -250,7 +252,7 @@ again: * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid * negative timeval settings which would cause immediate expiry. */ -unsigned int alarm_setitimer(unsigned int seconds) +static unsigned int alarm_setitimer(unsigned int seconds) { struct itimerval it_new, it_old; @@ -275,6 +277,17 @@ unsigned int alarm_setitimer(unsigned int seconds) return it_old.it_value.tv_sec; } +/* + * For backwards compatibility? This can be done in libc so Alpha + * and all newer ports shouldn't need it. + */ +SYSCALL_DEFINE1(alarm, unsigned int, seconds) +{ + return alarm_setitimer(seconds); +} + +#endif + SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, struct itimerval __user *, ovalue) { diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index e887ffc8eef3..f246763c9947 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -9,7 +9,6 @@ #include <asm/uaccess.h> #include <linux/kernel_stat.h> #include <trace/events/timer.h> -#include <linux/random.h> #include <linux/tick.h> #include <linux/workqueue.h> @@ -447,10 +446,7 @@ static void cleanup_timers(struct list_head *head) */ void posix_cpu_timers_exit(struct task_struct *tsk) { - add_device_randomness((const void*) &tsk->se.sum_exec_runtime, - sizeof(unsigned long long)); cleanup_timers(tsk->cpu_timers); - } void posix_cpu_timers_exit_group(struct task_struct *tsk) { diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c new file mode 100644 index 000000000000..cd6716e115e8 --- /dev/null +++ b/kernel/time/posix-stubs.c @@ -0,0 +1,123 @@ +/* + * Dummy stubs used when CONFIG_POSIX_TIMERS=n + * + * Created by: Nicolas Pitre, July 2016 + * Copyright: (C) 2016 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/syscalls.h> +#include <linux/ktime.h> +#include <linux/timekeeping.h> +#include <linux/posix-timers.h> + +asmlinkage long sys_ni_posix_timers(void) +{ + pr_err_once("process %d (%s) attempted a POSIX timer syscall " + "while CONFIG_POSIX_TIMERS is not set\n", + current->pid, current->comm); + return -ENOSYS; +} + +#define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers) + +SYS_NI(timer_create); +SYS_NI(timer_gettime); +SYS_NI(timer_getoverrun); +SYS_NI(timer_settime); +SYS_NI(timer_delete); +SYS_NI(clock_adjtime); +SYS_NI(getitimer); +SYS_NI(setitimer); +#ifdef __ARCH_WANT_SYS_ALARM +SYS_NI(alarm); +#endif + +/* + * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC + * as it is easy to remain compatible with little code. CLOCK_BOOTTIME + * is also included for convenience as at least systemd uses it. + */ + +SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, + const struct timespec __user *, tp) +{ + struct timespec new_tp; + + if (which_clock != CLOCK_REALTIME) + return -EINVAL; + if (copy_from_user(&new_tp, tp, sizeof (*tp))) + return -EFAULT; + return do_sys_settimeofday(&new_tp, NULL); +} + +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *,tp) +{ + struct timespec kernel_tp; + + switch (which_clock) { + case CLOCK_REALTIME: ktime_get_real_ts(&kernel_tp); break; + case CLOCK_MONOTONIC: ktime_get_ts(&kernel_tp); break; + case CLOCK_BOOTTIME: get_monotonic_boottime(&kernel_tp); break; + default: return -EINVAL; + } + if (copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + return -EFAULT; + return 0; +} + +SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) +{ + struct timespec rtn_tp = { + .tv_sec = 0, + .tv_nsec = hrtimer_resolution, + }; + + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_BOOTTIME: + if (copy_to_user(tp, &rtn_tp, sizeof(rtn_tp))) + return -EFAULT; + return 0; + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, + const struct timespec __user *, rqtp, + struct timespec __user *, rmtp) +{ + struct timespec t; + + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_BOOTTIME: + if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + return -EFAULT; + if (!timespec_valid(&t)) + return -EINVAL; + return hrtimer_nanosleep(&t, rmtp, flags & TIMER_ABSTIME ? + HRTIMER_MODE_ABS : HRTIMER_MODE_REL, + which_clock); + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +long clock_nanosleep_restart(struct restart_block *restart_block) +{ + return hrtimer_nanosleep_restart(restart_block); +} +#endif diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 37dec7e3db43..da233cdf89b0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -258,10 +258,9 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ - tk->xtime_interval = (u64) interval * clock->mult; + tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; - tk->raw_interval = - ((u64) interval * clock->mult) >> clock->shift; + tk->raw_interval = (interval * clock->mult) >> clock->shift; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { @@ -299,10 +298,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset; static inline u32 arch_gettimeoffset(void) { return 0; } #endif -static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, +static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, cycle_t delta) { - s64 nsec; + u64 nsec; nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -311,7 +310,7 @@ static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) +static inline u64 timekeeping_get_ns(struct tk_read_base *tkr) { cycle_t delta; @@ -319,8 +318,8 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) return timekeeping_delta_to_ns(tkr, delta); } -static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, - cycle_t cycles) +static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, + cycle_t cycles) { cycle_t delta; @@ -425,6 +424,35 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + * CPU 0 CPU 1 + * timekeeping_inject_sleeptime64() + * __timekeeping_inject_sleeptime(tk, delta); + * timestamp(); + * timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated. Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; @@ -623,7 +651,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) { struct clocksource *clock = tk->tkr_mono.clock; cycle_t cycle_now, delta; - s64 nsec; + u64 nsec; cycle_now = tk->tkr_mono.read(clock); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); @@ -652,7 +680,7 @@ int __getnstimeofday64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; - s64 nsecs = 0; + u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); @@ -692,7 +720,7 @@ ktime_t ktime_get(void) struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; - s64 nsecs; + u64 nsecs; WARN_ON(timekeeping_suspended); @@ -735,7 +763,7 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base, *offset = offsets[offs]; - s64 nsecs; + u64 nsecs; WARN_ON(timekeeping_suspended); @@ -779,7 +807,7 @@ ktime_t ktime_get_raw(void) struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; - s64 nsecs; + u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); @@ -804,8 +832,8 @@ void ktime_get_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; - s64 nsec; unsigned int seq; + u64 nsec; WARN_ON(timekeeping_suspended); @@ -893,8 +921,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) unsigned long seq; ktime_t base_raw; ktime_t base_real; - s64 nsec_raw; - s64 nsec_real; + u64 nsec_raw; + u64 nsec_real; cycle_t now; WARN_ON_ONCE(timekeeping_suspended); @@ -1052,7 +1080,7 @@ int get_device_system_crosststamp(int (*get_time_fn) cycle_t cycles, now, interval_start; unsigned int clock_was_set_seq = 0; ktime_t base_real, base_raw; - s64 nsec_real, nsec_raw; + u64 nsec_real, nsec_raw; u8 cs_was_changed_seq; unsigned long seq; bool do_interp; @@ -1365,7 +1393,7 @@ void getrawmonotonic64(struct timespec64 *ts) struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; unsigned long seq; - s64 nsecs; + u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); @@ -1616,7 +1644,7 @@ void timekeeping_resume(void) struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; - cycle_t cycle_now, cycle_delta; + cycle_t cycle_now; sleeptime_injected = false; read_persistent_clock64(&ts_new); @@ -1642,27 +1670,11 @@ void timekeeping_resume(void) cycle_now = tk->tkr_mono.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->tkr_mono.cycle_last) { - u64 num, max = ULLONG_MAX; - u32 mult = clock->mult; - u32 shift = clock->shift; - s64 nsec = 0; - - cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, - tk->tkr_mono.mask); - - /* - * "cycle_delta * mutl" may cause 64 bits overflow, if the - * suspended time is too long. In that case we need do the - * 64 bits math carefully - */ - do_div(max, mult); - if (cycle_delta > max) { - num = div64_u64(cycle_delta, max); - nsec = (((u64) max * mult) >> shift) * num; - cycle_delta -= num * max; - } - nsec += ((u64) cycle_delta * mult) >> shift; + u64 nsec, cyc_delta; + cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, + tk->tkr_mono.mask); + nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift); ts_delta = ns_to_timespec64(nsec); sleeptime_injected = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index c611c47de884..ea4fbf8477a9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1615,7 +1615,8 @@ void update_process_times(int user_tick) irq_work_tick(); #endif scheduler_tick(); - run_posix_cpu_timers(p); + if (IS_ENABLED(CONFIG_POSIX_TIMERS)) + run_posix_cpu_timers(p); } /** @@ -1676,19 +1677,6 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } -#ifdef __ARCH_WANT_SYS_ALARM - -/* - * For backwards compatibility? This can be done in libc so Alpha - * and all newer ports shouldn't need it. - */ -SYSCALL_DEFINE1(alarm, unsigned int, seconds) -{ - return alarm_setitimer(seconds); -} - -#endif - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); @@ -1705,11 +1693,12 @@ static void process_timeout(unsigned long __data) * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to - * pass before the routine returns. The routine will return 0 + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process())". * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. In this case the remaining time - * in jiffies will be returned, or 0 if the timer expired in time + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. @@ -1718,7 +1707,9 @@ static void process_timeout(unsigned long __data) * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * - * In all cases the return value is guaranteed to be non-negative. + * Returns 0 when the timer has expired otherwise the remaining time in + * jiffies will be returned. In all cases the return value is guaranteed + * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { @@ -1910,16 +1901,6 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); -static void __sched do_usleep_range(unsigned long min, unsigned long max) -{ - ktime_t kmin; - u64 delta; - - kmin = ktime_set(0, min * NSEC_PER_USEC); - delta = (u64)(max - min) * NSEC_PER_USEC; - schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); -} - /** * usleep_range - Sleep for an approximate time * @min: Minimum time in usecs to sleep @@ -1933,7 +1914,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max) */ void __sched usleep_range(unsigned long min, unsigned long max) { - __set_current_state(TASK_UNINTERRUPTIBLE); - do_usleep_range(min, max); + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(TASK_UNINTERRUPTIBLE); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } } EXPORT_SYMBOL(usleep_range); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 465d56febc5b..54d5270a5042 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1125,6 +1125,7 @@ static struct { { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, + { ktime_get_boot_fast_ns, "boot", 1 }, ARCH_TRACE_CLOCKS }; |