diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 17:56:58 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 17:56:58 -0800 |
commit | 2bcc673101268dc50e52b83226c5bbf38391e16d (patch) | |
tree | 0cdaf6affa8b05d436c2e8b80ff23e8c7f03a30a /kernel | |
parent | 670310dfbae0eefe7318ff6a61e29e67a7a7bbce (diff) | |
parent | b24591e2fcf852ad7ad2ccf745c8220bf378d312 (diff) |
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
"Yet another big pile of changes:
- More year 2038 work from Arnd slowly reaching the point where we
need to think about the syscalls themself.
- A new timer function which allows to conditionally (re)arm a timer
only when it's either not running or the new expiry time is sooner
than the armed expiry time. This allows to use a single timer for
multiple timeout requirements w/o caring about the first expiry
time at the call site.
- A new NMI safe accessor to clock real time for the printk timestamp
work. Can be used by tracing, perf as well if required.
- A large number of timer setup conversions from Kees which got
collected here because either maintainers requested so or they
simply got ignored. As Kees pointed out already there are a few
trivial merge conflicts and some redundant commits which was
unavoidable due to the size of this conversion effort.
- Avoid a redundant iteration in the timer wheel softirq processing.
- Provide a mechanism to treat RTC implementations depending on their
hardware properties, i.e. don't inflict the write at the 0.5
seconds boundary which originates from the PC CMOS RTC to all RTCs.
No functional change as drivers need to be updated separately.
- The usual small updates to core code clocksource drivers. Nothing
really exciting"
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (111 commits)
timers: Add a function to start/reduce a timer
pstore: Use ktime_get_real_fast_ns() instead of __getnstimeofday()
timer: Prepare to change all DEFINE_TIMER() callbacks
netfilter: ipvs: Convert timers to use timer_setup()
scsi: qla2xxx: Convert timers to use timer_setup()
block/aoe: discover_timer: Convert timers to use timer_setup()
ide: Convert timers to use timer_setup()
drbd: Convert timers to use timer_setup()
mailbox: Convert timers to use timer_setup()
crypto: Convert timers to use timer_setup()
drivers/pcmcia: omap1: Fix error in automated timer conversion
ARM: footbridge: Fix typo in timer conversion
drivers/sgi-xp: Convert timers to use timer_setup()
drivers/pcmcia: Convert timers to use timer_setup()
drivers/memstick: Convert timers to use timer_setup()
drivers/macintosh: Convert timers to use timer_setup()
hwrng/xgene-rng: Convert timers to use timer_setup()
auxdisplay: Convert timers to use timer_setup()
sparc/led: Convert timers to use timer_setup()
mips: ip22/32: Convert timers to use timer_setup()
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/irq/spurious.c | 2 | ||||
-rw-r--r-- | kernel/kthread.c | 10 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 4 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 9 | ||||
-rw-r--r-- | kernel/time/Kconfig | 2 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 21 | ||||
-rw-r--r-- | kernel/time/ntp.c | 227 | ||||
-rw-r--r-- | kernel/time/ntp_internal.h | 1 | ||||
-rw-r--r-- | kernel/time/posix-stubs.c | 20 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 1 | ||||
-rw-r--r-- | kernel/time/time.c | 71 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 182 | ||||
-rw-r--r-- | kernel/time/timekeeping.h | 2 | ||||
-rw-r--r-- | kernel/time/timer.c | 82 | ||||
-rw-r--r-- | kernel/workqueue.c | 29 |
15 files changed, 396 insertions, 267 deletions
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 987d7bca4864..1215229d1c12 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -21,7 +21,7 @@ static int irqfixup __read_mostly; #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) static void poll_spurious_irqs(unsigned long dummy); -static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0); +static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); static int irq_poll_cpu; static atomic_t irq_poll_active; diff --git a/kernel/kthread.c b/kernel/kthread.c index 1c19edf82427..ba3992c8c375 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -798,15 +798,14 @@ EXPORT_SYMBOL_GPL(kthread_queue_work); /** * kthread_delayed_work_timer_fn - callback that queues the associated kthread * delayed work when the timer expires. - * @__data: pointer to the data associated with the timer + * @t: pointer to the expired timer * * The format of the function is defined by struct timer_list. * It should have been called from irqsafe timer with irq already off. */ -void kthread_delayed_work_timer_fn(unsigned long __data) +void kthread_delayed_work_timer_fn(struct timer_list *t) { - struct kthread_delayed_work *dwork = - (struct kthread_delayed_work *)__data; + struct kthread_delayed_work *dwork = from_timer(dwork, t, timer); struct kthread_work *work = &dwork->work; struct kthread_worker *worker = work->worker; @@ -837,8 +836,7 @@ void __kthread_queue_delayed_work(struct kthread_worker *worker, struct timer_list *timer = &dwork->timer; struct kthread_work *work = &dwork->work; - WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn || - timer->data != (unsigned long)dwork); + WARN_ON_ONCE(timer->function != (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn); /* * If @delay is 0, queue @dwork->work immediately. This is for diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 362eb2f78b3c..74f6b0146b98 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1078,7 +1078,7 @@ static void rcu_torture_timer_cb(struct rcu_head *rhp) * counter in the element should never be greater than 1, otherwise, the * RCU implementation is broken. */ -static void rcu_torture_timer(unsigned long unused) +static void rcu_torture_timer(struct timer_list *unused) { int idx; unsigned long started; @@ -1165,7 +1165,7 @@ rcu_torture_reader(void *arg) VERBOSE_TOROUT_STRING("rcu_torture_reader task started"); set_user_nice(current, MAX_NICE); if (irqreader && cur_ops->irq_capable) - setup_timer_on_stack(&t, rcu_torture_timer, 0); + timer_setup_on_stack(&t, rcu_torture_timer, 0); do { if (irqreader && cur_ops->irq_capable) { diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 910405dc6e5c..db85ca3975f1 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2266,9 +2266,11 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) } /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ -static void do_nocb_deferred_wakeup_timer(unsigned long x) +static void do_nocb_deferred_wakeup_timer(struct timer_list *t) { - do_nocb_deferred_wakeup_common((struct rcu_data *)x); + struct rcu_data *rdp = from_timer(rdp, t, nocb_timer); + + do_nocb_deferred_wakeup_common(rdp); } /* @@ -2332,8 +2334,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) init_swait_queue_head(&rdp->nocb_wq); rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_lock_init(&rdp->nocb_lock); - setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, - (unsigned long)rdp); + timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); } /* diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index ac09bc29eb08..d689a9557e17 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -56,7 +56,7 @@ menu "Timers subsystem" # Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices -# are supported independ of this. +# are supported independent of this. config TICK_ONESHOT bool diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 4237e0744e26..16c027e9cc73 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -280,17 +280,22 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) static int clockevents_program_min_delta(struct clock_event_device *dev) { unsigned long long clc; - int64_t delta; + int64_t delta = 0; + int i; - delta = dev->min_delta_ns; - dev->next_event = ktime_add_ns(ktime_get(), delta); + for (i = 0; i < 10; i++) { + delta += dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); - if (clockevent_state_shutdown(dev)) - return 0; + if (clockevent_state_shutdown(dev)) + return 0; - dev->retries++; - clc = ((unsigned long long) delta * dev->mult) >> dev->shift; - return dev->set_next_event((unsigned long) clc, dev); + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (dev->set_next_event((unsigned long) clc, dev) == 0) + return 0; + } + return -ETIME; } #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 99e03bec68e4..8d70da1b9a0d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -493,6 +493,67 @@ out: return leap; } +static void sync_hw_clock(struct work_struct *work); +static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock); + +static void sched_sync_hw_clock(struct timespec64 now, + unsigned long target_nsec, bool fail) + +{ + struct timespec64 next; + + getnstimeofday64(&next); + if (!fail) + next.tv_sec = 659; + else { + /* + * Try again as soon as possible. Delaying long periods + * decreases the accuracy of the work queue timer. Due to this + * the algorithm is very likely to require a short-sleep retry + * after the above long sleep to synchronize ts_nsec. + */ + next.tv_sec = 0; + } + + /* Compute the needed delay that will get to tv_nsec == target_nsec */ + next.tv_nsec = target_nsec - next.tv_nsec; + if (next.tv_nsec <= 0) + next.tv_nsec += NSEC_PER_SEC; + if (next.tv_nsec >= NSEC_PER_SEC) { + next.tv_sec++; + next.tv_nsec -= NSEC_PER_SEC; + } + + queue_delayed_work(system_power_efficient_wq, &sync_work, + timespec64_to_jiffies(&next)); +} + +static void sync_rtc_clock(void) +{ + unsigned long target_nsec; + struct timespec64 adjust, now; + int rc; + + if (!IS_ENABLED(CONFIG_RTC_SYSTOHC)) + return; + + getnstimeofday64(&now); + + adjust = now; + if (persistent_clock_is_local) + adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); + + /* + * The current RTC in use will provide the target_nsec it wants to be + * called at, and does rtc_tv_nsec_ok internally. + */ + rc = rtc_set_ntp_time(adjust, &target_nsec); + if (rc == -ENODEV) + return; + + sched_sync_hw_clock(now, target_nsec, rc); +} + #ifdef CONFIG_GENERIC_CMOS_UPDATE int __weak update_persistent_clock(struct timespec now) { @@ -508,76 +569,75 @@ int __weak update_persistent_clock64(struct timespec64 now64) } #endif -#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) -static void sync_cmos_clock(struct work_struct *work); - -static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); - -static void sync_cmos_clock(struct work_struct *work) +static bool sync_cmos_clock(void) { + static bool no_cmos; struct timespec64 now; - struct timespec64 next; - int fail = 1; + struct timespec64 adjust; + int rc = -EPROTO; + long target_nsec = NSEC_PER_SEC / 2; + + if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE)) + return false; + + if (no_cmos) + return false; /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - * We want the clock to be within a couple of ticks from the target. + * Historically update_persistent_clock64() has followed x86 + * semantics, which match the MC146818A/etc RTC. This RTC will store + * 'adjust' and then in .5s it will advance once second. + * + * Architectures are strongly encouraged to use rtclib and not + * implement this legacy API. */ - if (!ntp_synced()) { - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ - return; - } - getnstimeofday64(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { - struct timespec64 adjust = now; - - fail = -ENODEV; + if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) { if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); -#ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock64(adjust); -#endif - -#ifdef CONFIG_RTC_SYSTOHC - if (fail == -ENODEV) - fail = rtc_set_ntp_time(adjust); -#endif + rc = update_persistent_clock64(adjust); + /* + * The machine does not support update_persistent_clock64 even + * though it defines CONFIG_GENERIC_CMOS_UPDATE. + */ + if (rc == -ENODEV) { + no_cmos = true; + return false; + } } - next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2); - if (next.tv_nsec <= 0) - next.tv_nsec += NSEC_PER_SEC; + sched_sync_hw_clock(now, target_nsec, rc); + return true; +} - if (!fail || fail == -ENODEV) - next.tv_sec = 659; - else - next.tv_sec = 0; +/* + * If we have an externally synchronized Linux clock, then update RTC clock + * accordingly every ~11 minutes. Generally RTCs can only store second + * precision, but many RTCs will adjust the phase of their second tick to + * match the moment of update. This infrastructure arranges to call to the RTC + * set at the correct moment to phase synchronize the RTC second tick over + * with the kernel clock. + */ +static void sync_hw_clock(struct work_struct *work) +{ + if (!ntp_synced()) + return; - if (next.tv_nsec >= NSEC_PER_SEC) { - next.tv_sec++; - next.tv_nsec -= NSEC_PER_SEC; - } - queue_delayed_work(system_power_efficient_wq, - &sync_cmos_work, timespec64_to_jiffies(&next)); + if (sync_cmos_clock()) + return; + + sync_rtc_clock(); } void ntp_notify_cmos_timer(void) { - queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); -} - -#else -void ntp_notify_cmos_timer(void) { } -#endif + if (!ntp_synced()) + return; + if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) || + IS_ENABLED(CONFIG_RTC_SYSTOHC)) + queue_delayed_work(system_power_efficient_wq, &sync_work, 0); +} /* * Propagate a new txc->status value into the NTP state: @@ -654,67 +714,6 @@ static inline void process_adjtimex_modes(struct timex *txc, } - -/** - * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex - */ -int ntp_validate_timex(struct timex *txc) -{ - if (txc->modes & ADJ_ADJTIME) { - /* singleshot must not be used with any other mode bits */ - if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) - return -EINVAL; - if (!(txc->modes & ADJ_OFFSET_READONLY) && - !capable(CAP_SYS_TIME)) - return -EPERM; - } else { - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - /* - * if the quartz is off by more than 10% then - * something is VERY wrong! - */ - if (txc->modes & ADJ_TICK && - (txc->tick < 900000/USER_HZ || - txc->tick > 1100000/USER_HZ)) - return -EINVAL; - } - - if (txc->modes & ADJ_SETOFFSET) { - /* In order to inject time, you gotta be super-user! */ - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (txc->modes & ADJ_NANO) { - struct timespec ts; - - ts.tv_sec = txc->time.tv_sec; - ts.tv_nsec = txc->time.tv_usec; - if (!timespec_inject_offset_valid(&ts)) - return -EINVAL; - - } else { - if (!timeval_inject_offset_valid(&txc->time)) - return -EINVAL; - } - } - - /* - * Check for potential multiplication overflows that can - * only happen on 64-bit systems: - */ - if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { - if (LLONG_MIN / PPM_SCALE > txc->freq) - return -EINVAL; - if (LLONG_MAX / PPM_SCALE < txc->freq) - return -EINVAL; - } - - return 0; -} - - /* * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 0a53e6ea47b1..909bd1f1bfb1 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,7 +8,6 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); extern int second_overflow(time64_t secs); -extern int ntp_validate_timex(struct timex *); extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); extern void __hardpps(const struct timespec64 *, const struct timespec64 *); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 06f34feb635e..b258bee13b02 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -117,8 +117,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { - struct timespec64 t64; - struct timespec t; + struct timespec64 t; switch (which_clock) { case CLOCK_REALTIME: @@ -129,16 +128,15 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, return -EINVAL; } - if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + if (get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } @@ -203,8 +201,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, struct compat_timespec __user *, rqtp, struct compat_timespec __user *, rmtp) { - struct timespec64 t64; - struct timespec t; + struct timespec64 t; switch (which_clock) { case CLOCK_REALTIME: @@ -215,16 +212,15 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, return -EINVAL; } - if (compat_get_timespec(&t, rqtp)) + if (compat_get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 6b009c207671..c1f518e7aa80 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -33,6 +33,7 @@ int tick_program_event(ktime_t expires, int force) * We don't need the clock event device any more, stop it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); + dev->next_event = KTIME_MAX; return 0; } diff --git a/kernel/time/time.c b/kernel/time/time.c index 44a8c1402133..bd4e6c7dd689 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -82,7 +82,7 @@ SYSCALL_DEFINE1(time, time_t __user *, tloc) SYSCALL_DEFINE1(stime, time_t __user *, tptr) { - struct timespec tv; + struct timespec64 tv; int err; if (get_user(tv.tv_sec, tptr)) @@ -90,11 +90,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr) tv.tv_nsec = 0; - err = security_settime(&tv, NULL); + err = security_settime64(&tv, NULL); if (err) return err; - do_settimeofday(&tv); + do_settimeofday64(&tv); return 0; } @@ -122,7 +122,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc) COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) { - struct timespec tv; + struct timespec64 tv; int err; if (get_user(tv.tv_sec, tptr)) @@ -130,11 +130,11 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) tv.tv_nsec = 0; - err = security_settime(&tv, NULL); + err = security_settime64(&tv, NULL); if (err) return err; - do_settimeofday(&tv); + do_settimeofday64(&tv); return 0; } @@ -158,40 +158,6 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, } /* - * Indicates if there is an offset between the system clock and the hardware - * clock/persistent clock/rtc. - */ -int persistent_clock_is_local; - -/* - * Adjust the time obtained from the CMOS to be UTC time instead of - * local time. - * - * This is ugly, but preferable to the alternatives. Otherwise we - * would either need to write a program to do it in /etc/rc (and risk - * confusion if the program gets run more than once; it would also be - * hard to make the program warp the clock precisely n hours) or - * compile in the timezone information into the kernel. Bad, bad.... - * - * - TYT, 1992-01-01 - * - * The best thing to do is to keep the CMOS clock in universal time (UTC) - * as real UNIX machines always do it. This avoids all headaches about - * daylight saving times and warping kernel clocks. - */ -static inline void warp_clock(void) -{ - if (sys_tz.tz_minuteswest != 0) { - struct timespec adjust; - - persistent_clock_is_local = 1; - adjust.tv_sec = sys_tz.tz_minuteswest * 60; - adjust.tv_nsec = 0; - timekeeping_inject_offset(&adjust); - } -} - -/* * In case for some reason the CMOS clock has not already been running * in UTC, but in some local time: The first time we set the timezone, * we will warp the clock so that it is ticking UTC time instead of @@ -224,7 +190,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz if (firsttime) { firsttime = 0; if (!tv) - warp_clock(); + timekeeping_warp_clock(); } } if (tv) @@ -441,6 +407,7 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0, } EXPORT_SYMBOL(mktime64); +#if __BITS_PER_LONG == 32 /** * set_normalized_timespec - set timespec sec and nsec parts and normalize * @@ -501,6 +468,7 @@ struct timespec ns_to_timespec(const s64 nsec) return ts; } EXPORT_SYMBOL(ns_to_timespec); +#endif /** * ns_to_timeval - Convert nanoseconds to timeval @@ -520,7 +488,6 @@ struct timeval ns_to_timeval(const s64 nsec) } EXPORT_SYMBOL(ns_to_timeval); -#if BITS_PER_LONG == 32 /** * set_normalized_timespec - set timespec sec and nsec parts and normalize * @@ -581,7 +548,7 @@ struct timespec64 ns_to_timespec64(const s64 nsec) return ts; } EXPORT_SYMBOL(ns_to_timespec64); -#endif + /** * msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds @@ -853,24 +820,6 @@ unsigned long nsecs_to_jiffies(u64 n) EXPORT_SYMBOL_GPL(nsecs_to_jiffies); /* - * Add two timespec values and do a safety check for overflow. - * It's assumed that both values are valid (>= 0) - */ -struct timespec timespec_add_safe(const struct timespec lhs, - const struct timespec rhs) -{ - struct timespec res; - - set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, - lhs.tv_nsec + rhs.tv_nsec); - - if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) - res.tv_sec = TIME_T_MAX; - - return res; -} - -/* * Add two timespec64 values and do a safety check for overflow. * It's assumed that both values are valid (>= 0). * And, each timespec64 is in normalized form. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2cafb49aa65e..198afa78bf69 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -60,8 +60,27 @@ struct tk_fast { struct tk_read_base base[2]; }; -static struct tk_fast tk_fast_mono ____cacheline_aligned; -static struct tk_fast tk_fast_raw ____cacheline_aligned; +/* Suspend-time cycles value for halted fast timekeeper. */ +static u64 cycles_at_suspend; + +static u64 dummy_clock_read(struct clocksource *cs) +{ + return cycles_at_suspend; +} + +static struct clocksource dummy_clock = { + .read = dummy_clock_read, +}; + +static struct tk_fast tk_fast_mono ____cacheline_aligned = { + .base[0] = { .clock = &dummy_clock, }, + .base[1] = { .clock = &dummy_clock, }, +}; + +static struct tk_fast tk_fast_raw ____cacheline_aligned = { + .base[0] = { .clock = &dummy_clock, }, + .base[1] = { .clock = &dummy_clock, }, +}; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -477,17 +496,39 @@ u64 notrace ktime_get_boot_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); -/* Suspend-time cycles value for halted fast timekeeper. */ -static u64 cycles_at_suspend; -static u64 dummy_clock_read(struct clocksource *cs) +/* + * See comment for __ktime_get_fast_ns() vs. timestamp ordering + */ +static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) { - return cycles_at_suspend; + struct tk_read_base *tkr; + unsigned int seq; + u64 now; + + do { + seq = raw_read_seqcount_latch(&tkf->seq); + tkr = tkf->base + (seq & 0x01); + now = ktime_to_ns(tkr->base_real); + + now += timekeeping_delta_to_ns(tkr, + clocksource_delta( + tk_clock_read(tkr), + tkr->cycle_last, + tkr->mask)); + } while (read_seqcount_retry(&tkf->seq, seq)); + + return now; } -static struct clocksource dummy_clock = { - .read = dummy_clock_read, -}; +/** + * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. + */ +u64 ktime_get_real_fast_ns(void) +{ + return __ktime_get_real_fast_ns(&tk_fast_mono); +} +EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. @@ -507,6 +548,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk) memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tk_clock_read(tkr); tkr_dummy.clock = &dummy_clock; + tkr_dummy.base_real = tkr->base + tk->offs_real; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; @@ -654,6 +696,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); @@ -1264,33 +1307,31 @@ EXPORT_SYMBOL(do_settimeofday64); * * Adds or subtracts an offset value from the current time. */ -int timekeeping_inject_offset(struct timespec *ts) +static int timekeeping_inject_offset(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - struct timespec64 ts64, tmp; + struct timespec64 tmp; int ret = 0; - if (!timespec_inject_offset_valid(ts)) + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - ts64 = timespec_to_timespec64(*ts); - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); /* Make sure the proposed value is valid */ - tmp = timespec64_add(tk_xtime(tk), ts64); - if (timespec64_compare(&tk->wall_to_monotonic, &ts64) > 0 || + tmp = timespec64_add(tk_xtime(tk), *ts); + if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 || !timespec64_valid_strict(&tmp)) { ret = -EINVAL; goto error; } - tk_xtime_add(tk, &ts64); - tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64)); + tk_xtime_add(tk, ts); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1303,7 +1344,40 @@ error: /* even if we error out, we forwarded the time, so call update */ return ret; } -EXPORT_SYMBOL(timekeeping_inject_offset); + +/* + * Indicates if there is an offset between the system clock and the hardware + * clock/persistent clock/rtc. + */ +int persistent_clock_is_local; + +/* + * Adjust the time obtained from the CMOS to be UTC time instead of + * local time. + * + * This is ugly, but preferable to the alternatives. Otherwise we + * would either need to write a program to do it in /etc/rc (and risk + * confusion if the program gets run more than once; it would also be + * hard to make the program warp the clock precisely n hours) or + * compile in the timezone information into the kernel. Bad, bad.... + * + * - TYT, 1992-01-01 + * + * The best thing to do is to keep the CMOS clock in universal time (UTC) + * as real UNIX machines always do it. This avoids all headaches about + * daylight saving times and warping kernel clocks. + */ +void timekeeping_warp_clock(void) +{ + if (sys_tz.tz_minuteswest != 0) { + struct timespec64 adjust; + + persistent_clock_is_local = 1; + adjust.tv_sec = sys_tz.tz_minuteswest * 60; + adjust.tv_nsec = 0; + timekeeping_inject_offset(&adjust); + } +} /** * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic @@ -2248,6 +2322,72 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, } /** + * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex + */ +static int timekeeping_validate_timex(struct timex *txc) +{ + if (txc->modes & ADJ_ADJTIME) { + /* singleshot must not be used with any other mode bits */ + if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) + return -EINVAL; + if (!(txc->modes & ADJ_OFFSET_READONLY) && + !capable(CAP_SYS_TIME)) + return -EPERM; + } else { + /* In order to modify anything, you gotta be super-user! */ + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + /* + * if the quartz is off by more than 10% then + * something is VERY wrong! + */ + if (txc->modes & ADJ_TICK && + (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ)) + return -EINVAL; + } + + if (txc->modes & ADJ_SETOFFSET) { + /* In order to inject time, you gotta be super-user! */ + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + /* + * Validate if a timespec/timeval used to inject a time + * offset is valid. Offsets can be postive or negative, so + * we don't check tv_sec. The value of the timeval/timespec + * is the sum of its fields,but *NOTE*: + * The field tv_usec/tv_nsec must always be non-negative and + * we can't have more nanoseconds/microseconds than a second. + */ + if (txc->time.tv_usec < 0) + return -EINVAL; + + if (txc->modes & ADJ_NANO) { + if (txc->time.tv_usec >= NSEC_PER_SEC) + return -EINVAL; + } else { + if (txc->time.tv_usec >= USEC_PER_SEC) + return -EINVAL; + } + } + + /* + * Check for potential multiplication overflows that can + * only happen on 64-bit systems: + */ + if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { + if (LLONG_MIN / PPM_SCALE > txc->freq) + return -EINVAL; + if (LLONG_MAX / PPM_SCALE < txc->freq) + return -EINVAL; + } + + return 0; +} + + +/** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ int do_adjtimex(struct timex *txc) @@ -2259,12 +2399,12 @@ int do_adjtimex(struct timex *txc) int ret; /* Validate the data before disabling interrupts */ - ret = ntp_validate_timex(txc); + ret = timekeeping_validate_timex(txc); if (ret) return ret; if (txc->modes & ADJ_SETOFFSET) { - struct timespec delta; + struct timespec64 delta; delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index c9f9af339914..7a9b4eb7a1d5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -11,7 +11,7 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); -extern int timekeeping_inject_offset(struct timespec *ts); +extern void timekeeping_warp_clock(void); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f2674a056c26..af0b8bae4502 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -610,7 +610,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state) } /* Stub timer callback for improperly used timers. */ -static void stub_timer(unsigned long data) +static void stub_timer(struct timer_list *unused) { WARN_ON(1); } @@ -626,7 +626,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_NOTAVAILABLE: - setup_timer(timer, stub_timer, 0); + timer_setup(timer, stub_timer, 0); return true; case ODEBUG_STATE_ACTIVE: @@ -665,7 +665,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_NOTAVAILABLE: - setup_timer(timer, stub_timer, 0); + timer_setup(timer, stub_timer, 0); return true; default: return false; @@ -929,8 +929,11 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, } } +#define MOD_TIMER_PENDING_ONLY 0x01 +#define MOD_TIMER_REDUCE 0x02 + static inline int -__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) +__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) { struct timer_base *base, *new_base; unsigned int idx = UINT_MAX; @@ -950,7 +953,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * larger granularity than you would get from adding a new * timer with this expiry. */ - if (timer->expires == expires) + long diff = timer->expires - expires; + + if (!diff) + return 1; + if (options & MOD_TIMER_REDUCE && diff <= 0) return 1; /* @@ -962,6 +969,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) base = lock_timer_base(timer, &flags); forward_timer_base(base); + if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && + time_before_eq(timer->expires, expires)) { + ret = 1; + goto out_unlock; + } + clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -971,7 +984,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * subsequent call will exit in the expires check above. */ if (idx == timer_get_idx(timer)) { - timer->expires = expires; + if (!(options & MOD_TIMER_REDUCE)) + timer->expires = expires; + else if (time_after(timer->expires, expires)) + timer->expires = expires; ret = 1; goto out_unlock; } @@ -981,7 +997,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } ret = detach_if_pending(timer, base, false); - if (!ret && pending_only) + if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; debug_activate(timer, expires); @@ -1042,7 +1058,7 @@ out_unlock: */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, true); + return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); } EXPORT_SYMBOL(mod_timer_pending); @@ -1068,11 +1084,26 @@ EXPORT_SYMBOL(mod_timer_pending); */ int mod_timer(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, false); + return __mod_timer(timer, expires, 0); } EXPORT_SYMBOL(mod_timer); /** + * timer_reduce - Modify a timer's timeout if it would reduce the timeout + * @timer: The timer to be modified + * @expires: New timeout in jiffies + * + * timer_reduce() is very similar to mod_timer(), except that it will only + * modify a running timer if that would reduce the expiration time (it will + * start a timer that isn't running). + */ +int timer_reduce(struct timer_list *timer, unsigned long expires) +{ + return __mod_timer(timer, expires, MOD_TIMER_REDUCE); +} +EXPORT_SYMBOL(timer_reduce); + +/** * add_timer - start a timer * @timer: the timer to be added * @@ -1560,8 +1591,11 @@ static int collect_expired_timers(struct timer_base *base, * jiffies, otherwise forward to the next expiry time: */ if (time_after(next, jiffies)) { - /* The call site will increment clock! */ - base->clk = jiffies - 1; + /* + * The call site will increment base->clk and then + * terminate the expiry loop immediately. + */ + base->clk = jiffies; return 0; } base->clk = next; @@ -1668,9 +1702,20 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } -static void process_timeout(unsigned long __data) +/* + * Since schedule_timeout()'s timer is defined on the stack, it must store + * the target task on the stack as well. + */ +struct process_timer { + struct timer_list timer; + struct task_struct *task; +}; + +static void process_timeout(struct timer_list *t) { - wake_up_process((struct task_struct *)__data); + struct process_timer *timeout = from_timer(timeout, t, timer); + + wake_up_process(timeout->task); } /** @@ -1704,7 +1749,7 @@ static void process_timeout(unsigned long __data) */ signed long __sched schedule_timeout(signed long timeout) { - struct timer_list timer; + struct process_timer timer; unsigned long expire; switch (timeout) @@ -1738,13 +1783,14 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; - setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire, false); + timer.task = current; + timer_setup_on_stack(&timer.timer, process_timeout, 0); + __mod_timer(&timer.timer, expire, 0); schedule(); - del_singleshot_timer_sync(&timer); + del_singleshot_timer_sync(&timer.timer); /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer); + destroy_timer_on_stack(&timer.timer); timeout = expire - jiffies; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 13f67b5a0a0c..7368b57842ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1493,9 +1493,9 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL(queue_work_on); -void delayed_work_timer_fn(unsigned long __data) +void delayed_work_timer_fn(struct timer_list *t) { - struct delayed_work *dwork = (struct delayed_work *)__data; + struct delayed_work *dwork = from_timer(dwork, t, timer); /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, dwork->wq, &dwork->work); @@ -1509,8 +1509,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); + WARN_ON_ONCE(timer->function != (TIMER_FUNC_TYPE)delayed_work_timer_fn); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); @@ -1833,9 +1832,9 @@ static void destroy_worker(struct worker *worker) wake_up_process(worker->task); } -static void idle_worker_timeout(unsigned long __pool) +static void idle_worker_timeout(struct timer_list *t) { - struct worker_pool *pool = (void *)__pool; + struct worker_pool *pool = from_timer(pool, t, idle_timer); spin_lock_irq(&pool->lock); @@ -1881,9 +1880,9 @@ static void send_mayday(struct work_struct *work) } } -static void pool_mayday_timeout(unsigned long __pool) +static void pool_mayday_timeout(struct timer_list *t) { - struct worker_pool *pool = (void *)__pool; + struct worker_pool *pool = from_timer(pool, t, mayday_timer); struct work_struct *work; spin_lock_irq(&pool->lock); @@ -3223,11 +3222,9 @@ static int init_worker_pool(struct worker_pool *pool) INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); - setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout, - (unsigned long)pool); + timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE); - setup_timer(&pool->mayday_timer, pool_mayday_timeout, - (unsigned long)pool); + timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0); mutex_init(&pool->attach_mutex); INIT_LIST_HEAD(&pool->workers); @@ -5370,11 +5367,8 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } */ #ifdef CONFIG_WQ_WATCHDOG -static void wq_watchdog_timer_fn(unsigned long data); - static unsigned long wq_watchdog_thresh = 30; -static struct timer_list wq_watchdog_timer = - TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0); +static struct timer_list wq_watchdog_timer; static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; @@ -5388,7 +5382,7 @@ static void wq_watchdog_reset_touched(void) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; } -static void wq_watchdog_timer_fn(unsigned long data) +static void wq_watchdog_timer_fn(struct timer_list *unused) { unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; bool lockup_detected = false; @@ -5490,6 +5484,7 @@ module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh, static void wq_watchdog_init(void) { + timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE); wq_watchdog_set_thresh(wq_watchdog_thresh); } |