time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting
author: John Stultz <john.stultz@linaro.org>
Thu, 8 Jun 2017 23:44:21 +0000 (16:44 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Jun 2017 11:00:30 +0000 (13:00 +0200)
commit 3d88d56c5873f6eebe23e05c3da701960146b801 upstream.

Due to how the MONOTONIC_RAW accumulation logic was handled,
there is the potential for a 1ns discontinuity when we do
accumulations. This small discontinuity has for the most part
gone unnoticed, but since ARM64 enabled CLOCK_MONOTONIC_RAW
in their vDSO clock_gettime implementation, we've seen failures
with the inconsistency-check test in kselftest.

This patch addresses the issue by using the same sub-ns
accumulation handling that CLOCK_MONOTONIC uses, which avoids
the issue for in-kernel users.

Since the ARM64 vDSO implementation has its own clock_gettime
calculation logic, this patch reduces the frequency of errors,
but failures are still seen. The ARM64 vDSO will need to be
updated to include the sub-nanosecond xtime_nsec values in its
calculation for this issue to be completely fixed.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Tested-by: Daniel Mentz <danielmentz@google.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Link: http://lkml.kernel.org/r/1496965462-20003-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/linux/timekeeper_internal.h
kernel/time/timekeeping.c

index 100e47d..2c225d4 100644 (file)
@@ -57,7 +57,7 @@ struct tk_read_base {
  *                     interval.
  * @xtime_remainder:   Shifted nano seconds left over when rounding
  *                     @cycle_interval
- * @raw_interval:      Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:      Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:         Difference between accumulated time and NTP time in ntp
  *                     shifted nano seconds.
  * @ntp_error_shift:   Shift conversion between clock shifted nano seconds and
@@ -99,7 +99,7 @@ struct timekeeper {
        cycle_t                 cycle_interval;
        u64                     xtime_interval;
        s64                     xtime_remainder;
-       u32                     raw_interval;
+       u64                     raw_interval;
        /* The ntp_tick_length() value currently being used.
         * This cached copy ensures we consistently apply the tick
         * length for an entire tick, as ntp_tick_length may change
index 9d86184..d831827 100644 (file)
@@ -278,8 +278,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        /* Go back from cycles -> shifted ns */
        tk->xtime_interval = (u64) interval * clock->mult;
        tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-       tk->raw_interval =
-               ((u64) interval * clock->mult) >> clock->shift;
+       tk->raw_interval = interval * clock->mult;
 
         /* if changing clocks, convert xtime_nsec shift units */
        if (old_clock) {
@@ -2023,7 +2022,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
                                                unsigned int *clock_set)
 {
        cycle_t interval = tk->cycle_interval << shift;
-       u64 raw_nsecs;
+       u64 snsec_per_sec;
 
        /* If the offset is smaller than a shifted interval, do nothing */
        if (offset < interval)
@@ -2038,14 +2037,15 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
-       raw_nsecs = (u64)tk->raw_interval << shift;
-       raw_nsecs += tk->raw_time.tv_nsec;
-       if (raw_nsecs >= NSEC_PER_SEC) {
-               u64 raw_secs = raw_nsecs;
-               raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-               tk->raw_time.tv_sec += raw_secs;
+       tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+       snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+       while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+               tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+               tk->raw_time.tv_sec++;
        }
-       tk->raw_time.tv_nsec = raw_nsecs;
+       tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
        /* Accumulate error between NTP and clock interval */
        tk->ntp_error += tk->ntp_tick << shift;