From be76ea61446095c045641404128bc6862545cda1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Feb 2023 17:39:42 +0100 Subject: [PATCH] s390/idle: remove arch_cpu_idle_time() and corresponding code arch_cpu_idle_time() returns the idle time of any given cpu if it is in idle, or zero if not. All if this is racy and partially incorrect. Time stamps taken with store clock extended and store clock fast from different cpus are compared, while the architecture states that this is nothing which can be relied on (see Principles of Operation; Chapter 4, "Setting and Inspecting the Clock"). A more fundamental problem is that the timestamp when a cpu is leaving idle is taken early in the assembler part of the interrupt handler, and this value is only transferred many cycles later to the cpu's per-cpu idle data structure. This per cpu data structure is read by arch_cpu_idle() to tell for which period of time a remote cpu is idle: if only an idle_enter value is present, the assumed idle time of the cpu is calculated by taking a local timestamp and returning the difference of the local timestamp and the idle_enter value. This is potentially incorrect, since the remote cpu may have already left idle, but the taken timestamp may not have been transferred to the per-cpu data structure. This in turn means that too much idle time may be reported for a cpu, and a subsequent calculation of system idle time may result in a smaller value. Instead of coming up with even more complex code trying to fix this, just remove this code, and only account idle time of a cpu, after idle state is left. Another minor bug is that it is assumed that timestamps are non-zero, which is not necessarily the case for timestamps taken with store clock fast. This however is just a very minor problem, since this can only happen when the epoch increases. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cputime.h | 4 --- arch/s390/include/asm/idle.h | 4 --- arch/s390/kernel/idle.c | 77 ++++++----------------------------------- 3 files changed, 11 insertions(+), 74 deletions(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 7f9284e..30bb3ec 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -16,10 +16,6 @@ */ #define cputime_to_nsecs(cputime) tod_to_ns(cputime) -u64 arch_cpu_idle_time(int cpu); - -#define arch_idle_time(cpu) arch_cpu_idle_time(cpu) - void account_idle_time_irq(void); #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 5cea629c..af72a2b 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -10,16 +10,12 @@ #include #include -#include struct s390_idle_data { - seqcount_t seqcount; unsigned long idle_count; unsigned long idle_time; unsigned long clock_idle_enter; - unsigned long clock_idle_exit; unsigned long timer_idle_enter; - unsigned long timer_idle_exit; unsigned long mt_cycles_enter[8]; }; diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 0f45850..dd8351e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -35,24 +35,18 @@ void account_idle_time_irq(void) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } - idle->clock_idle_exit = S390_lowcore.int_clock; - idle->timer_idle_exit = S390_lowcore.sys_enter_timer; + idle_time = S390_lowcore.int_clock - idle->clock_idle_enter; S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = idle->clock_idle_exit; + S390_lowcore.last_update_clock = S390_lowcore.int_clock; S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; - S390_lowcore.last_update_timer = idle->timer_idle_exit; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; /* Account time spent with enabled wait psw loaded as idle time. */ - raw_write_seqcount_begin(&idle->seqcount); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = 0; - idle->clock_idle_exit = 0; - idle->idle_time += idle_time; - idle->idle_count++; + WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); + WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); account_idle_time(cputime_to_nsecs(idle_time)); - raw_write_seqcount_end(&idle->seqcount); } void noinstr arch_cpu_idle(void) @@ -71,71 +65,22 @@ void noinstr arch_cpu_idle(void) } static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long idle_count; - unsigned int seq; - - do { - seq = read_seqcount_begin(&idle->seqcount); - idle_count = READ_ONCE(idle->idle_count); - if (READ_ONCE(idle->clock_idle_enter)) - idle_count++; - } while (read_seqcount_retry(&idle->seqcount, seq)); - return sprintf(buf, "%lu\n", idle_count); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count)); } DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { - unsigned long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned int seq; - - do { - seq = read_seqcount_begin(&idle->seqcount); - idle_time = READ_ONCE(idle->idle_time); - idle_enter = READ_ONCE(idle->clock_idle_enter); - idle_exit = READ_ONCE(idle->clock_idle_exit); - } while (read_seqcount_retry(&idle->seqcount, seq)); - in_idle = 0; - now = get_tod_clock(); - if (idle_enter) { - if (idle_exit) { - in_idle = idle_exit - idle_enter; - } else if (now > idle_enter) { - in_idle = now - idle_enter; - } - } - idle_time += in_idle; - return sprintf(buf, "%lu\n", idle_time >> 12); -} -DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); -u64 arch_cpu_idle_time(int cpu) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long now, idle_enter, idle_exit, in_idle; - unsigned int seq; - - do { - seq = read_seqcount_begin(&idle->seqcount); - idle_enter = READ_ONCE(idle->clock_idle_enter); - idle_exit = READ_ONCE(idle->clock_idle_exit); - } while (read_seqcount_retry(&idle->seqcount, seq)); - in_idle = 0; - now = get_tod_clock(); - if (idle_enter) { - if (idle_exit) { - in_idle = idle_exit - idle_enter; - } else if (now > idle_enter) { - in_idle = now - idle_enter; - } - } - return cputime_to_nsecs(in_idle); + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12); } +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); void arch_cpu_idle_enter(void) { -- 2.7.4