/*
 *  drivers/cpufreq/cpufreq_adaptive.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/kthread.h>

#include <mach/ppmu.h>
/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */
#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)
#define MIN_ONDEMAND_THRESHOLD			(4)
/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL), this governor will not work.
 * All times here are in uS.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
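
/*
 * Worked example (illustrative numbers): for a driver that reports a
 * transition latency of 10000 nS, the governor start path below computes
 *
 *	latency = 10000 / 1000 = 10 uS
 *	sampling_rate = max(min_sampling_rate, 10 * LATENCY_MULTIPLIER)
 *	              = max(min_sampling_rate, 10000 uS)
 *
 * so the CPU load is re-evaluated roughly every 10 ms on such hardware.
 */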
static void (*pm_idle_old)(void);
static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event);
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_adaptive = {
	.name			= "adaptive",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_iowait;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_hi_jiffies;
	int cpu;
	unsigned int sample_type:1;
	bool ondemand;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */
/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;

static DEFINE_PER_CPU(cputime64_t, idle_in_idle);
static DEFINE_PER_CPU(cputime64_t, idle_exit_wall);

static struct timer_list cpu_timer;
static unsigned int target_freq;
static DEFINE_MUTEX(short_timer_mutex);
/* Go to max speed when CPU load at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 60
static unsigned long go_maxspeed_load;

#define DEFAULT_KEEP_MINSPEED_LOAD 30
static unsigned long keep_minspeed_load;

#define DEFAULT_STEPUP_LOAD 10
static unsigned long step_up_load;
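
/*
 * Illustrative behaviour of the defaults above, as applied by
 * cpufreq_adaptive_timer() below: with go_maxspeed_load = 60, a short-term
 * load of, say, 75% jumps straight to policy->max; a load of 40% instead
 * targets policy->max * 40 / 100, rounded up to the next frequency-table
 * entry; and a load at or below keep_minspeed_load = 30 keeps the current
 * frequency when the CPU is already at policy->min.
 */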
static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int io_is_busy;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
};
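
/*
 * The tunables above are exposed through the sysfs attributes defined
 * later in this file. A minimal usage sketch (the global path is an
 * assumption based on the use of cpufreq_global_kobject and the group
 * name below):
 *
 *	# echo 95 > /sys/devices/system/cpu/cpufreq/adaptive/up_threshold
 *	# echo 1  > /sys/devices/system/cpu/cpufreq/adaptive/io_is_busy
 *
 * Writes are validated and applied under dbs_mutex by the store_*()
 * handlers.
 */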
static inline cputime64_t get_cpu_iowait_time(unsigned int cpu,
					      cputime64_t *wall)
{
	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

	if (iowait_time == -1ULL)
		return 0;

	return iowait_time;
}
static void adaptive_init_cpu(int cpu)
{
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
}
/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	printk_once(KERN_INFO "CPUFREQ: adaptive sampling_rate_max "
		    "sysfs file is deprecated - used by: %s\n", current->comm);
	return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_max);
define_one_global_ro(sampling_rate_min);
/* cpufreq_adaptive Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(io_is_busy, io_is_busy);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
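
/*
 * For reference, show_one(sampling_rate, sampling_rate) expands to
 * (roughly, after preprocessing):
 *
 *	static ssize_t show_sampling_rate(struct kobject *kobj,
 *					  struct attribute *attr, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", dbs_tuners_ins.sampling_rate);
 *	}
 *
 * i.e. one read-only accessor per tunable, reading dbs_tuners_ins.
 */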
/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)					\
	printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs "	\
		    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)						\
static ssize_t show_##file_name##_old					\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return show_##file_name(NULL, NULL, buf);			\
}

/*** delete after deprecation time ***/
static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	mutex_unlock(&dbs_mutex);

	return count;
}
static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.io_is_busy = !!input;
	mutex_unlock(&dbs_mutex);

	return count;
}
static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
	    input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}
static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
	}
	mutex_unlock(&dbs_mutex);

	return count;
}
define_one_global_rw(sampling_rate);
define_one_global_rw(io_is_busy);
define_one_global_rw(up_threshold);
define_one_global_rw(ignore_nice_load);
static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	&io_is_busy.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "adaptive",
};
/*** delete after deprecation time ***/

#define write_one_old(file_name)					\
static ssize_t store_##file_name##_old					\
(struct cpufreq_policy *unused, const char *buf, size_t count)		\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return store_##file_name(NULL, NULL, buf, count);		\
}
static void cpufreq_adaptive_timer(unsigned long data)
{
	cputime64_t cur_idle;
	cputime64_t cur_wall;
	unsigned int delta_idle;
	unsigned int delta_time;
	unsigned int short_load;
	unsigned int new_freq;
	unsigned long flags;
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpufreq_policy *policy;
	unsigned int j;
	unsigned int index;
	unsigned int max_load = 0;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);

	policy = this_dbs_info->cur_policy;

	for_each_online_cpu(j) {
		cur_idle = get_cpu_idle_time_us(j, &cur_wall);

		delta_idle = (unsigned int) cputime64_sub(cur_idle,
				per_cpu(idle_in_idle, j));
		delta_time = (unsigned int) cputime64_sub(cur_wall,
				per_cpu(idle_exit_wall, j));

		/*
		 * If the timer ran less than 1 ms after the short-term
		 * sample started, retry.
		 */
		if (delta_time < 1000)
			goto do_nothing;

		if (delta_idle > delta_time)
			short_load = 0;
		else
			short_load = 100 * (delta_time - delta_idle) / delta_time;

		if (short_load > max_load)
			max_load = short_load;
	}

	if (this_dbs_info->ondemand)
		goto do_nothing;

	if (max_load >= go_maxspeed_load)
		new_freq = policy->max;
	else
		new_freq = policy->max * max_load / 100;

	if ((max_load <= keep_minspeed_load) &&
	    (policy->cur == policy->min))
		new_freq = policy->cur;

	if (cpufreq_frequency_table_target(policy, this_dbs_info->freq_table,
					   new_freq, CPUFREQ_RELATION_L,
					   &index))
		goto do_nothing;

	new_freq = this_dbs_info->freq_table[index].frequency;

	target_freq = new_freq;

	if (new_freq < this_dbs_info->cur_policy->cur) {
		spin_lock_irqsave(&down_cpumask_lock, flags);
		cpumask_set_cpu(0, &down_cpumask);
		spin_unlock_irqrestore(&down_cpumask_lock, flags);
		queue_work(down_wq, &freq_scale_down_work);
	} else {
		spin_lock_irqsave(&up_cpumask_lock, flags);
		cpumask_set_cpu(0, &up_cpumask);
		spin_unlock_irqrestore(&up_cpumask_lock, flags);
		wake_up_process(up_task);
	}

	return;

do_nothing:
	for_each_online_cpu(j) {
		per_cpu(idle_in_idle, j) =
			get_cpu_idle_time_us(j, &per_cpu(idle_exit_wall, j));
	}
	mod_timer(&cpu_timer, jiffies + 2);
	schedule_delayed_work_on(0, &this_dbs_info->work, 10);

	if (mutex_is_locked(&short_timer_mutex))
		mutex_unlock(&short_timer_mutex);
}
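
/*
 * Worked example for the short-term sample above (illustrative numbers):
 * if a CPU was idle for delta_idle = 500 uS out of a delta_time = 2000 uS
 * window, then
 *
 *	short_load = 100 * (2000 - 500) / 2000 = 75
 *
 * With the default go_maxspeed_load of 60, a 75% short-term load sends the
 * policy straight to policy->max via the up task.
 */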
/*** delete after deprecation time ***/

/************************** sysfs end ************************/
static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
{
#ifndef CONFIG_ARCH_EXYNOS4
	if (p->cur == p->max)
		return;
#endif
	__cpufreq_driver_target(p, freq, CPUFREQ_RELATION_H);
}
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int max_load_freq;

	struct cpufreq_policy *policy;
	unsigned int j;

	unsigned int index, new_freq;
	unsigned int longterm_load = 0;

	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate, we check if the current idle time is less
	 * than 20% (default); if so, we try to increase the frequency.
	 * Every sampling_rate, we also look for the lowest frequency which
	 * can sustain the load while keeping idle time over 30%. If such a
	 * frequency exists, we try to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of 5% (default)
	 * of the current frequency.
	 */

	/* Get absolute load - in terms of freq */
	max_load_freq = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
		unsigned int idle_time, wall_time, iowait_time;
		unsigned int load, load_freq;
		int freq_avg;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time);
		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
				j_dbs_info->prev_cpu_iowait);
		j_dbs_info->prev_cpu_iowait = cur_iowait_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		/*
		 * For the purpose of adaptive, waiting for disk IO is an
		 * indication that you're performance critical, and not that
		 * the system is actually idle. So subtract the iowait time
		 * from the cpu idle time.
		 */
		if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
			idle_time -= iowait_time;

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		if (load > longterm_load)
			longterm_load = load;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;

		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
	}

	if (longterm_load >= MIN_ONDEMAND_THRESHOLD)
		this_dbs_info->ondemand = true;
	else
		this_dbs_info->ondemand = false;
	/* Check for frequency increase */
	if (max_load_freq > (dbs_tuners_ins.up_threshold * policy->cur)) {
		cpufreq_frequency_table_target(policy,
					       this_dbs_info->freq_table,
					       (policy->cur + step_up_load),
					       CPUFREQ_RELATION_L, &index);

		new_freq = this_dbs_info->freq_table[index].frequency;
		dbs_freq_increase(policy, new_freq);
		return;
	}
	/* Check for frequency decrease */
	/* If we cannot reduce the frequency anymore, break out early */
#ifndef CONFIG_ARCH_EXYNOS4
	if (policy->cur == policy->min)
		return;
#endif

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we focus 10 points under the threshold.
	 */
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	    policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);

		if (freq_next < policy->min)
			freq_next = policy->min;

		__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
	}
}
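
/*
 * Worked example for the thresholds above (illustrative numbers): with
 * up_threshold = 80, down_differential = 3 and policy->cur = 800000 kHz,
 * an increase triggers once max_load_freq > 80 * 800000, and a decrease
 * is considered once max_load_freq < 77 * 800000, targeting
 *
 *	freq_next = max_load_freq / 77
 *
 * i.e. the lowest frequency that would keep the observed load just under
 * the up threshold.
 */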
static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int delay;

	mutex_lock(&dbs_info->timer_mutex);

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	dbs_check_cpu(dbs_info);

	/* We want all CPUs to do sampling nearly on the same jiffy */
	delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	schedule_delayed_work_on(cpu, &dbs_info->work, delay);

	mutex_unlock(&dbs_info->timer_mutex);
}
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	cancel_delayed_work_sync(&dbs_info->work);
}
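
/*
 * For example (illustrative numbers): with sampling_rate = 40000 uS and
 * HZ = 100 (10 ms per jiffy), usecs_to_jiffies(40000) yields a delay of
 * 4 jiffies, so each CPU re-samples its load every fourth tick.
 */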
/*
 * Not all CPUs want IO time to be accounted as busy; this depends on how
 * efficient idling at a higher frequency/voltage is.
 * Pavel Machek says this is not so for various generations of AMD and old
 * Intel systems.
 * Mike Chan (android.com) claims this is also not true for ARM.
 * Because of this, whitelist specific known (series) of CPUs by default, and
 * leave all others up to the user.
 */
static int should_io_be_busy(void)
{
#if defined(CONFIG_X86)
	/*
	 * For Intel, Core 2 (model 15) and later have an efficient idle.
	 */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model >= 15)
		return 1;
#endif
	return 0;
}
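
/*
 * For example, on an Intel family 6, model 23 CPU (a Core 2 derivative)
 * the check above returns 1, so io_is_busy defaults to on and iowait is
 * counted as busy time in dbs_check_cpu(); on ARM platforms such as this
 * driver's EXYNOS target it returns 0 and the user decides via sysfs.
 */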
static void cpufreq_adaptive_idle(void)
{
	int i;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 0);
	struct cpufreq_policy *policy;

	policy = dbs_info->cur_policy;

	pm_idle_old();

	if ((policy->cur == policy->min) ||
	    (policy->cur == policy->max)) {

		if (timer_pending(&cpu_timer))
			return;

		if (mutex_trylock(&short_timer_mutex)) {
			for_each_online_cpu(i) {
				per_cpu(idle_in_idle, i) =
					get_cpu_idle_time_us(i,
						&per_cpu(idle_exit_wall, i));
			}

			mod_timer(&cpu_timer, jiffies + 2);
			cancel_delayed_work(&dbs_info->work);
		}
	} else {
		if (timer_pending(&cpu_timer))
			del_timer(&cpu_timer);
	}
}
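
/*
 * Design note: hooking pm_idle means the short-term sample is armed only
 * from the idle path, and only while the policy sits at either extreme
 * (min or max). Arming cpu_timer at jiffies + 2 gives a roughly two-tick
 * window, after which cpufreq_adaptive_timer() decides whether a burst of
 * load justifies leaving that extreme ahead of the normal, slower dbs
 * sampling.
 */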
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
		if (rc) {
			mutex_unlock(&dbs_mutex);
			return rc;
		}

		dbs_enable++;
		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->cpu = cpu;
		adaptive_init_cpu(cpu);

		/*
		 * Start the timer-scheduled work when this governor
		 * is used for the first time.
		 */
		if (dbs_enable == 1) {
			unsigned int latency;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/* policy latency is in nS. Convert it to uS first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);
			dbs_tuners_ins.io_is_busy = should_io_be_busy();
		}
		mutex_unlock(&dbs_mutex);

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_timer_init(this_dbs_info);

		pm_idle_old = pm_idle;
		pm_idle = cpufreq_adaptive_idle;
		break;
	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		mutex_destroy(&this_dbs_info->timer_mutex);
		dbs_enable--;
		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		pm_idle = pm_idle_old;
		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
						policy->max,
						CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
						policy->min,
						CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);
		break;
	}
	return 0;
}
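
/*
 * From userspace the governor is selected per policy in the usual cpufreq
 * way, e.g. (path assumes cpu0):
 *
 *	# echo adaptive > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * which makes the core call cpufreq_governor_dbs() with CPUFREQ_GOV_START,
 * and later with CPUFREQ_GOV_LIMITS/CPUFREQ_GOV_STOP as limits change or
 * the governor is switched away.
 */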
static inline void cpufreq_adaptive_update_time(void)
{
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpufreq_policy *policy;
	int j;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
	policy = this_dbs_info->cur_policy;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time);
		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

		j_dbs_info->prev_cpu_wall = cur_wall_time;
		j_dbs_info->prev_cpu_idle = cur_idle_time;
		j_dbs_info->prev_cpu_iowait = cur_iowait_time;

		if (dbs_tuners_ins.ignore_nice)
			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
	}
}
static int cpufreq_adaptive_up_task(void *data)
{
	unsigned long flags;
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpufreq_policy *policy;
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
	policy = this_dbs_info->cur_policy;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock_irqsave(&up_cpumask_lock, flags);

		if (cpumask_empty(&up_cpumask)) {
			spin_unlock_irqrestore(&up_cpumask_lock, flags);
			schedule();

			if (kthread_should_stop())
				break;

			spin_lock_irqsave(&up_cpumask_lock, flags);
		}

		set_current_state(TASK_RUNNING);

		cpumask_clear(&up_cpumask);
		spin_unlock_irqrestore(&up_cpumask_lock, flags);

		__cpufreq_driver_target(this_dbs_info->cur_policy,
					target_freq,
					CPUFREQ_RELATION_H);
		if (policy->cur != policy->max) {
			mutex_lock(&this_dbs_info->timer_mutex);

			schedule_delayed_work_on(0, &this_dbs_info->work, delay);
			mutex_unlock(&this_dbs_info->timer_mutex);
			cpufreq_adaptive_update_time();
		}
		if (mutex_is_locked(&short_timer_mutex))
			mutex_unlock(&short_timer_mutex);
	}

	return 0;
}
static void cpufreq_adaptive_freq_down(struct work_struct *work)
{
	unsigned long flags;
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpufreq_policy *policy;
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	spin_lock_irqsave(&down_cpumask_lock, flags);
	cpumask_clear(&down_cpumask);
	spin_unlock_irqrestore(&down_cpumask_lock, flags);

	this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
	policy = this_dbs_info->cur_policy;

	__cpufreq_driver_target(this_dbs_info->cur_policy,
				target_freq,
				CPUFREQ_RELATION_H);
	if (policy->cur != policy->min) {
		mutex_lock(&this_dbs_info->timer_mutex);

		schedule_delayed_work_on(0, &this_dbs_info->work, delay);
		mutex_unlock(&this_dbs_info->timer_mutex);
		cpufreq_adaptive_update_time();
	}

	if (mutex_is_locked(&short_timer_mutex))
		mutex_unlock(&short_timer_mutex);
}
static int __init cpufreq_gov_dbs_init(void)
{
	cputime64_t wall;
	u64 idle_time;
	int cpu = get_cpu();
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

	go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
	keep_minspeed_load = DEFAULT_KEEP_MINSPEED_LOAD;
	step_up_load = DEFAULT_STEPUP_LOAD;

	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
		/*
		 * In no_hz/micro accounting case we set the minimum frequency
		 * not depending on HZ, but fixed (very low). The deferred
		 * timer might skip some samples if idle/sleeping as needed.
		 */
		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		/* For correct statistics, we need 10 ticks for each measure */
		min_sampling_rate =
			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
	}

	init_timer(&cpu_timer);
	cpu_timer.function = cpufreq_adaptive_timer;

	up_task = kthread_create(cpufreq_adaptive_up_task, NULL,
				 "kadaptiveup"); /* assumed thread name */
	if (IS_ERR(up_task))
		return PTR_ERR(up_task);

	sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
	get_task_struct(up_task);

	/* No rescuer thread, bind to CPU queuing the work for possibly
	   warm cache (probably doesn't matter much). */
	down_wq = alloc_workqueue("kadaptive_down", 0, 1);
	if (!down_wq)
		goto err_freeuptask;

	INIT_WORK(&freq_scale_down_work, cpufreq_adaptive_freq_down);

	return cpufreq_register_governor(&cpufreq_gov_adaptive);

err_freeuptask:
	put_task_struct(up_task);
	return -ENOMEM;
}
static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_adaptive);
}
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_adaptive' - A dynamic cpufreq governor for "
		   "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);