1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_rps.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/librapl.h"
20 /* Try to isolate the impact of cstates from determining frequency response */
21 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
/*
 * engine_heartbeat_disable - stop the heartbeat on @engine for a measurement.
 *
 * Zeroes the heartbeat interval property, calls intel_engine_pm_get() and
 * parks the heartbeat. The matching intel_engine_pm_put() is issued by
 * engine_heartbeat_enable().
 */
23 static void engine_heartbeat_disable(struct intel_engine_cs *engine)
25 engine->props.heartbeat_interval_ms = 0;
27 intel_engine_pm_get(engine);
28 intel_engine_park_heartbeat(engine);
/*
 * engine_heartbeat_enable - undo engine_heartbeat_disable() for @engine.
 *
 * Calls intel_engine_pm_put() and then restores the heartbeat interval
 * property from the engine defaults.
 */
31 static void engine_heartbeat_enable(struct intel_engine_cs *engine)
33 intel_engine_pm_put(engine);
35 engine->props.heartbeat_interval_ms =
36 engine->defaults.heartbeat_interval_ms;
/*
 * dummy_rps_work - stand-in work function for rps->work.
 *
 * The selftests in this file assign it to rps->work.func while they run and
 * restore the saved handler afterwards. The body is not visible in this view;
 * NOTE(review): presumably empty - confirm.
 */
39 static void dummy_rps_work(struct work_struct *wrk)
/*
 * cmp_u64 - sort() comparator; @A and @B are treated as pointers to u64.
 *
 * Passed to sort() for the u64 sample arrays in this file. The comparison
 * itself is on lines not visible in this view.
 */
43 static int cmp_u64(const void *A, const void *B)
45 const u64 *a = A, *b = B;
/*
 * cmp_u32 - sort() comparator; @A and @B are treated as pointers to u32.
 *
 * Passed to sort() for the cycles_ samples in live_rps_clock_interval(). The
 * comparison itself is on lines not visible in this view.
 */
55 static int cmp_u32(const void *A, const void *B)
57 const u32 *a = A, *b = B;
/*
 * create_spin_counter - build a self-looping batch that counts in a CS GPR.
 *
 * Allocates a 64KiB internal object, binds it into @vm with PIN_USER and maps
 * it write-combined. The batch emitted into it uses MI_MATH on the COUNT and
 * INC registers, unrolled 1024 times, and ends with an MI_BATCH_BUFFER_START
 * that jumps back to dword index loop of the same buffer. Store-register-mem
 * commands copy COUNT into the last dword of the object (index end).
 *
 * On return *cancel points at the dword at index loop, and *counter points at
 * the zeroed last dword when srm is set, otherwise it is NULL.
 *
 * NOTE(review): the embedded line numbers skip values; the remaining
 * parameters (srm, cancel, counter), the enum naming COUNT and INC, the error
 * checks and the final return are not visible in this view.
 */
67 static struct i915_vma *
68 create_spin_counter(struct intel_engine_cs *engine,
69 struct i915_address_space *vm,
79 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
80 struct drm_i915_gem_object *obj;
87 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
/* Index of the last u32 in the object; used as the counter slot. */
91 end = obj->base.size / sizeof(u32) - 1;
93 vma = i915_vma_instance(obj, vm, NULL);
95 i915_gem_object_put(obj);
99 err = i915_vma_pin(vma, 0, 0, PIN_USER);
105 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
107 i915_gem_object_put(obj);
108 return ERR_CAST(base);
/*
 * One LRI addressing both dwords of every CS GPR; the values loaded are on
 * lines not visible here.
 */
112 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
113 for (i = 0; i < __NGPR__; i++) {
114 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
116 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
120 *cs++ = MI_LOAD_REGISTER_IMM(1);
121 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
126 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
127 for (i = 0; i < 1024; i++) {
129 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
130 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
132 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
/* Write COUNT to the last dword of this object (GPU address of index end). */
135 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
136 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
137 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
138 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
/* Jump back to dword index loop of this same batch. */
142 *cs++ = MI_BATCH_BUFFER_START_GEN8;
143 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
144 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
145 GEM_BUG_ON(cs - base > end);
147 i915_gem_object_flush_map(obj);
/* Callers overwrite *cancel with MI_BATCH_BUFFER_END to stop the loop. */
149 *cancel = base + loop;
150 *counter = srm ? memset32(base + end, 0, 1) : NULL;
/*
 * wait_for_freq - poll the actual frequency while waiting for @freq.
 *
 * Samples read_cagf(@rps) in a loop that gives up once @timeout_ms has
 * elapsed. The in-body comments mark two further exit conditions: the target
 * being reached, and the reading not changing across the history window.
 * Between samples it sleeps for sleep..2*sleep microseconds, with sleep
 * clamped to timeout_ms * 20.
 *
 * NOTE(review): the loop construct, the history update and the return
 * statement are on lines not visible here; presumably the last sampled value
 * is returned - confirm.
 */
154 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
/* Seed every history byte with the requested value. */
161 memset(history, freq, sizeof(history));
164 /* The PCU does not change instantly, but drifts towards the goal? */
165 end = jiffies + msecs_to_jiffies(timeout_ms);
169 act = read_cagf(rps);
170 if (time_after(jiffies, end))
173 /* Target acquired */
177 /* Any change within the last N samples? */
/* memchr_inv() returns NULL when every history byte equals act. */
178 if (!memchr_inv(history, act, sizeof(history)))
182 i = (i + 1) % ARRAY_SIZE(history);
184 usleep_range(sleep, 2 * sleep);
186 if (sleep > timeout_ms * 20)
187 sleep = timeout_ms * 20;
/*
 * rps_set_check - request @freq and report what the hardware settled on.
 *
 * Under rps->lock, asserts that RPS is active, calls intel_rps_set() and
 * asserts that rps->last_freq now equals @freq. After dropping the lock it
 * returns the result of wait_for_freq() with a 50ms timeout.
 */
191 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
193 mutex_lock(&rps->lock);
194 GEM_BUG_ON(!intel_rps_is_active(rps));
195 intel_rps_set(rps, freq);
196 GEM_BUG_ON(rps->last_freq != freq);
197 mutex_unlock(&rps->lock);
199 return wait_for_freq(rps, freq, 50);
/*
 * show_pstate_limits - log the raw P-state capability/limit register.
 *
 * On Broxton it prints the offset and value of BXT_RP_STATE_CAP; on other
 * gen9 parts it prints GEN9_RP_STATE_LIMITS. No branch for other platforms is
 * visible in this view.
 */
202 static void show_pstate_limits(struct intel_rps *rps)
204 struct drm_i915_private *i915 = rps_to_i915(rps);
206 if (IS_BROXTON(i915)) {
207 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209 intel_uncore_read(rps_to_uncore(rps),
211 } else if (IS_GEN(i915, 9)) {
212 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214 intel_uncore_read(rps_to_uncore(rps),
215 GEN9_RP_STATE_LIMITS));
/*
 * live_rps_clock_interval - selftest: compare the RPS EI counter to walltime.
 *
 * @arg is the struct intel_gt under test. With the RPS worker replaced by
 * dummy_rps_work and RPS disabled, a spinner is started on an engine, the up
 * evaluation interval and up threshold are written as 0xffffffff, and
 * GEN6_RP_CUR_UP_EI is sampled five times alongside ktime_get(). The filtered
 * cycle count and elapsed time are converted with
 * intel_gt_pm_interval_to_ns() and intel_gt_ns_to_pm_interval(), and each
 * pair must agree within the 8:10 ratio tested below. The engine loop breaks
 * after the first measured engine.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
219 int live_rps_clock_interval(void *arg)
221 struct intel_gt *gt = arg;
/*
 * NOTE(review): >->rps here and in the calls below looks like a mangled
 * &gt->rps - confirm against the pristine file.
 */
222 struct intel_rps *rps = >->rps;
223 void (*saved_work)(struct work_struct *wrk);
224 struct intel_engine_cs *engine;
225 enum intel_engine_id id;
226 struct igt_spinner spin;
229 if (!intel_rps_is_enabled(rps))
232 if (igt_spinner_init(&spin, gt))
/* Swap in the dummy worker; the saved handler is restored before returning. */
235 intel_gt_pm_wait_for_idle(gt);
236 saved_work = rps->work.func;
237 rps->work.func = dummy_rps_work;
240 intel_rps_disable(>->rps);
242 intel_gt_check_clock_frequency(gt);
244 for_each_engine(engine, gt, id) {
245 struct i915_request *rq;
249 if (!intel_engine_can_store_dword(engine))
252 engine_heartbeat_disable(engine);
254 rq = igt_spinner_create_request(&spin,
255 engine->kernel_context,
258 engine_heartbeat_enable(engine);
263 i915_request_add(rq);
265 if (!igt_wait_for_spinner(&spin, rq)) {
266 pr_err("%s: RPS spinner did not start\n",
268 igt_spinner_end(&spin);
269 engine_heartbeat_enable(engine);
270 intel_gt_set_wedged(engine->gt);
/* Raw _fw register accessors are used below, under an explicit forcewake. */
275 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
277 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
279 /* Set the evaluation interval to infinity! */
280 intel_uncore_write_fw(gt->uncore,
281 GEN6_RP_UP_EI, 0xffffffff);
282 intel_uncore_write_fw(gt->uncore,
283 GEN6_RP_UP_THRESHOLD, 0xffffffff);
285 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
286 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
288 if (wait_for(intel_uncore_read_fw(gt->uncore,
291 /* Just skip the test; assume lack of HW support */
292 pr_notice("%s: rps evaluation interval not ticking\n",
/*
 * Five samples: each records the change in the EI counter and in ktime
 * across a delay that is on lines not visible here.
 */
300 for (i = 0; i < 5; i++) {
303 dt_[i] = ktime_get();
304 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
308 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
309 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
314 /* Use the median of both cycle/dt; close enough */
315 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
316 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
317 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
318 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
321 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
322 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
324 igt_spinner_end(&spin);
325 engine_heartbeat_enable(engine);
328 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
330 intel_gt_ns_to_pm_interval(gt, dt);
332 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
333 engine->name, cycles, time, dt, expected,
334 gt->clock_frequency / 1000);
/* Flag a mismatch when time is below 80% or above 125% of dt. */
336 if (10 * time < 8 * dt ||
337 8 * time > 10 * dt) {
338 pr_err("%s: rps clock time does not match walltime!\n",
/* Same tolerance, applied in units of EI cycles. */
343 if (10 * expected < 8 * cycles ||
344 8 * expected > 10 * cycles) {
345 pr_err("%s: walltime does not match rps clock ticks!\n",
351 if (igt_flush_test(gt->i915))
354 break; /* once is enough */
357 intel_rps_enable(>->rps);
360 igt_spinner_fini(&spin);
362 intel_gt_pm_wait_for_idle(gt);
363 rps->work.func = saved_work;
365 if (err == -ENODEV) /* skipped, don't report a fail */
/*
 * live_rps_control - selftest: check requested against actual frequency.
 *
 * @arg is the struct intel_gt under test. For each engine that can store a
 * dword, a spinner is started and rps_set_check() is used to: set the minimum
 * frequency, step upwards from min_freq + 1 looking for the first request the
 * hardware does not reach, record the achieved value as limit, restore the
 * minimum, and then time the transitions to limit and back to the minimum.
 * The result is logged, and a limit equal to min_freq is reported as an
 * error. Cherryview is special-cased before the test starts.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the return/skip paths are not visible in this view.
 */
371 int live_rps_control(void *arg)
373 struct intel_gt *gt = arg;
/* NOTE(review): >->rps looks like a mangled &gt->rps - confirm. */
374 struct intel_rps *rps = >->rps;
375 void (*saved_work)(struct work_struct *wrk);
376 struct intel_engine_cs *engine;
377 enum intel_engine_id id;
378 struct igt_spinner spin;
382 * Check that the actual frequency matches our requested frequency,
383 * to verify our control mechanism. We have to be careful that the
384 * PCU may throttle the GPU in which case the actual frequency used
385 * will be lowered than requested.
388 if (!intel_rps_is_enabled(rps))
391 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
394 if (igt_spinner_init(&spin, gt))
/* Swap in the dummy worker; the saved handler is restored before returning. */
397 intel_gt_pm_wait_for_idle(gt);
398 saved_work = rps->work.func;
399 rps->work.func = dummy_rps_work;
402 for_each_engine(engine, gt, id) {
403 struct i915_request *rq;
404 ktime_t min_dt, max_dt;
408 if (!intel_engine_can_store_dword(engine))
411 engine_heartbeat_disable(engine);
413 rq = igt_spinner_create_request(&spin,
414 engine->kernel_context,
421 i915_request_add(rq);
423 if (!igt_wait_for_spinner(&spin, rq)) {
424 pr_err("%s: RPS spinner did not start\n",
426 igt_spinner_end(&spin);
427 engine_heartbeat_enable(engine);
428 intel_gt_set_wedged(engine->gt);
433 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
434 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
435 engine->name, rps->min_freq, read_cagf(rps));
436 igt_spinner_end(&spin);
437 engine_heartbeat_enable(engine);
438 show_pstate_limits(rps);
/*
 * Step upwards, testing whether the achieved frequency falls short of the
 * request; the body of the if is on a line not visible here.
 */
443 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
444 if (rps_set_check(rps, f) < f)
448 limit = rps_set_check(rps, f);
450 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
451 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
452 engine->name, rps->min_freq, read_cagf(rps));
453 igt_spinner_end(&spin);
454 engine_heartbeat_enable(engine);
455 show_pstate_limits(rps);
/* Time the change up to limit, then the change back down to the minimum. */
460 max_dt = ktime_get();
461 max = rps_set_check(rps, limit);
462 max_dt = ktime_sub(ktime_get(), max_dt);
464 min_dt = ktime_get();
465 min = rps_set_check(rps, rps->min_freq);
466 min_dt = ktime_sub(ktime_get(), min_dt);
468 igt_spinner_end(&spin);
469 engine_heartbeat_enable(engine);
471 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
473 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
474 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
475 limit, intel_gpu_freq(rps, limit),
476 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
478 if (limit == rps->min_freq) {
479 pr_err("%s: GPU throttled to minimum!\n",
481 show_pstate_limits(rps);
486 if (igt_flush_test(gt->i915)) {
493 igt_spinner_fini(&spin);
495 intel_gt_pm_wait_for_idle(gt);
496 rps->work.func = saved_work;
/*
 * show_pcu_config - log the pcode frequency table for the RPS range.
 *
 * Walks gpu_freq from rps->min_freq to rps->max_freq (both divided by
 * GEN9_FREQ_SCALER on gen9+), issues a GEN6_PCODE_READ_MIN_FREQ_TABLE pcode
 * read for each step and prints one row per step under the GPU / eCPU / eRing
 * heading. The two values printed from ia_freq are its low two bytes, each
 * multiplied by 100. A runtime-pm wakeref is held around the loop.
 */
501 static void show_pcu_config(struct intel_rps *rps)
503 struct drm_i915_private *i915 = rps_to_i915(rps);
504 unsigned int max_gpu_freq, min_gpu_freq;
505 intel_wakeref_t wakeref;
511 min_gpu_freq = rps->min_freq;
512 max_gpu_freq = rps->max_freq;
513 if (INTEL_GEN(i915) >= 9) {
514 /* Convert GT frequency to 50 HZ units */
515 min_gpu_freq /= GEN9_FREQ_SCALER;
516 max_gpu_freq /= GEN9_FREQ_SCALER;
519 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
521 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
522 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
/* ia_freq starts as the GPU frequency and is passed to the pcode read. */
523 int ia_freq = gpu_freq;
525 sandybridge_pcode_read(i915,
526 GEN6_PCODE_READ_MIN_FREQ_TABLE,
529 pr_info("%5d %5d %5d\n",
531 ((ia_freq >> 0) & 0xff) * 100,
532 ((ia_freq >> 8) & 0xff) * 100);
535 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
/*
 * __measure_frequency - rate of change of the counter at @cntr.
 *
 * Reads *cntr, sleeps for @duration_ms to 2 * @duration_ms milliseconds,
 * reads it again and returns 1000 * 1000 * delta / dt. Callers print the
 * result as KHz.
 *
 * NOTE(review): the initial assignment of dt is on a line not visible here;
 * presumably ktime_get() - confirm.
 */
538 static u64 __measure_frequency(u32 *cntr, int duration_ms)
543 dc = READ_ONCE(*cntr);
544 usleep_range(1000 * duration_ms, 2000 * duration_ms);
545 dc = READ_ONCE(*cntr) - dc;
546 dt = ktime_get() - dt;
548 return div64_u64(1000 * 1000 * dc, dt);
/*
 * measure_frequency_at - measure the counter rate at a requested frequency.
 *
 * Requests *@freq via rps_set_check() and stores the achieved value back,
 * takes five __measure_frequency() samples of 2ms each, then replaces *@freq
 * with the mean of the achieved value and a fresh read_cagf(). Returns the
 * filtered rate (x[1] + 2 * x[2] + x[3]) / 4 of the sorted samples.
 */
551 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
556 *freq = rps_set_check(rps, *freq);
557 for (i = 0; i < 5; i++)
558 x[i] = __measure_frequency(cntr, 2);
559 *freq = (*freq + read_cagf(rps)) / 2;
561 /* A simple triangle filter for better result stability */
562 sort(x, 5, sizeof(*x), cmp_u64, NULL);
563 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * __measure_cs_frequency - rate of change of CS_GPR(0) on @engine.
 *
 * Same calculation as __measure_frequency(), but the counter is read from the
 * CS_GPR(0) register with intel_uncore_read_fw() instead of from memory.
 *
 * NOTE(review): the remaining parameter and the initial assignment of dt are
 * on lines not visible here.
 */
566 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
572 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
573 usleep_range(1000 * duration_ms, 2000 * duration_ms);
574 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
575 dt = ktime_get() - dt;
577 return div64_u64(1000 * 1000 * dc, dt);
/*
 * measure_cs_frequency_at - measure the CS_GPR(0) rate at a requested
 * frequency.
 *
 * Requests *freq via rps_set_check() and stores the achieved value back,
 * takes five __measure_cs_frequency() samples of 2ms each, then replaces
 * *freq with the mean of the achieved value and a fresh read_cagf(). Returns
 * the filtered rate (x[1] + 2 * x[2] + x[3]) / 4 of the sorted samples.
 */
580 static u64 measure_cs_frequency_at(struct intel_rps *rps,
581 struct intel_engine_cs *engine,
587 *freq = rps_set_check(rps, *freq);
588 for (i = 0; i < 5; i++)
589 x[i] = __measure_cs_frequency(engine, 2);
590 *freq = (*freq + read_cagf(rps)) / 2;
592 /* A simple triangle filter for better result stability */
593 sort(x, 5, sizeof(*x), cmp_u64, NULL);
594 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * scaled_within - check that @x and @y agree within the ratio @f_n : @f_d.
 *
 * True when f_d * x > f_n * y and f_n * x < f_d * y. For non-zero @y this
 * means x / y lies strictly between f_n / f_d and f_d / f_n. The products are
 * not checked for overflow.
 */
597 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
599 return f_d * x > f_n * y && f_n * x < f_d * y;
/*
 * live_rps_frequency_cs - selftest: the CS counter rate must track frequency.
 *
 * @arg is the struct intel_gt under test. For each engine the batch from
 * create_spin_counter() (srm argument false) is submitted, and the rate of
 * CS_GPR(0) is measured at min_freq and max_freq with
 * measure_cs_frequency_at(). The two rates, cross-multiplied by the
 * frequencies, are compared with scaled_within(); on failure the pcu table is
 * dumped and the rate at each frequency step is logged. The batch is stopped
 * by writing MI_BATCH_BUFFER_END at *cancel.
 *
 * A cpu latency QoS request is held for the duration when CPU_LATENCY >= 0.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
602 int live_rps_frequency_cs(void *arg)
604 void (*saved_work)(struct work_struct *wrk);
605 struct intel_gt *gt = arg;
/* NOTE(review): >->rps looks like a mangled &gt->rps - confirm. */
606 struct intel_rps *rps = >->rps;
607 struct intel_engine_cs *engine;
608 struct pm_qos_request qos;
609 enum intel_engine_id id;
613 * The premise is that the GPU does change freqency at our behest.
614 * Let's check there is a correspondence between the requested
615 * frequency, the actual frequency, and the observed clock rate.
618 if (!intel_rps_is_enabled(rps))
621 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
624 if (CPU_LATENCY >= 0)
625 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
/* Swap in the dummy worker; the saved handler is restored before returning. */
627 intel_gt_pm_wait_for_idle(gt);
628 saved_work = rps->work.func;
629 rps->work.func = dummy_rps_work;
631 for_each_engine(engine, gt, id) {
632 struct i915_request *rq;
633 struct i915_vma *vma;
640 engine_heartbeat_disable(engine);
642 vma = create_spin_counter(engine,
643 engine->kernel_context->vm, false,
647 engine_heartbeat_enable(engine);
651 rq = intel_engine_create_kernel_request(engine);
658 err = i915_request_await_object(rq, vma->obj, false);
660 err = i915_vma_move_to_active(vma, rq, 0);
662 err = rq->engine->emit_bb_start(rq,
665 i915_vma_unlock(vma);
666 i915_request_add(rq);
/* A non-zero CS_GPR(0) is taken as the sign that the loop is running. */
670 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
672 pr_err("%s: timed loop did not start\n",
677 min.freq = rps->min_freq;
678 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
680 max.freq = rps->max_freq;
681 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
683 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
685 min.count, intel_gpu_freq(rps, min.freq),
686 max.count, intel_gpu_freq(rps, max.freq),
687 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
688 max.freq * min.count));
/* Cross-multiplied comparison; the ratio arguments are on lines not shown. */
690 if (!scaled_within(max.freq * min.count,
691 min.freq * max.count,
695 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
697 max.freq * min.count,
698 min.freq * max.count);
699 show_pcu_config(rps);
701 for (f = min.freq + 1; f <= rps->max_freq; f++) {
705 count = measure_cs_frequency_at(rps, engine, &act);
709 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
711 act, intel_gpu_freq(rps, act), count,
712 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
715 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the looping batch by overwriting the dword at the loop start. */
722 *cancel = MI_BATCH_BUFFER_END;
723 i915_gem_object_flush_map(vma->obj);
724 i915_gem_object_unpin_map(vma->obj);
728 engine_heartbeat_enable(engine);
729 if (igt_flush_test(gt->i915))
735 intel_gt_pm_wait_for_idle(gt);
736 rps->work.func = saved_work;
738 if (CPU_LATENCY >= 0)
739 cpu_latency_qos_remove_request(&qos);
/*
 * live_rps_frequency_srm - selftest: the SRM counter rate must track
 * frequency.
 *
 * @arg is the struct intel_gt under test. Mirrors live_rps_frequency_cs(),
 * except that create_spin_counter() is called with its srm argument true and
 * the count is read from memory through cntr using measure_frequency_at().
 * The rates at min_freq and max_freq are compared with scaled_within(); on
 * failure the pcu table is dumped and the rate at each frequency step is
 * logged. The batch is stopped by writing MI_BATCH_BUFFER_END at *cancel.
 *
 * A cpu latency QoS request is held for the duration when CPU_LATENCY >= 0.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
744 int live_rps_frequency_srm(void *arg)
746 void (*saved_work)(struct work_struct *wrk);
747 struct intel_gt *gt = arg;
/* NOTE(review): >->rps looks like a mangled &gt->rps - confirm. */
748 struct intel_rps *rps = >->rps;
749 struct intel_engine_cs *engine;
750 struct pm_qos_request qos;
751 enum intel_engine_id id;
755 * The premise is that the GPU does change freqency at our behest.
756 * Let's check there is a correspondence between the requested
757 * frequency, the actual frequency, and the observed clock rate.
760 if (!intel_rps_is_enabled(rps))
763 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
766 if (CPU_LATENCY >= 0)
767 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
/* Swap in the dummy worker; the saved handler is restored before returning. */
769 intel_gt_pm_wait_for_idle(gt);
770 saved_work = rps->work.func;
771 rps->work.func = dummy_rps_work;
773 for_each_engine(engine, gt, id) {
774 struct i915_request *rq;
775 struct i915_vma *vma;
782 engine_heartbeat_disable(engine);
784 vma = create_spin_counter(engine,
785 engine->kernel_context->vm, true,
789 engine_heartbeat_enable(engine);
793 rq = intel_engine_create_kernel_request(engine);
800 err = i915_request_await_object(rq, vma->obj, false);
802 err = i915_vma_move_to_active(vma, rq, 0);
804 err = rq->engine->emit_bb_start(rq,
807 i915_vma_unlock(vma);
808 i915_request_add(rq);
/* A non-zero counter within 10ms is taken as the loop having started. */
812 if (wait_for(READ_ONCE(*cntr), 10)) {
813 pr_err("%s: timed loop did not start\n",
818 min.freq = rps->min_freq;
819 min.count = measure_frequency_at(rps, cntr, &min.freq);
821 max.freq = rps->max_freq;
822 max.count = measure_frequency_at(rps, cntr, &max.freq);
824 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
826 min.count, intel_gpu_freq(rps, min.freq),
827 max.count, intel_gpu_freq(rps, max.freq),
828 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
829 max.freq * min.count));
/* Cross-multiplied comparison; the ratio arguments are on lines not shown. */
831 if (!scaled_within(max.freq * min.count,
832 min.freq * max.count,
836 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
838 max.freq * min.count,
839 min.freq * max.count);
840 show_pcu_config(rps);
842 for (f = min.freq + 1; f <= rps->max_freq; f++) {
846 count = measure_frequency_at(rps, cntr, &act);
850 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
852 act, intel_gpu_freq(rps, act), count,
853 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
856 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the looping batch by overwriting the dword at the loop start. */
863 *cancel = MI_BATCH_BUFFER_END;
864 i915_gem_object_flush_map(vma->obj);
865 i915_gem_object_unpin_map(vma->obj);
869 engine_heartbeat_enable(engine);
870 if (igt_flush_test(gt->i915))
876 intel_gt_pm_wait_for_idle(gt);
877 rps->work.func = saved_work;
879 if (CPU_LATENCY >= 0)
880 cpu_latency_qos_remove_request(&qos);
/*
 * sleep_for_ei - sleep across an evaluation interval with clean interrupt
 * state.
 *
 * Sleeps for @timeout_us to 2 * @timeout_us microseconds, disables and
 * re-enables the RPS interrupts (asserting rps->pm_iir is zero in between),
 * then sleeps for another 2 * @timeout_us to 3 * @timeout_us microseconds.
 */
885 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
887 /* Flush any previous EI */
888 usleep_range(timeout_us, 2 * timeout_us);
890 /* Reset the interrupt status */
891 rps_disable_interrupts(rps);
892 GEM_BUG_ON(rps->pm_iir);
893 rps_enable_interrupts(rps);
895 /* And then wait for the timeout, for real this time */
896 usleep_range(2 * timeout_us, 3 * timeout_us);
/*
 * __rps_up_interrupt - check that a busy engine raises the UP threshold
 * event.
 *
 * Sets the minimum frequency, starts a spinner from @spin on @engine and
 * verifies that RPS is active, that GEN6_PM_RP_UP_THRESHOLD is present in
 * rps->pm_events and that last_freq equals min_freq. It then sleeps across
 * the up evaluation interval (GEN6_RP_UP_EI converted to microseconds), ends
 * the spinner and requires that cur_freq is still min_freq and that the UP
 * threshold bit is set in rps->pm_iir.
 *
 * NOTE(review): the return statements and their error codes are on lines not
 * visible in this view.
 */
899 static int __rps_up_interrupt(struct intel_rps *rps,
900 struct intel_engine_cs *engine,
901 struct igt_spinner *spin)
903 struct intel_uncore *uncore = engine->uncore;
904 struct i915_request *rq;
907 if (!intel_engine_can_store_dword(engine))
910 rps_set_check(rps, rps->min_freq);
912 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
/* Hold a reference so rq can still be inspected after it is submitted. */
916 i915_request_get(rq);
917 i915_request_add(rq);
919 if (!igt_wait_for_spinner(spin, rq)) {
920 pr_err("%s: RPS spinner did not start\n",
922 i915_request_put(rq);
923 intel_gt_set_wedged(engine->gt);
927 if (!intel_rps_is_active(rps)) {
928 pr_err("%s: RPS not enabled on starting spinner\n",
930 igt_spinner_end(spin);
931 i915_request_put(rq);
935 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
936 pr_err("%s: RPS did not register UP interrupt\n",
938 i915_request_put(rq);
942 if (rps->last_freq != rps->min_freq) {
943 pr_err("%s: RPS did not program min frequency\n",
945 i915_request_put(rq);
/* Register value -> ns -> us (rounded up) for sleep_for_ei(). */
949 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
950 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
951 timeout = DIV_ROUND_UP(timeout, 1000);
953 sleep_for_ei(rps, timeout);
954 GEM_BUG_ON(i915_request_completed(rq));
956 igt_spinner_end(spin);
957 i915_request_put(rq);
959 if (rps->cur_freq != rps->min_freq) {
960 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
961 engine->name, intel_rps_read_actual_frequency(rps));
965 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
966 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
967 engine->name, rps->pm_iir,
968 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
969 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
970 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * __rps_down_interrupt - check that an idle engine raises a DOWN event.
 *
 * Sets the maximum frequency and verifies that GEN6_PM_RP_DOWN_THRESHOLD is
 * present in rps->pm_events and that last_freq equals max_freq. It then
 * sleeps across the down evaluation interval (GEN6_RP_DOWN_EI converted to
 * microseconds) and requires that cur_freq is still max_freq and that either
 * the DOWN threshold or the DOWN timeout bit is set in rps->pm_iir.
 *
 * NOTE(review): the return statements and their error codes are on lines not
 * visible in this view.
 */
977 static int __rps_down_interrupt(struct intel_rps *rps,
978 struct intel_engine_cs *engine)
980 struct intel_uncore *uncore = engine->uncore;
983 rps_set_check(rps, rps->max_freq);
985 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
986 pr_err("%s: RPS did not register DOWN interrupt\n",
991 if (rps->last_freq != rps->max_freq) {
992 pr_err("%s: RPS did not program max frequency\n",
/* Register value -> ns -> us (rounded up) for sleep_for_ei(). */
997 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
998 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
999 timeout = DIV_ROUND_UP(timeout, 1000);
1001 sleep_for_ei(rps, timeout);
1003 if (rps->cur_freq != rps->max_freq) {
1004 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1006 intel_rps_read_actual_frequency(rps));
1010 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1011 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1012 engine->name, rps->pm_iir,
1013 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1014 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1015 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1016 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1017 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1018 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * live_rps_interrupt - selftest: check that RPS UP and DOWN events arrive.
 *
 * @arg is the struct intel_gt under test. rps->pm_events is sampled while
 * holding a gt-pm reference. Then, per engine and with the RPS worker
 * replaced by dummy_rps_work, __rps_up_interrupt() is run if the UP threshold
 * event is registered, and __rps_down_interrupt() is run with rc6 disabled if
 * the DOWN threshold event is registered. The heartbeat is disabled around
 * each sub-test.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
1025 int live_rps_interrupt(void *arg)
1027 struct intel_gt *gt = arg;
/* NOTE(review): >->rps and >->rc6 look like mangled &gt-> forms - confirm. */
1028 struct intel_rps *rps = >->rps;
1029 void (*saved_work)(struct work_struct *wrk);
1030 struct intel_engine_cs *engine;
1031 enum intel_engine_id id;
1032 struct igt_spinner spin;
1037 * First, let's check whether or not we are receiving interrupts.
1040 if (!intel_rps_has_interrupts(rps))
1043 intel_gt_pm_get(gt);
1044 pm_events = rps->pm_events;
1045 intel_gt_pm_put(gt);
1047 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1051 if (igt_spinner_init(&spin, gt))
/* Swap in the dummy worker; the saved handler is restored before returning. */
1054 intel_gt_pm_wait_for_idle(gt);
1055 saved_work = rps->work.func;
1056 rps->work.func = dummy_rps_work;
1058 for_each_engine(engine, gt, id) {
1059 /* Keep the engine busy with a spinner; expect an UP! */
1060 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1061 intel_gt_pm_wait_for_idle(engine->gt);
1062 GEM_BUG_ON(intel_rps_is_active(rps));
1064 engine_heartbeat_disable(engine);
1066 err = __rps_up_interrupt(rps, engine, &spin);
1068 engine_heartbeat_enable(engine);
1072 intel_gt_pm_wait_for_idle(engine->gt);
1075 /* Keep the engine awake but idle and check for DOWN */
1076 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1077 engine_heartbeat_disable(engine);
1078 intel_rc6_disable(>->rc6);
1080 err = __rps_down_interrupt(rps, engine);
1082 intel_rc6_enable(>->rc6);
1083 engine_heartbeat_enable(engine);
1090 if (igt_flush_test(gt->i915))
1093 igt_spinner_fini(&spin);
1095 intel_gt_pm_wait_for_idle(gt);
1096 rps->work.func = saved_work;
/*
 * __measure_power - energy consumption rate over a short sleep.
 *
 * Reads librapl_energy_uJ(), sleeps for @duration_ms to 2 * @duration_ms
 * milliseconds, reads it again and returns 1000 * 1000 * dE / dt. Callers
 * print the result as mW.
 *
 * NOTE(review): the initial assignment of dt is on a line not visible here;
 * presumably ktime_get() - confirm.
 */
1101 static u64 __measure_power(int duration_ms)
1106 dE = librapl_energy_uJ();
1107 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1108 dE = librapl_energy_uJ() - dE;
1109 dt = ktime_get() - dt;
1111 return div64_u64(1000 * 1000 * dE, dt);
/*
 * measure_power_at - measure power draw at a requested frequency.
 *
 * Requests *@freq via rps_set_check() and stores the achieved value back,
 * takes five __measure_power() samples of 5ms each, then replaces *@freq with
 * the mean of the achieved value and a fresh read_cagf(). Returns the
 * filtered power (x[1] + 2 * x[2] + x[3]) / 4 of the sorted samples.
 */
1114 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1119 *freq = rps_set_check(rps, *freq);
1120 for (i = 0; i < 5; i++)
1121 x[i] = __measure_power(5);
1122 *freq = (*freq + read_cagf(rps)) / 2;
1124 /* A simple triangle filter for better result stability */
1125 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1126 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * live_rps_power - selftest: a lower frequency must draw less power.
 *
 * @arg is the struct intel_gt under test. For each engine that can store a
 * dword, a spinner is started and measure_power_at() is run at max_freq and
 * then at min_freq. If the achieved minimum is at least 90% of the achieved
 * maximum, a notice is logged that the frequency could not be controlled.
 * An error is logged when 11 * min.power exceeds 10 * max.power.
 *
 * A zero reading from librapl_energy_uJ() is special-cased before the test
 * starts.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
1129 int live_rps_power(void *arg)
1131 struct intel_gt *gt = arg;
/* NOTE(review): >->rps looks like a mangled &gt->rps - confirm. */
1132 struct intel_rps *rps = >->rps;
1133 void (*saved_work)(struct work_struct *wrk);
1134 struct intel_engine_cs *engine;
1135 enum intel_engine_id id;
1136 struct igt_spinner spin;
1140 * Our fundamental assumption is that running at lower frequency
1141 * actually saves power. Let's see if our RAPL measurement support
1145 if (!intel_rps_is_enabled(rps))
1148 if (!librapl_energy_uJ())
1151 if (igt_spinner_init(&spin, gt))
/* Swap in the dummy worker; the saved handler is restored before returning. */
1154 intel_gt_pm_wait_for_idle(gt);
1155 saved_work = rps->work.func;
1156 rps->work.func = dummy_rps_work;
1158 for_each_engine(engine, gt, id) {
1159 struct i915_request *rq;
1165 if (!intel_engine_can_store_dword(engine))
1168 engine_heartbeat_disable(engine);
1170 rq = igt_spinner_create_request(&spin,
1171 engine->kernel_context,
1174 engine_heartbeat_enable(engine);
1179 i915_request_add(rq);
1181 if (!igt_wait_for_spinner(&spin, rq)) {
1182 pr_err("%s: RPS spinner did not start\n",
1184 igt_spinner_end(&spin);
1185 engine_heartbeat_enable(engine);
1186 intel_gt_set_wedged(engine->gt);
1191 max.freq = rps->max_freq;
1192 max.power = measure_power_at(rps, &max.freq);
1194 min.freq = rps->min_freq;
1195 min.power = measure_power_at(rps, &min.freq);
1197 igt_spinner_end(&spin);
1198 engine_heartbeat_enable(engine);
1200 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1202 min.power, intel_gpu_freq(rps, min.freq),
1203 max.power, intel_gpu_freq(rps, max.freq));
/* Achieved frequencies too close together to compare power meaningfully. */
1205 if (10 * min.freq >= 9 * max.freq) {
1206 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1207 min.freq, intel_gpu_freq(rps, min.freq),
1208 max.freq, intel_gpu_freq(rps, max.freq));
1212 if (11 * min.power > 10 * max.power) {
1213 pr_err("%s: did not conserve power when setting lower frequency!\n",
1219 if (igt_flush_test(gt->i915)) {
1225 igt_spinner_fini(&spin);
1227 intel_gt_pm_wait_for_idle(gt);
1228 rps->work.func = saved_work;
/*
 * live_rps_dynamic - selftest: load-driven reclocking raises and lowers the
 * frequency.
 *
 * @arg is the struct intel_gt under test. Unlike the other tests in this
 * file, no replacement of rps->work.func is visible here. For each engine
 * that can store a dword, with rc6 disabled and an engine-pm reference held,
 * a spinner is submitted and wait_for_freq() waits up to 500ms for max_freq;
 * the spinner is then ended and wait_for_freq() waits up to 2000ms for
 * min_freq. Both results and their durations are logged, and an error is
 * reported when the idle frequency is not below the busy one.
 *
 * NOTE(review): the embedded line numbers skip values, so braces, several
 * declarations and the error/return paths are not visible in this view.
 */
1233 int live_rps_dynamic(void *arg)
1235 struct intel_gt *gt = arg;
/* NOTE(review): >->rps and >->rc6 look like mangled &gt-> forms - confirm. */
1236 struct intel_rps *rps = >->rps;
1237 struct intel_engine_cs *engine;
1238 enum intel_engine_id id;
1239 struct igt_spinner spin;
1243 * We've looked at the bascs, and have established that we
1244 * can change the clock frequency and that the HW will generate
1245 * interrupts based on load. Now we check how we integrate those
1246 * moving parts into dynamic reclocking based on load.
1249 if (!intel_rps_is_enabled(rps))
1252 if (igt_spinner_init(&spin, gt))
1255 for_each_engine(engine, gt, id) {
1256 struct i915_request *rq;
1262 if (!intel_engine_can_store_dword(engine))
/* Start each engine from an idle GT with cur_freq reset to the minimum. */
1265 intel_gt_pm_wait_for_idle(gt);
1266 GEM_BUG_ON(intel_rps_is_active(rps));
1267 rps->cur_freq = rps->min_freq;
1269 intel_engine_pm_get(engine);
1270 intel_rc6_disable(>->rc6);
1271 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1273 rq = igt_spinner_create_request(&spin,
1274 engine->kernel_context,
1281 i915_request_add(rq);
/* Busy phase: time how long the frequency takes to approach max_freq. */
1283 max.dt = ktime_get();
1284 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1285 max.dt = ktime_sub(ktime_get(), max.dt);
1287 igt_spinner_end(&spin);
/* Idle phase: time how long the frequency takes to fall back to min_freq. */
1289 min.dt = ktime_get();
1290 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1291 min.dt = ktime_sub(ktime_get(), min.dt);
1293 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1295 max.freq, intel_gpu_freq(rps, max.freq),
1296 ktime_to_ns(max.dt),
1297 min.freq, intel_gpu_freq(rps, min.freq),
1298 ktime_to_ns(min.dt));
1299 if (min.freq >= max.freq) {
/*
 * NOTE(review): the format string below places the exclamation mark after
 * the newline; it most likely belongs before it.
 */
1300 pr_err("%s: dynamic reclocking of spinner failed\n!",
1306 intel_rc6_enable(>->rc6);
1307 intel_engine_pm_put(engine);
1309 if (igt_flush_test(gt->i915))
1315 igt_spinner_fini(&spin);