// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

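/*
 * Heartbeats are periodic kernel requests; left running they would add
 * background load and perturb the measurements below. Each test therefore
 * parks the heartbeat for the duration, and on completion restores the
 * engine's default interval rather than a hardcoded value.
 */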
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

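/*
 * Build a self-looping batch that busy-spins on the CS: each pass of the
 * unrolled loop adds INC to the COUNT GPR using MI_MATH and, if srm is set,
 * also writes the running count into the last dword of the buffer with
 * MI_STORE_REGISTER_MEM. The closing MI_BATCH_BUFFER_START jumps back to
 * the top of the loop, so the counter ticks in proportion to the CS clock
 * until *cancel is overwritten with MI_BATCH_BUFFER_END.
 */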
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		i915_gem_object_put(obj);
		return ERR_CAST(base);
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;
}

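/*
 * Poll the actual frequency until it reaches the target, backing off
 * exponentially between samples. If the last 64 samples are all identical,
 * the frequency has plateaued short of the goal and we return early with
 * whatever was last read.
 */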
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (IS_GEN(i915, 9)) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

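			/*
			 * Allow ~20% slack in either direction: complain
			 * only if time < 4/5 * dt or time > 5/4 * dt.
			 */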
			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (INTEL_GEN(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

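	/* dc counter ticks over dt ns, scaled by 10^6 to give kHz */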
	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
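	/* e.g. sorted {a, b, c, d, e} -> (b + 2c + d) / 4, discarding outliers */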
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

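	/* As above: GPR ticks over dt ns, scaled by 10^6 to give kHz */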
	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

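/*
 * True if the ratio x:y lies strictly between f_n:f_d and f_d:f_n, e.g.
 * scaled_within(x, y, 2, 3) accepts x/y anywhere between 2/3 and 3/2.
 */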
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

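/*
 * With the GPU pinned at its minimum frequency and kept 100% busy by a
 * spinner, a full evaluation interval should raise an UP threshold
 * interrupt; since the RPS worker has been replaced with a stub, the
 * programmed frequency itself must remain unchanged.
 */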
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

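/*
 * Conversely, parked at the maximum frequency with the engine awake but
 * idle (the caller holds rc6 disabled), an idle evaluation interval
 * should raise a DOWN threshold (or timeout) interrupt.
 */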
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps))
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

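	/* dE is in uJ and dt in ns, so this reports the mean power in mW */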
	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements
	 * support that theory.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (!librapl_energy_uJ())
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}