drm/i915/pmu: Support multiple GPUs
drivers/gpu/drm/i915/i915_pmu.c (platform/kernel/linux-starfive.git)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6
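/*
 * i915 perf PMU: exposes GPU counters to userspace through the kernel perf
 * framework. Per-engine busy/wait/sema times are sampled from ring registers
 * (or taken from software busy stats where supported), GPU frequencies are
 * sampled from a 200 Hz hrtimer, and interrupt count and RC6 residency are
 * read on demand. To support multiple GPUs, every non-integrated device
 * registers its PMU under a device-specific "i915-<dev_name>" name, while
 * the integrated GPU at 0000:00:02.0 keeps the legacy "i915" name.
 */
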
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9
10 #include "gt/intel_engine.h"
11 #include "gt/intel_engine_pm.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_rc6.h"
15
16 #include "i915_drv.h"
17 #include "i915_pmu.h"
18 #include "intel_pm.h"
19
20 /* Frequency for the sampling timer for events which need it. */
21 #define FREQUENCY 200
22 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
23
24 #define ENGINE_SAMPLE_MASK \
25         (BIT(I915_SAMPLE_BUSY) | \
26          BIT(I915_SAMPLE_WAIT) | \
27          BIT(I915_SAMPLE_SEMA))
28
29 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
30
31 static cpumask_t i915_pmu_cpumask;
32
33 static u8 engine_config_sample(u64 config)
34 {
35         return config & I915_PMU_SAMPLE_MASK;
36 }
37
38 static u8 engine_event_sample(struct perf_event *event)
39 {
40         return engine_config_sample(event->attr.config);
41 }
42
43 static u8 engine_event_class(struct perf_event *event)
44 {
45         return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
46 }
47
48 static u8 engine_event_instance(struct perf_event *event)
49 {
50         return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
51 }
52
53 static bool is_engine_config(u64 config)
54 {
55         return config < __I915_PMU_OTHER(0);
56 }
57
58 static unsigned int config_enabled_bit(u64 config)
59 {
60         if (is_engine_config(config))
61                 return engine_config_sample(config);
62         else
63                 return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
64 }
65
66 static u64 config_enabled_mask(u64 config)
67 {
68         return BIT_ULL(config_enabled_bit(config));
69 }
70
71 static bool is_engine_event(struct perf_event *event)
72 {
73         return is_engine_config(event->attr.config);
74 }
75
76 static unsigned int event_enabled_bit(struct perf_event *event)
77 {
78         return config_enabled_bit(event->attr.config);
79 }
80
81 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
82 {
83         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
84         u64 enable;
85
86         /*
87          * Only some counters need the sampling timer.
88          *
89          * We start with a bitmask of all currently enabled events.
90          */
91         enable = pmu->enable;
92
93         /*
94          * Mask out all the ones which do not need the timer, or in
95          * other words keep all the ones that could need the timer.
96          */
97         enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
98                   config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
99                   ENGINE_SAMPLE_MASK;
100
101         /*
102          * When the GPU is idle, per-engine counters do not need to be
103          * running, so clear those bits out.
104          */
105         if (!gpu_active)
106                 enable &= ~ENGINE_SAMPLE_MASK;
107         /*
108          * Also, if software busyness tracking is available, we do not
109          * need the timer for the I915_SAMPLE_BUSY counter.
110          */
111         else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
112                 enable &= ~BIT(I915_SAMPLE_BUSY);
113
114         /*
115          * If some bits remain it means we need the sampling timer running.
116          */
117         return enable;
118 }
119
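/*
 * Sum up the total RC6 residency, including the deeper RC6p/RC6pp states
 * where the platform supports them. Valleyview reports render RC6 through
 * a different register.
 */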
120 static u64 __get_rc6(struct intel_gt *gt)
121 {
122         struct drm_i915_private *i915 = gt->i915;
123         u64 val;
124
125         val = intel_rc6_residency_ns(&gt->rc6,
126                                      IS_VALLEYVIEW(i915) ?
127                                      VLV_GT_RENDER_RC6 :
128                                      GEN6_GT_GFX_RC6);
129
130         if (HAS_RC6p(i915))
131                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
132
133         if (HAS_RC6pp(i915))
134                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
135
136         return val;
137 }
138
139 #if IS_ENABLED(CONFIG_PM)
140
141 static inline s64 ktime_since(const ktime_t kt)
142 {
143         return ktime_to_ns(ktime_sub(ktime_get(), kt));
144 }
145
146 static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
147 {
148         u64 val;
149
150         /*
151          * We think we are runtime suspended.
152          *
153          * Report the delta from when the device was suspended to now,
154          * on top of the last known real value, as the approximated RC6
155          * counter value.
156          */
157         val = ktime_since(pmu->sleep_last);
158         val += pmu->sample[__I915_SAMPLE_RC6].cur;
159
160         pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
161
162         return val;
163 }
164
165 static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
166 {
167         /*
168          * If we are coming back from being runtime suspended we must
169          * be careful not to report a smaller value than we returned
170          * previously, i.e. the counter must never appear to go backwards.
171          */
172         if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
173                 pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
174                 pmu->sample[__I915_SAMPLE_RC6].cur = val;
175         } else {
176                 val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
177         }
178
179         return val;
180 }
181
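/*
 * If the GT is awake, read the real RC6 residency and fold it into the last
 * known value. Otherwise the device is (presumed) runtime suspended, so
 * return the last known value plus the time spent asleep as an estimate.
 */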
182 static u64 get_rc6(struct intel_gt *gt)
183 {
184         struct drm_i915_private *i915 = gt->i915;
185         struct i915_pmu *pmu = &i915->pmu;
186         unsigned long flags;
187         u64 val;
188
189         val = 0;
190         if (intel_gt_pm_get_if_awake(gt)) {
191                 val = __get_rc6(gt);
192                 intel_gt_pm_put(gt);
193         }
194
195         spin_lock_irqsave(&pmu->lock, flags);
196
197         if (val)
198                 val = __pmu_update_rc6(pmu, val);
199         else
200                 val = __pmu_estimate_rc6(pmu);
201
202         spin_unlock_irqrestore(&pmu->lock, flags);
203
204         return val;
205 }
206
207 static void park_rc6(struct drm_i915_private *i915)
208 {
209         struct i915_pmu *pmu = &i915->pmu;
210
211         if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
212                 __pmu_update_rc6(pmu, __get_rc6(&i915->gt));
213
214         pmu->sleep_last = ktime_get();
215 }
216
217 static void unpark_rc6(struct drm_i915_private *i915)
218 {
219         struct i915_pmu *pmu = &i915->pmu;
220
221         /* Estimate how long we slept and accumulate that into rc6 counters */
222         if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
223                 __pmu_estimate_rc6(pmu);
224 }
225
226 #else
227
228 static u64 get_rc6(struct intel_gt *gt)
229 {
230         return __get_rc6(gt);
231 }
232
233 static void park_rc6(struct drm_i915_private *i915) {}
234 static void unpark_rc6(struct drm_i915_private *i915) {}
235
236 #endif
237
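/*
 * Start the sampling hrtimer if at least one enabled event needs it and it
 * is not already running. Called with pmu->lock held.
 */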
238 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
239 {
240         if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
241                 pmu->timer_enabled = true;
242                 pmu->timer_last = ktime_get();
243                 hrtimer_start_range_ns(&pmu->timer,
244                                        ns_to_ktime(PERIOD), 0,
245                                        HRTIMER_MODE_REL_PINNED);
246         }
247 }
248
249 void i915_pmu_gt_parked(struct drm_i915_private *i915)
250 {
251         struct i915_pmu *pmu = &i915->pmu;
252
253         if (!pmu->base.event_init)
254                 return;
255
256         spin_lock_irq(&pmu->lock);
257
258         park_rc6(i915);
259
260         /*
261          * Signal the sampling timer to stop if only engine events are enabled
262          * and the GPU went idle.
263          */
264         pmu->timer_enabled = pmu_needs_timer(pmu, false);
265
266         spin_unlock_irq(&pmu->lock);
267 }
268
269 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
270 {
271         struct i915_pmu *pmu = &i915->pmu;
272
273         if (!pmu->base.event_init)
274                 return;
275
276         spin_lock_irq(&pmu->lock);
277
278         /*
279          * Re-enable the sampling timer when the GPU goes active.
280          */
281         __i915_pmu_maybe_start_timer(pmu);
282
283         unpark_rc6(i915);
284
285         spin_unlock_irq(&pmu->lock);
286 }
287
288 static void
289 add_sample(struct i915_pmu_sample *sample, u32 val)
290 {
291         sample->cur += val;
292 }
293
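/*
 * Called from the sampling timer: for every engine which is awake, peek at
 * RING_CTL and RING_MI_MODE under the uncore lock and credit the elapsed
 * period to the busy/wait/sema counters as appropriate. Engines with
 * software busy stats only need the wait/sema samples from here.
 */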
294 static void
295 engines_sample(struct intel_gt *gt, unsigned int period_ns)
296 {
297         struct drm_i915_private *i915 = gt->i915;
298         struct intel_engine_cs *engine;
299         enum intel_engine_id id;
300
301         if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
302                 return;
303
304         for_each_engine(engine, i915, id) {
305                 struct intel_engine_pmu *pmu = &engine->pmu;
306                 unsigned long flags;
307                 bool busy;
308                 u32 val;
309
310                 if (!intel_engine_pm_get_if_awake(engine))
311                         continue;
312
313                 spin_lock_irqsave(&engine->uncore->lock, flags);
314
315                 val = ENGINE_READ_FW(engine, RING_CTL);
316                 if (val == 0) /* powerwell off => engine idle */
317                         goto skip;
318
319                 if (val & RING_WAIT)
320                         add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
321                 if (val & RING_WAIT_SEMAPHORE)
322                         add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
323
324                 /* No need to sample when busy stats are supported. */
325                 if (intel_engine_supports_stats(engine))
326                         goto skip;
327
328                 /*
329                  * While waiting on a semaphore or event, MI_MODE reports the
330                  * ring as idle. However, previously using the seqno, and with
331                  * execlists sampling, we account for the ring waiting as the
332                  * engine being busy. Therefore, we record the sample as being
333                  * busy if either waiting or !idle.
334                  */
335                 busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
336                 if (!busy) {
337                         val = ENGINE_READ_FW(engine, RING_MI_MODE);
338                         busy = !(val & MODE_IDLE);
339                 }
340                 if (busy)
341                         add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
342
343 skip:
344                 spin_unlock_irqrestore(&engine->uncore->lock, flags);
345                 intel_engine_pm_put(engine);
346         }
347 }
348
349 static void
350 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
351 {
352         sample->cur += mul_u32_u32(val, mul);
353 }
354
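/*
 * Accumulate the actual (CAGF, read while the GT is awake, otherwise the
 * last requested value) and requested GPU frequencies, weighted by the
 * sampling period. After scaling on the read side the exposed counter
 * advances by the frequency in MHz for every second elapsed, which perf
 * turns into an average frequency.
 */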
355 static void
356 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
357 {
358         struct drm_i915_private *i915 = gt->i915;
359         struct intel_uncore *uncore = gt->uncore;
360         struct i915_pmu *pmu = &i915->pmu;
361
362         if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
363                 u32 val;
364
365                 val = i915->gt_pm.rps.cur_freq;
366                 if (intel_gt_pm_get_if_awake(gt)) {
367                         val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
368                         val = intel_get_cagf(i915, val);
369                         intel_gt_pm_put(gt);
370                 }
371
372                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
373                                 intel_gpu_freq(i915, val),
374                                 period_ns / 1000);
375         }
376
377         if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
378                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
379                                 intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq),
380                                 period_ns / 1000);
381         }
382 }
383
384 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
385 {
386         struct drm_i915_private *i915 =
387                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
388         struct i915_pmu *pmu = &i915->pmu;
389         struct intel_gt *gt = &i915->gt;
390         unsigned int period_ns;
391         ktime_t now;
392
393         if (!READ_ONCE(pmu->timer_enabled))
394                 return HRTIMER_NORESTART;
395
396         now = ktime_get();
397         period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
398         pmu->timer_last = now;
399
400         /*
401          * Strictly speaking the passed-in period may not be 100% accurate for
402          * all internal calculations, since some amount of time can be spent on
403          * grabbing the forcewake. However, the potential error from timer
404          * callback delay greatly dominates this, so we keep it simple.
405          */
406         engines_sample(gt, period_ns);
407         frequency_sample(gt, period_ns);
408
409         hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
410
411         return HRTIMER_RESTART;
412 }
413
414 static u64 count_interrupts(struct drm_i915_private *i915)
415 {
416         /* open-coded kstat_irqs() */
417         struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
418         u64 sum = 0;
419         int cpu;
420
421         if (!desc || !desc->kstat_irqs)
422                 return 0;
423
424         for_each_possible_cpu(cpu)
425                 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
426
427         return sum;
428 }
429
430 static void engine_event_destroy(struct perf_event *event)
431 {
432         struct drm_i915_private *i915 =
433                 container_of(event->pmu, typeof(*i915), pmu.base);
434         struct intel_engine_cs *engine;
435
436         engine = intel_engine_lookup_user(i915,
437                                           engine_event_class(event),
438                                           engine_event_instance(event));
439         if (WARN_ON_ONCE(!engine))
440                 return;
441
442         if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
443             intel_engine_supports_stats(engine))
444                 intel_disable_engine_stats(engine);
445 }
446
447 static void i915_pmu_event_destroy(struct perf_event *event)
448 {
449         WARN_ON(event->parent);
450
451         if (is_engine_event(event))
452                 engine_event_destroy(event);
453 }
454
455 static int
456 engine_event_status(struct intel_engine_cs *engine,
457                     enum drm_i915_pmu_engine_sample sample)
458 {
459         switch (sample) {
460         case I915_SAMPLE_BUSY:
461         case I915_SAMPLE_WAIT:
462                 break;
463         case I915_SAMPLE_SEMA:
464                 if (INTEL_GEN(engine->i915) < 6)
465                         return -ENODEV;
466                 break;
467         default:
468                 return -ENOENT;
469         }
470
471         return 0;
472 }
473
474 static int
475 config_status(struct drm_i915_private *i915, u64 config)
476 {
477         switch (config) {
478         case I915_PMU_ACTUAL_FREQUENCY:
479                 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
480                         /* Requires a mutex for sampling! */
481                         return -ENODEV;
482                 /* Fall-through. */
483         case I915_PMU_REQUESTED_FREQUENCY:
484                 if (INTEL_GEN(i915) < 6)
485                         return -ENODEV;
486                 break;
487         case I915_PMU_INTERRUPTS:
488                 break;
489         case I915_PMU_RC6_RESIDENCY:
490                 if (!HAS_RC6(i915))
491                         return -ENODEV;
492                 break;
493         default:
494                 return -ENOENT;
495         }
496
497         return 0;
498 }
499
500 static int engine_event_init(struct perf_event *event)
501 {
502         struct drm_i915_private *i915 =
503                 container_of(event->pmu, typeof(*i915), pmu.base);
504         struct intel_engine_cs *engine;
505         u8 sample;
506         int ret;
507
508         engine = intel_engine_lookup_user(i915, engine_event_class(event),
509                                           engine_event_instance(event));
510         if (!engine)
511                 return -ENODEV;
512
513         sample = engine_event_sample(event);
514         ret = engine_event_status(engine, sample);
515         if (ret)
516                 return ret;
517
518         if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
519                 ret = intel_enable_engine_stats(engine);
520
521         return ret;
522 }
523
524 static int i915_pmu_event_init(struct perf_event *event)
525 {
526         struct drm_i915_private *i915 =
527                 container_of(event->pmu, typeof(*i915), pmu.base);
528         int ret;
529
530         if (event->attr.type != event->pmu->type)
531                 return -ENOENT;
532
533         /* unsupported modes and filters */
534         if (event->attr.sample_period) /* no sampling */
535                 return -EINVAL;
536
537         if (has_branch_stack(event))
538                 return -EOPNOTSUPP;
539
540         if (event->cpu < 0)
541                 return -EINVAL;
542
543         /* only allow running on one cpu at a time */
544         if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
545                 return -EINVAL;
546
547         if (is_engine_event(event))
548                 ret = engine_event_init(event);
549         else
550                 ret = config_status(i915, event->attr.config);
551         if (ret)
552                 return ret;
553
554         if (!event->parent)
555                 event->destroy = i915_pmu_event_destroy;
556
557         return 0;
558 }
559
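/*
 * Return the current value of an event. Engine busyness comes from the
 * software busy stats when the backend supports them, otherwise from the
 * timer-sampled value. Frequency counters are the scaled timer samples,
 * while interrupts and RC6 residency are read on demand.
 */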
560 static u64 __i915_pmu_event_read(struct perf_event *event)
561 {
562         struct drm_i915_private *i915 =
563                 container_of(event->pmu, typeof(*i915), pmu.base);
564         struct i915_pmu *pmu = &i915->pmu;
565         u64 val = 0;
566
567         if (is_engine_event(event)) {
568                 u8 sample = engine_event_sample(event);
569                 struct intel_engine_cs *engine;
570
571                 engine = intel_engine_lookup_user(i915,
572                                                   engine_event_class(event),
573                                                   engine_event_instance(event));
574
575                 if (WARN_ON_ONCE(!engine)) {
576                         /* Do nothing */
577                 } else if (sample == I915_SAMPLE_BUSY &&
578                            intel_engine_supports_stats(engine)) {
579                         val = ktime_to_ns(intel_engine_get_busy_time(engine));
580                 } else {
581                         val = engine->pmu.sample[sample].cur;
582                 }
583         } else {
584                 switch (event->attr.config) {
585                 case I915_PMU_ACTUAL_FREQUENCY:
586                         val =
587                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
588                                    USEC_PER_SEC /* to MHz */);
589                         break;
590                 case I915_PMU_REQUESTED_FREQUENCY:
591                         val =
592                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
593                                    USEC_PER_SEC /* to MHz */);
594                         break;
595                 case I915_PMU_INTERRUPTS:
596                         val = count_interrupts(i915);
597                         break;
598                 case I915_PMU_RC6_RESIDENCY:
599                         val = get_rc6(&i915->gt);
600                         break;
601                 }
602         }
603
604         return val;
605 }
606
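/*
 * Publish the delta since the last read into the perf event count. The
 * previous value is advanced with a cmpxchg loop so that concurrent
 * readers never account the same delta twice.
 */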
607 static void i915_pmu_event_read(struct perf_event *event)
608 {
609         struct hw_perf_event *hwc = &event->hw;
610         u64 prev, new;
611
612 again:
613         prev = local64_read(&hwc->prev_count);
614         new = __i915_pmu_event_read(event);
615
616         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
617                 goto again;
618
619         local64_add(new - prev, &event->count);
620 }
621
622 static void i915_pmu_enable(struct perf_event *event)
623 {
624         struct drm_i915_private *i915 =
625                 container_of(event->pmu, typeof(*i915), pmu.base);
626         unsigned int bit = event_enabled_bit(event);
627         struct i915_pmu *pmu = &i915->pmu;
628         unsigned long flags;
629
630         spin_lock_irqsave(&pmu->lock, flags);
631
632         /*
633          * Update the bitmask of enabled events and increment
634          * the event reference counter.
635          */
636         BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
637         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
638         GEM_BUG_ON(pmu->enable_count[bit] == ~0);
639         pmu->enable |= BIT_ULL(bit);
640         pmu->enable_count[bit]++;
641
642         /*
643          * Start the sampling timer if needed and not already enabled.
644          */
645         __i915_pmu_maybe_start_timer(pmu);
646
647         /*
648          * For per-engine events the bitmask and reference counts
649          * are stored per engine.
650          */
651         if (is_engine_event(event)) {
652                 u8 sample = engine_event_sample(event);
653                 struct intel_engine_cs *engine;
654
655                 engine = intel_engine_lookup_user(i915,
656                                                   engine_event_class(event),
657                                                   engine_event_instance(event));
658
659                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
660                              I915_ENGINE_SAMPLE_COUNT);
661                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
662                              I915_ENGINE_SAMPLE_COUNT);
663                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
664                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
665                 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
666
667                 engine->pmu.enable |= BIT(sample);
668                 engine->pmu.enable_count[sample]++;
669         }
670
671         spin_unlock_irqrestore(&pmu->lock, flags);
672
673         /*
674          * Store the current counter value so we can report the correct delta
675          * for all listeners, even when the event was already enabled and has
676          * an existing non-zero value.
677          */
678         local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
679 }
680
681 static void i915_pmu_disable(struct perf_event *event)
682 {
683         struct drm_i915_private *i915 =
684                 container_of(event->pmu, typeof(*i915), pmu.base);
685         unsigned int bit = event_enabled_bit(event);
686         struct i915_pmu *pmu = &i915->pmu;
687         unsigned long flags;
688
689         spin_lock_irqsave(&pmu->lock, flags);
690
691         if (is_engine_event(event)) {
692                 u8 sample = engine_event_sample(event);
693                 struct intel_engine_cs *engine;
694
695                 engine = intel_engine_lookup_user(i915,
696                                                   engine_event_class(event),
697                                                   engine_event_instance(event));
698
699                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
700                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
701                 GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
702
703                 /*
704                  * Decrement the reference count and clear the enabled
705                  * bitmask when the last listener on an event goes away.
706                  */
707                 if (--engine->pmu.enable_count[sample] == 0)
708                         engine->pmu.enable &= ~BIT(sample);
709         }
710
711         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
712         GEM_BUG_ON(pmu->enable_count[bit] == 0);
713         /*
714          * Decrement the reference count and clear the enabled
715          * bitmask when the last listener on an event goes away.
716          */
717         if (--pmu->enable_count[bit] == 0) {
718                 pmu->enable &= ~BIT_ULL(bit);
719                 pmu->timer_enabled &= pmu_needs_timer(pmu, true);
720         }
721
722         spin_unlock_irqrestore(&pmu->lock, flags);
723 }
724
725 static void i915_pmu_event_start(struct perf_event *event, int flags)
726 {
727         i915_pmu_enable(event);
728         event->hw.state = 0;
729 }
730
731 static void i915_pmu_event_stop(struct perf_event *event, int flags)
732 {
733         if (flags & PERF_EF_UPDATE)
734                 i915_pmu_event_read(event);
735         i915_pmu_disable(event);
736         event->hw.state = PERF_HES_STOPPED;
737 }
738
739 static int i915_pmu_event_add(struct perf_event *event, int flags)
740 {
741         if (flags & PERF_EF_START)
742                 i915_pmu_event_start(event, flags);
743
744         return 0;
745 }
746
747 static void i915_pmu_event_del(struct perf_event *event, int flags)
748 {
749         i915_pmu_event_stop(event, PERF_EF_UPDATE);
750 }
751
752 static int i915_pmu_event_event_idx(struct perf_event *event)
753 {
754         return 0;
755 }
756
757 struct i915_str_attribute {
758         struct device_attribute attr;
759         const char *str;
760 };
761
762 static ssize_t i915_pmu_format_show(struct device *dev,
763                                     struct device_attribute *attr, char *buf)
764 {
765         struct i915_str_attribute *eattr;
766
767         eattr = container_of(attr, struct i915_str_attribute, attr);
768         return sprintf(buf, "%s\n", eattr->str);
769 }
770
771 #define I915_PMU_FORMAT_ATTR(_name, _config) \
772         (&((struct i915_str_attribute[]) { \
773                 { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
774                   .str = _config, } \
775         })[0].attr.attr)
776
777 static struct attribute *i915_pmu_format_attrs[] = {
778         I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
779         NULL,
780 };
781
782 static const struct attribute_group i915_pmu_format_attr_group = {
783         .name = "format",
784         .attrs = i915_pmu_format_attrs,
785 };
786
787 struct i915_ext_attribute {
788         struct device_attribute attr;
789         unsigned long val;
790 };
791
792 static ssize_t i915_pmu_event_show(struct device *dev,
793                                    struct device_attribute *attr, char *buf)
794 {
795         struct i915_ext_attribute *eattr;
796
797         eattr = container_of(attr, struct i915_ext_attribute, attr);
798         return sprintf(buf, "config=0x%lx\n", eattr->val);
799 }
800
801 static struct attribute_group i915_pmu_events_attr_group = {
802         .name = "events",
803         /* Patch in attrs at runtime. */
804 };
805
806 static ssize_t
807 i915_pmu_get_attr_cpumask(struct device *dev,
808                           struct device_attribute *attr,
809                           char *buf)
810 {
811         return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
812 }
813
814 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
815
816 static struct attribute *i915_cpumask_attrs[] = {
817         &dev_attr_cpumask.attr,
818         NULL,
819 };
820
821 static const struct attribute_group i915_pmu_cpumask_attr_group = {
822         .attrs = i915_cpumask_attrs,
823 };
824
825 static const struct attribute_group *i915_pmu_attr_groups[] = {
826         &i915_pmu_format_attr_group,
827         &i915_pmu_events_attr_group,
828         &i915_pmu_cpumask_attr_group,
829         NULL
830 };
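
/*
 * Userspace consumes these counters through the perf interface. As a rough
 * sketch (not part of the driver; constants come from the uapi header
 * include/uapi/drm/i915_drm.h and the PMU type id from sysfs), reading the
 * RC6 residency counter looks approximately like:
 *
 *	struct perf_event_attr attr = { };
 *	u64 count;
 *	int fd;
 *
 *	attr.type = ...;	/* /sys/bus/event_source/devices/i915/type */
 *	attr.size = sizeof(attr);
 *	attr.config = I915_PMU_RC6_RESIDENCY;
 *	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *	read(fd, &count, sizeof(count));
 *
 * Only system-wide counting is supported: pid must be -1 and the cpu
 * argument must be a CPU listed in the "cpumask" attribute above.
 */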
831
832 #define __event(__config, __name, __unit) \
833 { \
834         .config = (__config), \
835         .name = (__name), \
836         .unit = (__unit), \
837 }
838
839 #define __engine_event(__sample, __name) \
840 { \
841         .sample = (__sample), \
842         .name = (__name), \
843 }
844
845 static struct i915_ext_attribute *
846 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
847 {
848         sysfs_attr_init(&attr->attr.attr);
849         attr->attr.attr.name = name;
850         attr->attr.attr.mode = 0444;
851         attr->attr.show = i915_pmu_event_show;
852         attr->val = config;
853
854         return ++attr;
855 }
856
857 static struct perf_pmu_events_attr *
858 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
859              const char *str)
860 {
861         sysfs_attr_init(&attr->attr.attr);
862         attr->attr.attr.name = name;
863         attr->attr.attr.mode = 0444;
864         attr->attr.show = perf_event_sysfs_show;
865         attr->event_str = str;
866
867         return ++attr;
868 }
869
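/*
 * Build the sysfs "events" directory at runtime: one config attribute per
 * counter the device actually supports, plus an optional <event>.unit
 * attribute, terminated by a NULL entry.
 */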
870 static struct attribute **
871 create_event_attributes(struct i915_pmu *pmu)
872 {
873         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
874         static const struct {
875                 u64 config;
876                 const char *name;
877                 const char *unit;
878         } events[] = {
879                 __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
880                 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
881                 __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
882                 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
883         };
884         static const struct {
885                 enum drm_i915_pmu_engine_sample sample;
886                 char *name;
887         } engine_events[] = {
888                 __engine_event(I915_SAMPLE_BUSY, "busy"),
889                 __engine_event(I915_SAMPLE_SEMA, "sema"),
890                 __engine_event(I915_SAMPLE_WAIT, "wait"),
891         };
892         unsigned int count = 0;
893         struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
894         struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
895         struct attribute **attr = NULL, **attr_iter;
896         struct intel_engine_cs *engine;
897         unsigned int i;
898
899         /* Count how many counters we will be exposing. */
900         for (i = 0; i < ARRAY_SIZE(events); i++) {
901                 if (!config_status(i915, events[i].config))
902                         count++;
903         }
904
905         for_each_uabi_engine(engine, i915) {
906                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
907                         if (!engine_event_status(engine,
908                                                  engine_events[i].sample))
909                                 count++;
910                 }
911         }
912
913         /* Allocate attribute objects and table. */
914         i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
915         if (!i915_attr)
916                 goto err_alloc;
917
918         pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
919         if (!pmu_attr)
920                 goto err_alloc;
921
922         /* At most one pointer of each attribute type per event, plus a NULL terminator. */
923         attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
924         if (!attr)
925                 goto err_alloc;
926
927         i915_iter = i915_attr;
928         pmu_iter = pmu_attr;
929         attr_iter = attr;
930
931         /* Initialize supported non-engine counters. */
932         for (i = 0; i < ARRAY_SIZE(events); i++) {
933                 char *str;
934
935                 if (config_status(i915, events[i].config))
936                         continue;
937
938                 str = kstrdup(events[i].name, GFP_KERNEL);
939                 if (!str)
940                         goto err;
941
942                 *attr_iter++ = &i915_iter->attr.attr;
943                 i915_iter = add_i915_attr(i915_iter, str, events[i].config);
944
945                 if (events[i].unit) {
946                         str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
947                         if (!str)
948                                 goto err;
949
950                         *attr_iter++ = &pmu_iter->attr.attr;
951                         pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
952                 }
953         }
954
955         /* Initialize supported engine counters. */
956         for_each_uabi_engine(engine, i915) {
957                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
958                         char *str;
959
960                         if (engine_event_status(engine,
961                                                 engine_events[i].sample))
962                                 continue;
963
964                         str = kasprintf(GFP_KERNEL, "%s-%s",
965                                         engine->name, engine_events[i].name);
966                         if (!str)
967                                 goto err;
968
969                         *attr_iter++ = &i915_iter->attr.attr;
970                         i915_iter =
971                                 add_i915_attr(i915_iter, str,
972                                               __I915_PMU_ENGINE(engine->uabi_class,
973                                                                 engine->uabi_instance,
974                                                                 engine_events[i].sample));
975
976                         str = kasprintf(GFP_KERNEL, "%s-%s.unit",
977                                         engine->name, engine_events[i].name);
978                         if (!str)
979                                 goto err;
980
981                         *attr_iter++ = &pmu_iter->attr.attr;
982                         pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
983                 }
984         }
985
986         pmu->i915_attr = i915_attr;
987         pmu->pmu_attr = pmu_attr;
988
989         return attr;
990
991 err:
992         for (attr_iter = attr; *attr_iter; attr_iter++)
993                 kfree((*attr_iter)->name);
994
995 err_alloc:
996         kfree(attr);
997         kfree(i915_attr);
998         kfree(pmu_attr);
999
1000         return NULL;
1001 }
1002
1003 static void free_event_attributes(struct i915_pmu *pmu)
1004 {
1005         struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
1006
1007         for (; *attr_iter; attr_iter++)
1008                 kfree((*attr_iter)->name);
1009
1010         kfree(i915_pmu_events_attr_group.attrs);
1011         kfree(pmu->i915_attr);
1012         kfree(pmu->pmu_attr);
1013
1014         i915_pmu_events_attr_group.attrs = NULL;
1015         pmu->i915_attr = NULL;
1016         pmu->pmu_attr = NULL;
1017 }
1018
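/*
 * Events are counted on a single designated reader CPU. If that CPU goes
 * offline, the designation and any perf context are migrated to a sibling
 * CPU when one is available.
 */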
1019 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1020 {
1021         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
1022
1023         GEM_BUG_ON(!pmu->base.event_init);
1024
1025         /* Select the first online CPU as a designated reader. */
1026         if (!cpumask_weight(&i915_pmu_cpumask))
1027                 cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1028
1029         return 0;
1030 }
1031
1032 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1033 {
1034         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
1035         unsigned int target;
1036
1037         GEM_BUG_ON(!pmu->base.event_init);
1038
1039         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1040                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1041                 /* Migrate events if there is a valid target */
1042                 if (target < nr_cpu_ids) {
1043                         cpumask_set_cpu(target, &i915_pmu_cpumask);
1044                         perf_pmu_migrate_context(&pmu->base, cpu, target);
1045                 }
1046         }
1047
1048         return 0;
1049 }
1050
1051 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1052
1053 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1054 {
1055         enum cpuhp_state slot;
1056         int ret;
1057
1058         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1059                                       "perf/x86/intel/i915:online",
1060                                       i915_pmu_cpu_online,
1061                                       i915_pmu_cpu_offline);
1062         if (ret < 0)
1063                 return ret;
1064
1065         slot = ret;
1066         ret = cpuhp_state_add_instance(slot, &pmu->node);
1067         if (ret) {
1068                 cpuhp_remove_multi_state(slot);
1069                 return ret;
1070         }
1071
1072         cpuhp_slot = slot;
1073         return 0;
1074 }
1075
1076 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1077 {
1078         WARN_ON(cpuhp_slot == CPUHP_INVALID);
1079         WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
1080         cpuhp_remove_multi_state(cpuhp_slot);
1081 }
1082
1083 static bool is_igp(struct drm_i915_private *i915)
1084 {
1085         struct pci_dev *pdev = i915->drm.pdev;
1086
1087         /* IGP is 0000:00:02.0 */
1088         return pci_domain_nr(pdev->bus) == 0 &&
1089                pdev->bus->number == 0 &&
1090                PCI_SLOT(pdev->devfn) == 2 &&
1091                PCI_FUNC(pdev->devfn) == 0;
1092 }
1093
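/*
 * Register the PMU for one GPU: build the event attributes for whatever the
 * device supports, register with perf under a per-device name (only the
 * integrated GPU keeps the bare "i915" name, so multiple GPUs can coexist)
 * and hook up the CPU hotplug callbacks.
 */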
1094 void i915_pmu_register(struct drm_i915_private *i915)
1095 {
1096         struct i915_pmu *pmu = &i915->pmu;
1097         int ret = -ENOMEM;
1098
1099         if (INTEL_GEN(i915) <= 2) {
1100                 dev_info(i915->drm.dev, "PMU not supported for this GPU.\n");
1101                 return;
1102         }
1103
1104         i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
1105         if (!i915_pmu_events_attr_group.attrs) {
1106                 ret = -ENOMEM;
1107                 goto err;
1108         }
1109
1110         pmu->base.attr_groups   = i915_pmu_attr_groups;
1111         pmu->base.task_ctx_nr   = perf_invalid_context;
1112         pmu->base.event_init    = i915_pmu_event_init;
1113         pmu->base.add           = i915_pmu_event_add;
1114         pmu->base.del           = i915_pmu_event_del;
1115         pmu->base.start         = i915_pmu_event_start;
1116         pmu->base.stop          = i915_pmu_event_stop;
1117         pmu->base.read          = i915_pmu_event_read;
1118         pmu->base.event_idx     = i915_pmu_event_event_idx;
1119
1120         spin_lock_init(&pmu->lock);
1121         hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1122         pmu->timer.function = i915_sample;
1123
1124         if (!is_igp(i915))
1125                 pmu->name = kasprintf(GFP_KERNEL,
1126                                       "i915-%s",
1127                                       dev_name(i915->drm.dev));
1128         else
1129                 pmu->name = "i915";
1130         if (!pmu->name)
1131                 goto err;
1132
1133         ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1134         if (ret)
1135                 goto err_name;
1136
1137         ret = i915_pmu_register_cpuhp_state(pmu);
1138         if (ret)
1139                 goto err_unreg;
1140
1141         return;
1142
1143 err_unreg:
1144         perf_pmu_unregister(&pmu->base);
1145 err_name:
1146         if (!is_igp(i915))
1147                 kfree(pmu->name);
1148 err:
1149         pmu->base.event_init = NULL;
1150         free_event_attributes(pmu);
1151         DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1152 }
1153
1154 void i915_pmu_unregister(struct drm_i915_private *i915)
1155 {
1156         struct i915_pmu *pmu = &i915->pmu;
1157
1158         if (!pmu->base.event_init)
1159                 return;
1160
1161         WARN_ON(pmu->enable);
1162
1163         hrtimer_cancel(&pmu->timer);
1164
1165         i915_pmu_unregister_cpuhp_state(pmu);
1166
1167         perf_pmu_unregister(&pmu->base);
1168         pmu->base.event_init = NULL;
1169         if (!is_igp(i915))
1170                 kfree(pmu->name);
1171         free_event_attributes(pmu);
1172 }