arch/x86/kernel/cpu/perf_event.c
1 /*
2  * Performance events x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
11  *
12  *  For licensing details see kernel-base/COPYING
13  */
14
15 #include <linux/perf_event.h>
16 #include <linux/capability.h>
17 #include <linux/notifier.h>
18 #include <linux/hardirq.h>
19 #include <linux/kprobes.h>
20 #include <linux/module.h>
21 #include <linux/kdebug.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
24 #include <linux/highmem.h>
25 #include <linux/cpu.h>
26
27 #include <asm/apic.h>
28 #include <asm/stacktrace.h>
29 #include <asm/nmi.h>
30
31 static u64 perf_event_mask __read_mostly;
32
33 /* The maximal number of PEBS events: */
34 #define MAX_PEBS_EVENTS 4
35
36 /* The size of a BTS record in bytes: */
37 #define BTS_RECORD_SIZE         24
38
39 /* The size of a per-cpu BTS buffer in bytes: */
40 #define BTS_BUFFER_SIZE         (BTS_RECORD_SIZE * 2048)
41
42 /* The BTS overflow threshold in bytes from the end of the buffer: */
43 #define BTS_OVFL_TH             (BTS_RECORD_SIZE * 128)
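/*
 * With the values above this works out to a 48 KiB buffer per cpu
 * (2048 records of 24 bytes each), with the overflow threshold sitting
 * 128 records (3 KiB) before the end of the buffer.
 */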
44
45
46 /*
47  * Bits in the debugctlmsr controlling branch tracing.
48  */
49 #define X86_DEBUGCTL_TR                 (1 << 6)
50 #define X86_DEBUGCTL_BTS                (1 << 7)
51 #define X86_DEBUGCTL_BTINT              (1 << 8)
52 #define X86_DEBUGCTL_BTS_OFF_OS         (1 << 9)
53 #define X86_DEBUGCTL_BTS_OFF_USR        (1 << 10)
54
55 /*
56  * A debug store configuration.
57  *
58  * We only support architectures that use 64bit fields.
59  */
60 struct debug_store {
61         u64     bts_buffer_base;
62         u64     bts_index;
63         u64     bts_absolute_maximum;
64         u64     bts_interrupt_threshold;
65         u64     pebs_buffer_base;
66         u64     pebs_index;
67         u64     pebs_absolute_maximum;
68         u64     pebs_interrupt_threshold;
69         u64     pebs_event_reset[MAX_PEBS_EVENTS];
70 };
71
72 struct event_constraint {
73         union {
74                 unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75                 u64             idxmsk64[1];
76         };
77         int     code;
78         int     cmask;
79 };
80
81 struct cpu_hw_events {
82         struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
83         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
84         unsigned long           interrupts;
85         int                     enabled;
86         struct debug_store      *ds;
87
88         int                     n_events;
89         int                     n_added;
90         int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
91         struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
92 };
93
94 #define EVENT_CONSTRAINT(c, n, m) {     \
95         { .idxmsk64[0] = (n) },         \
96         .code = (c),                    \
97         .cmask = (m),                   \
98 }
99
100 #define EVENT_CONSTRAINT_END \
101         EVENT_CONSTRAINT(0, 0, 0)
102
103 #define for_each_event_constraint(e, c) \
104         for ((e) = (c); (e)->cmask; (e)++)
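/*
 * Usage sketch: the Nehalem table below contains, for instance,
 *
 *	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK)
 *
 * meaning event 0xc0 (INSTRUCTIONS_RETIRED) may run on generic
 * counters 0-1 (bits 0 and 1 of idxmsk) or on the first fixed counter
 * (bit 32, i.e. X86_PMC_IDX_FIXED); cmask is, roughly, the set of
 * config bits compared against ->code when looking up a constraint.
 */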
105
106 /*
107  * struct x86_pmu - generic x86 pmu
108  */
109 struct x86_pmu {
110         const char      *name;
111         int             version;
112         int             (*handle_irq)(struct pt_regs *);
113         void            (*disable_all)(void);
114         void            (*enable_all)(void);
115         void            (*enable)(struct hw_perf_event *, int);
116         void            (*disable)(struct hw_perf_event *, int);
117         unsigned        eventsel;
118         unsigned        perfctr;
119         u64             (*event_map)(int);
120         u64             (*raw_event)(u64);
121         int             max_events;
122         int             num_events;
123         int             num_events_fixed;
124         int             event_bits;
125         u64             event_mask;
126         int             apic;
127         u64             max_period;
128         u64             intel_ctrl;
129         void            (*enable_bts)(u64 config);
130         void            (*disable_bts)(void);
131         void            (*get_event_constraints)(struct cpu_hw_events *cpuc,
132                                                  struct perf_event *event,
133                                                  unsigned long *idxmsk);
134         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
135                                                  struct perf_event *event);
136         const struct event_constraint *event_constraints;
137 };
138
139 static struct x86_pmu x86_pmu __read_mostly;
140
141 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
142         .enabled = 1,
143 };
144
145 static int x86_perf_event_set_period(struct perf_event *event,
146                              struct hw_perf_event *hwc, int idx);
147
148 /*
149  * Not sure about some of these
150  */
151 static const u64 p6_perfmon_event_map[] =
152 {
153   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0079,
154   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
155   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0f2e,
156   [PERF_COUNT_HW_CACHE_MISSES]          = 0x012e,
157   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
158   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
159   [PERF_COUNT_HW_BUS_CYCLES]            = 0x0062,
160 };
161
162 static u64 p6_pmu_event_map(int hw_event)
163 {
164         return p6_perfmon_event_map[hw_event];
165 }
166
167 /*
168  * Event setting that is specified not to count anything.
169  * We use this to effectively disable a counter.
170  *
171  * L2_RQSTS with 0 MESI unit mask.
172  */
173 #define P6_NOP_EVENT                    0x0000002EULL
174
175 static u64 p6_pmu_raw_event(u64 hw_event)
176 {
177 #define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
178 #define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
179 #define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
180 #define P6_EVNTSEL_INV_MASK             0x00800000ULL
181 #define P6_EVNTSEL_REG_MASK             0xFF000000ULL
182
183 #define P6_EVNTSEL_MASK                 \
184         (P6_EVNTSEL_EVENT_MASK |        \
185          P6_EVNTSEL_UNIT_MASK  |        \
186          P6_EVNTSEL_EDGE_MASK  |        \
187          P6_EVNTSEL_INV_MASK   |        \
188          P6_EVNTSEL_REG_MASK)
189
190         return hw_event & P6_EVNTSEL_MASK;
191 }
192
193 static struct event_constraint intel_p6_event_constraints[] =
194 {
195         EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK),     /* FLOPS */
196         EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK),     /* FP_COMP_OPS_EXE */
197         EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK),     /* FP_ASSIST */
198         EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK),     /* MUL */
199         EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK),     /* DIV */
200         EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK),     /* CYCLES_DIV_BUSY */
201         EVENT_CONSTRAINT_END
202 };
203
204 /*
205  * Intel PerfMon v3. Used on Core2 and later.
206  */
207 static const u64 intel_perfmon_event_map[] =
208 {
209   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
210   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
211   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
212   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
213   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
214   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
215   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
216 };
217
218 static struct event_constraint intel_core_event_constraints[] =
219 {
220         EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
221         EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
222         EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
223         EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
224         EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
225         EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
226         EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
227         EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */
228         EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */
229         EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */
230         EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */
231         EVENT_CONSTRAINT_END
232 };
233
234 static struct event_constraint intel_nehalem_event_constraints[] =
235 {
236         EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
237         EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
238         EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
239         EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */
240         EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */
241         EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */
242         EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */
243         EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */
244         EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */
245         EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
246         EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */
247         EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */
248         EVENT_CONSTRAINT_END
249 };
250
251 static struct event_constraint intel_gen_event_constraints[] =
252 {
253         EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
254         EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
255         EVENT_CONSTRAINT_END
256 };
257
258 static u64 intel_pmu_event_map(int hw_event)
259 {
260         return intel_perfmon_event_map[hw_event];
261 }
262
263 /*
264  * Generalized hw caching related hw_event table, filled
265  * in on a per model basis. A value of 0 means
266  * 'not supported', -1 means 'hw_event makes no sense on
267  * this CPU', any other value means the raw hw_event
268  * ID.
269  */
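/*
 * For instance, in the Nehalem table below L1I:OP_PREFETCH is 0 (the
 * hardware has no such event to offer) while L1I:OP_WRITE is -1
 * (writing to the instruction cache is not a meaningful operation).
 */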
270
271 #define C(x) PERF_COUNT_HW_CACHE_##x
272
273 static u64 __read_mostly hw_cache_event_ids
274                                 [PERF_COUNT_HW_CACHE_MAX]
275                                 [PERF_COUNT_HW_CACHE_OP_MAX]
276                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
277
278 static __initconst u64 nehalem_hw_cache_event_ids
279                                 [PERF_COUNT_HW_CACHE_MAX]
280                                 [PERF_COUNT_HW_CACHE_OP_MAX]
281                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
282 {
283  [ C(L1D) ] = {
284         [ C(OP_READ) ] = {
285                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
286                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
287         },
288         [ C(OP_WRITE) ] = {
289                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
290                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
291         },
292         [ C(OP_PREFETCH) ] = {
293                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
294                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
295         },
296  },
297  [ C(L1I ) ] = {
298         [ C(OP_READ) ] = {
299                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
300                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
301         },
302         [ C(OP_WRITE) ] = {
303                 [ C(RESULT_ACCESS) ] = -1,
304                 [ C(RESULT_MISS)   ] = -1,
305         },
306         [ C(OP_PREFETCH) ] = {
307                 [ C(RESULT_ACCESS) ] = 0x0,
308                 [ C(RESULT_MISS)   ] = 0x0,
309         },
310  },
311  [ C(LL  ) ] = {
312         [ C(OP_READ) ] = {
313                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
314                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
315         },
316         [ C(OP_WRITE) ] = {
317                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
318                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
319         },
320         [ C(OP_PREFETCH) ] = {
321                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
322                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
323         },
324  },
325  [ C(DTLB) ] = {
326         [ C(OP_READ) ] = {
327                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
328                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
329         },
330         [ C(OP_WRITE) ] = {
331                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
332                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
333         },
334         [ C(OP_PREFETCH) ] = {
335                 [ C(RESULT_ACCESS) ] = 0x0,
336                 [ C(RESULT_MISS)   ] = 0x0,
337         },
338  },
339  [ C(ITLB) ] = {
340         [ C(OP_READ) ] = {
341                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
342                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
343         },
344         [ C(OP_WRITE) ] = {
345                 [ C(RESULT_ACCESS) ] = -1,
346                 [ C(RESULT_MISS)   ] = -1,
347         },
348         [ C(OP_PREFETCH) ] = {
349                 [ C(RESULT_ACCESS) ] = -1,
350                 [ C(RESULT_MISS)   ] = -1,
351         },
352  },
353  [ C(BPU ) ] = {
354         [ C(OP_READ) ] = {
355                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
356                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
357         },
358         [ C(OP_WRITE) ] = {
359                 [ C(RESULT_ACCESS) ] = -1,
360                 [ C(RESULT_MISS)   ] = -1,
361         },
362         [ C(OP_PREFETCH) ] = {
363                 [ C(RESULT_ACCESS) ] = -1,
364                 [ C(RESULT_MISS)   ] = -1,
365         },
366  },
367 };
368
369 static __initconst u64 core2_hw_cache_event_ids
370                                 [PERF_COUNT_HW_CACHE_MAX]
371                                 [PERF_COUNT_HW_CACHE_OP_MAX]
372                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
373 {
374  [ C(L1D) ] = {
375         [ C(OP_READ) ] = {
376                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
377                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
378         },
379         [ C(OP_WRITE) ] = {
380                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
381                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
382         },
383         [ C(OP_PREFETCH) ] = {
384                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
385                 [ C(RESULT_MISS)   ] = 0,
386         },
387  },
388  [ C(L1I ) ] = {
389         [ C(OP_READ) ] = {
390                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
391                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
392         },
393         [ C(OP_WRITE) ] = {
394                 [ C(RESULT_ACCESS) ] = -1,
395                 [ C(RESULT_MISS)   ] = -1,
396         },
397         [ C(OP_PREFETCH) ] = {
398                 [ C(RESULT_ACCESS) ] = 0,
399                 [ C(RESULT_MISS)   ] = 0,
400         },
401  },
402  [ C(LL  ) ] = {
403         [ C(OP_READ) ] = {
404                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
405                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
406         },
407         [ C(OP_WRITE) ] = {
408                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
409                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
410         },
411         [ C(OP_PREFETCH) ] = {
412                 [ C(RESULT_ACCESS) ] = 0,
413                 [ C(RESULT_MISS)   ] = 0,
414         },
415  },
416  [ C(DTLB) ] = {
417         [ C(OP_READ) ] = {
418                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
419                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
420         },
421         [ C(OP_WRITE) ] = {
422                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
423                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
424         },
425         [ C(OP_PREFETCH) ] = {
426                 [ C(RESULT_ACCESS) ] = 0,
427                 [ C(RESULT_MISS)   ] = 0,
428         },
429  },
430  [ C(ITLB) ] = {
431         [ C(OP_READ) ] = {
432                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
433                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
434         },
435         [ C(OP_WRITE) ] = {
436                 [ C(RESULT_ACCESS) ] = -1,
437                 [ C(RESULT_MISS)   ] = -1,
438         },
439         [ C(OP_PREFETCH) ] = {
440                 [ C(RESULT_ACCESS) ] = -1,
441                 [ C(RESULT_MISS)   ] = -1,
442         },
443  },
444  [ C(BPU ) ] = {
445         [ C(OP_READ) ] = {
446                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
447                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
448         },
449         [ C(OP_WRITE) ] = {
450                 [ C(RESULT_ACCESS) ] = -1,
451                 [ C(RESULT_MISS)   ] = -1,
452         },
453         [ C(OP_PREFETCH) ] = {
454                 [ C(RESULT_ACCESS) ] = -1,
455                 [ C(RESULT_MISS)   ] = -1,
456         },
457  },
458 };
459
460 static __initconst u64 atom_hw_cache_event_ids
461                                 [PERF_COUNT_HW_CACHE_MAX]
462                                 [PERF_COUNT_HW_CACHE_OP_MAX]
463                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
464 {
465  [ C(L1D) ] = {
466         [ C(OP_READ) ] = {
467                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
468                 [ C(RESULT_MISS)   ] = 0,
469         },
470         [ C(OP_WRITE) ] = {
471                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
472                 [ C(RESULT_MISS)   ] = 0,
473         },
474         [ C(OP_PREFETCH) ] = {
475                 [ C(RESULT_ACCESS) ] = 0x0,
476                 [ C(RESULT_MISS)   ] = 0,
477         },
478  },
479  [ C(L1I ) ] = {
480         [ C(OP_READ) ] = {
481                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
482                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
483         },
484         [ C(OP_WRITE) ] = {
485                 [ C(RESULT_ACCESS) ] = -1,
486                 [ C(RESULT_MISS)   ] = -1,
487         },
488         [ C(OP_PREFETCH) ] = {
489                 [ C(RESULT_ACCESS) ] = 0,
490                 [ C(RESULT_MISS)   ] = 0,
491         },
492  },
493  [ C(LL  ) ] = {
494         [ C(OP_READ) ] = {
495                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
496                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
497         },
498         [ C(OP_WRITE) ] = {
499                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
500                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
501         },
502         [ C(OP_PREFETCH) ] = {
503                 [ C(RESULT_ACCESS) ] = 0,
504                 [ C(RESULT_MISS)   ] = 0,
505         },
506  },
507  [ C(DTLB) ] = {
508         [ C(OP_READ) ] = {
509                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
510                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
511         },
512         [ C(OP_WRITE) ] = {
513                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
514                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
515         },
516         [ C(OP_PREFETCH) ] = {
517                 [ C(RESULT_ACCESS) ] = 0,
518                 [ C(RESULT_MISS)   ] = 0,
519         },
520  },
521  [ C(ITLB) ] = {
522         [ C(OP_READ) ] = {
523                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
524                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
525         },
526         [ C(OP_WRITE) ] = {
527                 [ C(RESULT_ACCESS) ] = -1,
528                 [ C(RESULT_MISS)   ] = -1,
529         },
530         [ C(OP_PREFETCH) ] = {
531                 [ C(RESULT_ACCESS) ] = -1,
532                 [ C(RESULT_MISS)   ] = -1,
533         },
534  },
535  [ C(BPU ) ] = {
536         [ C(OP_READ) ] = {
537                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
538                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
539         },
540         [ C(OP_WRITE) ] = {
541                 [ C(RESULT_ACCESS) ] = -1,
542                 [ C(RESULT_MISS)   ] = -1,
543         },
544         [ C(OP_PREFETCH) ] = {
545                 [ C(RESULT_ACCESS) ] = -1,
546                 [ C(RESULT_MISS)   ] = -1,
547         },
548  },
549 };
550
551 static u64 intel_pmu_raw_event(u64 hw_event)
552 {
553 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
554 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
555 #define CORE_EVNTSEL_EDGE_MASK          0x00040000ULL
556 #define CORE_EVNTSEL_INV_MASK           0x00800000ULL
557 #define CORE_EVNTSEL_REG_MASK           0xFF000000ULL
558
559 #define CORE_EVNTSEL_MASK               \
560         (INTEL_ARCH_EVTSEL_MASK |       \
561          INTEL_ARCH_UNIT_MASK   |       \
562          INTEL_ARCH_EDGE_MASK   |       \
563          INTEL_ARCH_INV_MASK    |       \
564          INTEL_ARCH_CNT_MASK)
565
566         return hw_event & CORE_EVNTSEL_MASK;
567 }
568
569 static __initconst u64 amd_hw_cache_event_ids
570                                 [PERF_COUNT_HW_CACHE_MAX]
571                                 [PERF_COUNT_HW_CACHE_OP_MAX]
572                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
573 {
574  [ C(L1D) ] = {
575         [ C(OP_READ) ] = {
576                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
577                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
578         },
579         [ C(OP_WRITE) ] = {
580                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
581                 [ C(RESULT_MISS)   ] = 0,
582         },
583         [ C(OP_PREFETCH) ] = {
584                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
585                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
586         },
587  },
588  [ C(L1I ) ] = {
589         [ C(OP_READ) ] = {
590                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
591                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
592         },
593         [ C(OP_WRITE) ] = {
594                 [ C(RESULT_ACCESS) ] = -1,
595                 [ C(RESULT_MISS)   ] = -1,
596         },
597         [ C(OP_PREFETCH) ] = {
598                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
599                 [ C(RESULT_MISS)   ] = 0,
600         },
601  },
602  [ C(LL  ) ] = {
603         [ C(OP_READ) ] = {
604                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
605                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
606         },
607         [ C(OP_WRITE) ] = {
608                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
609                 [ C(RESULT_MISS)   ] = 0,
610         },
611         [ C(OP_PREFETCH) ] = {
612                 [ C(RESULT_ACCESS) ] = 0,
613                 [ C(RESULT_MISS)   ] = 0,
614         },
615  },
616  [ C(DTLB) ] = {
617         [ C(OP_READ) ] = {
618                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
619                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
620         },
621         [ C(OP_WRITE) ] = {
622                 [ C(RESULT_ACCESS) ] = 0,
623                 [ C(RESULT_MISS)   ] = 0,
624         },
625         [ C(OP_PREFETCH) ] = {
626                 [ C(RESULT_ACCESS) ] = 0,
627                 [ C(RESULT_MISS)   ] = 0,
628         },
629  },
630  [ C(ITLB) ] = {
631         [ C(OP_READ) ] = {
632                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
633                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
634         },
635         [ C(OP_WRITE) ] = {
636                 [ C(RESULT_ACCESS) ] = -1,
637                 [ C(RESULT_MISS)   ] = -1,
638         },
639         [ C(OP_PREFETCH) ] = {
640                 [ C(RESULT_ACCESS) ] = -1,
641                 [ C(RESULT_MISS)   ] = -1,
642         },
643  },
644  [ C(BPU ) ] = {
645         [ C(OP_READ) ] = {
646                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
647                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
648         },
649         [ C(OP_WRITE) ] = {
650                 [ C(RESULT_ACCESS) ] = -1,
651                 [ C(RESULT_MISS)   ] = -1,
652         },
653         [ C(OP_PREFETCH) ] = {
654                 [ C(RESULT_ACCESS) ] = -1,
655                 [ C(RESULT_MISS)   ] = -1,
656         },
657  },
658 };
659
660 /*
661  * AMD Performance Monitor K7 and later.
662  */
663 static const u64 amd_perfmon_event_map[] =
664 {
665   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
666   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
667   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
668   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
669   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
670   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
671 };
672
673 static u64 amd_pmu_event_map(int hw_event)
674 {
675         return amd_perfmon_event_map[hw_event];
676 }
677
678 static u64 amd_pmu_raw_event(u64 hw_event)
679 {
680 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
681 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
682 #define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
683 #define K7_EVNTSEL_INV_MASK     0x000800000ULL
684 #define K7_EVNTSEL_REG_MASK     0x0FF000000ULL
685
686 #define K7_EVNTSEL_MASK                 \
687         (K7_EVNTSEL_EVENT_MASK |        \
688          K7_EVNTSEL_UNIT_MASK  |        \
689          K7_EVNTSEL_EDGE_MASK  |        \
690          K7_EVNTSEL_INV_MASK   |        \
691          K7_EVNTSEL_REG_MASK)
692
693         return hw_event & K7_EVNTSEL_MASK;
694 }
695
696 /*
697  * Propagate event elapsed time into the generic event.
698  * Can only be executed on the CPU where the event is active.
699  * Returns the new raw count.
700  */
701 static u64
702 x86_perf_event_update(struct perf_event *event,
703                         struct hw_perf_event *hwc, int idx)
704 {
705         int shift = 64 - x86_pmu.event_bits;
706         u64 prev_raw_count, new_raw_count;
707         s64 delta;
708
709         if (idx == X86_PMC_IDX_FIXED_BTS)
710                 return 0;
711
712         /*
713          * Careful: an NMI might modify the previous event value.
714          *
715          * Our tactic to handle this is to first atomically read and
716          * exchange a new raw count - then add that new-prev delta
717          * count to the generic event atomically:
718          */
719 again:
720         prev_raw_count = atomic64_read(&hwc->prev_count);
721         rdmsrl(hwc->event_base + idx, new_raw_count);
722
723         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
724                                         new_raw_count) != prev_raw_count)
725                 goto again;
726
727         /*
728          * Now we have the new raw value and have updated the prev
729          * timestamp already. We can now calculate the elapsed delta
730          * (event-)time and add that to the generic event.
731          *
732          * Careful, not all hw sign-extends above the physical width
733          * of the count.
734          */
735         delta = (new_raw_count << shift) - (prev_raw_count << shift);
736         delta >>= shift;
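        /*
         * Worked example, assuming 48-bit counters (shift == 16): if the
         * counter wrapped from prev == 0xffffffffffff to new == 0x5, the
         * two shifts give (0x50000 - 0xffffffffffff0000) == 0x60000 as a
         * 64-bit value, and delta >> 16 == 6, i.e. the six increments
         * that actually happened, rather than a huge bogus delta.
         */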
737
738         atomic64_add(delta, &event->count);
739         atomic64_sub(delta, &hwc->period_left);
740
741         return new_raw_count;
742 }
743
744 static atomic_t active_events;
745 static DEFINE_MUTEX(pmc_reserve_mutex);
746
747 static bool reserve_pmc_hardware(void)
748 {
749 #ifdef CONFIG_X86_LOCAL_APIC
750         int i;
751
752         if (nmi_watchdog == NMI_LOCAL_APIC)
753                 disable_lapic_nmi_watchdog();
754
755         for (i = 0; i < x86_pmu.num_events; i++) {
756                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
757                         goto perfctr_fail;
758         }
759
760         for (i = 0; i < x86_pmu.num_events; i++) {
761                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
762                         goto eventsel_fail;
763         }
764 #endif
765
766         return true;
767
768 #ifdef CONFIG_X86_LOCAL_APIC
769 eventsel_fail:
770         for (i--; i >= 0; i--)
771                 release_evntsel_nmi(x86_pmu.eventsel + i);
772
773         i = x86_pmu.num_events;
774
775 perfctr_fail:
776         for (i--; i >= 0; i--)
777                 release_perfctr_nmi(x86_pmu.perfctr + i);
778
779         if (nmi_watchdog == NMI_LOCAL_APIC)
780                 enable_lapic_nmi_watchdog();
781
782         return false;
783 #endif
784 }
785
786 static void release_pmc_hardware(void)
787 {
788 #ifdef CONFIG_X86_LOCAL_APIC
789         int i;
790
791         for (i = 0; i < x86_pmu.num_events; i++) {
792                 release_perfctr_nmi(x86_pmu.perfctr + i);
793                 release_evntsel_nmi(x86_pmu.eventsel + i);
794         }
795
796         if (nmi_watchdog == NMI_LOCAL_APIC)
797                 enable_lapic_nmi_watchdog();
798 #endif
799 }
800
801 static inline bool bts_available(void)
802 {
803         return x86_pmu.enable_bts != NULL;
804 }
805
806 static inline void init_debug_store_on_cpu(int cpu)
807 {
808         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
809
810         if (!ds)
811                 return;
812
813         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
814                      (u32)((u64)(unsigned long)ds),
815                      (u32)((u64)(unsigned long)ds >> 32));
816 }
817
818 static inline void fini_debug_store_on_cpu(int cpu)
819 {
820         if (!per_cpu(cpu_hw_events, cpu).ds)
821                 return;
822
823         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
824 }
825
826 static void release_bts_hardware(void)
827 {
828         int cpu;
829
830         if (!bts_available())
831                 return;
832
833         get_online_cpus();
834
835         for_each_online_cpu(cpu)
836                 fini_debug_store_on_cpu(cpu);
837
838         for_each_possible_cpu(cpu) {
839                 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
840
841                 if (!ds)
842                         continue;
843
844                 per_cpu(cpu_hw_events, cpu).ds = NULL;
845
846                 kfree((void *)(unsigned long)ds->bts_buffer_base);
847                 kfree(ds);
848         }
849
850         put_online_cpus();
851 }
852
853 static int reserve_bts_hardware(void)
854 {
855         int cpu, err = 0;
856
857         if (!bts_available())
858                 return 0;
859
860         get_online_cpus();
861
862         for_each_possible_cpu(cpu) {
863                 struct debug_store *ds;
864                 void *buffer;
865
866                 err = -ENOMEM;
867                 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
868                 if (unlikely(!buffer))
869                         break;
870
871                 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
872                 if (unlikely(!ds)) {
873                         kfree(buffer);
874                         break;
875                 }
876
877                 ds->bts_buffer_base = (u64)(unsigned long)buffer;
878                 ds->bts_index = ds->bts_buffer_base;
879                 ds->bts_absolute_maximum =
880                         ds->bts_buffer_base + BTS_BUFFER_SIZE;
881                 ds->bts_interrupt_threshold =
882                         ds->bts_absolute_maximum - BTS_OVFL_TH;
883
884                 per_cpu(cpu_hw_events, cpu).ds = ds;
885                 err = 0;
886         }
887
888         if (err)
889                 release_bts_hardware();
890         else {
891                 for_each_online_cpu(cpu)
892                         init_debug_store_on_cpu(cpu);
893         }
894
895         put_online_cpus();
896
897         return err;
898 }
899
900 static void hw_perf_event_destroy(struct perf_event *event)
901 {
902         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
903                 release_pmc_hardware();
904                 release_bts_hardware();
905                 mutex_unlock(&pmc_reserve_mutex);
906         }
907 }
908
909 static inline int x86_pmu_initialized(void)
910 {
911         return x86_pmu.handle_irq != NULL;
912 }
913
914 static inline int
915 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
916 {
917         unsigned int cache_type, cache_op, cache_result;
918         u64 config, val;
919
920         config = attr->config;
921
922         cache_type = (config >>  0) & 0xff;
923         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
924                 return -EINVAL;
925
926         cache_op = (config >>  8) & 0xff;
927         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
928                 return -EINVAL;
929
930         cache_result = (config >> 16) & 0xff;
931         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
932                 return -EINVAL;
933
934         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
935
936         if (val == 0)
937                 return -ENOENT;
938
939         if (val == -1)
940                 return -EINVAL;
941
942         hwc->config |= val;
943
944         return 0;
945 }
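/*
 * The cache events above are selected by packing attr->config as
 * type | (op << 8) | (result << 16).  A sketch of counting L1 data
 * cache read misses, for example:
 *
 *	attr.type   = PERF_TYPE_HW_CACHE;
 *	attr.config = C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16);
 *
 * which on Nehalem resolves to 0x0140 (L1D_CACHE_LD.I_STATE) via
 * hw_cache_event_ids[].
 */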
946
947 static void intel_pmu_enable_bts(u64 config)
948 {
949         unsigned long debugctlmsr;
950
951         debugctlmsr = get_debugctlmsr();
952
953         debugctlmsr |= X86_DEBUGCTL_TR;
954         debugctlmsr |= X86_DEBUGCTL_BTS;
955         debugctlmsr |= X86_DEBUGCTL_BTINT;
956
957         if (!(config & ARCH_PERFMON_EVENTSEL_OS))
958                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
959
960         if (!(config & ARCH_PERFMON_EVENTSEL_USR))
961                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
962
963         update_debugctlmsr(debugctlmsr);
964 }
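/*
 * E.g. for a user-space-only BTS event (EVENTSEL_USR set, EVENTSEL_OS
 * clear in config), the write above roughly sets
 * TR | BTS | BTINT | BTS_OFF_OS: trace branches, store them through
 * BTS, raise an interrupt when the buffer threshold is reached, and
 * suppress tracing while running in ring 0.
 */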
965
966 static void intel_pmu_disable_bts(void)
967 {
968         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
969         unsigned long debugctlmsr;
970
971         if (!cpuc->ds)
972                 return;
973
974         debugctlmsr = get_debugctlmsr();
975
976         debugctlmsr &=
977                 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
978                   X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
979
980         update_debugctlmsr(debugctlmsr);
981 }
982
983 /*
984  * Setup the hardware configuration for a given attr_type
985  */
986 static int __hw_perf_event_init(struct perf_event *event)
987 {
988         struct perf_event_attr *attr = &event->attr;
989         struct hw_perf_event *hwc = &event->hw;
990         u64 config;
991         int err;
992
993         if (!x86_pmu_initialized())
994                 return -ENODEV;
995
996         err = 0;
997         if (!atomic_inc_not_zero(&active_events)) {
998                 mutex_lock(&pmc_reserve_mutex);
999                 if (atomic_read(&active_events) == 0) {
1000                         if (!reserve_pmc_hardware())
1001                                 err = -EBUSY;
1002                         else
1003                                 err = reserve_bts_hardware();
1004                 }
1005                 if (!err)
1006                         atomic_inc(&active_events);
1007                 mutex_unlock(&pmc_reserve_mutex);
1008         }
1009         if (err)
1010                 return err;
1011
1012         event->destroy = hw_perf_event_destroy;
1013
1014         /*
1015          * Generate PMC IRQs:
1016          * (keep 'enabled' bit clear for now)
1017          */
1018         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
1019
1020         hwc->idx = -1;
1021
1022         /*
1023          * Count user and OS events unless requested not to.
1024          */
1025         if (!attr->exclude_user)
1026                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
1027         if (!attr->exclude_kernel)
1028                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
1029
1030         if (!hwc->sample_period) {
1031                 hwc->sample_period = x86_pmu.max_period;
1032                 hwc->last_period = hwc->sample_period;
1033                 atomic64_set(&hwc->period_left, hwc->sample_period);
1034         } else {
1035                 /*
1036                  * If we have a PMU initialized but no APIC
1037                  * interrupts, we cannot sample hardware
1038                  * events (user-space has to fall back and
1039                  * sample via a hrtimer based software event):
1040                  */
1041                 if (!x86_pmu.apic)
1042                         return -EOPNOTSUPP;
1043         }
1044
1045         /*
1046  * Raw hw_event types provide the config directly in the hw_event structure
1047          */
1048         if (attr->type == PERF_TYPE_RAW) {
1049                 hwc->config |= x86_pmu.raw_event(attr->config);
1050                 return 0;
1051         }
1052
1053         if (attr->type == PERF_TYPE_HW_CACHE)
1054                 return set_ext_hw_attr(hwc, attr);
1055
1056         if (attr->config >= x86_pmu.max_events)
1057                 return -EINVAL;
1058
1059         /*
1060          * The generic map:
1061          */
1062         config = x86_pmu.event_map(attr->config);
1063
1064         if (config == 0)
1065                 return -ENOENT;
1066
1067         if (config == -1LL)
1068                 return -EINVAL;
1069
1070         /*
1071          * Branch tracing:
1072          */
1073         if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
1074             (hwc->sample_period == 1)) {
1075                 /* BTS is not supported by this architecture. */
1076                 if (!bts_available())
1077                         return -EOPNOTSUPP;
1078
1079                 /* BTS is currently only allowed for user-mode. */
1080                 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1081                         return -EOPNOTSUPP;
1082         }
1083
1084         hwc->config |= config;
1085
1086         return 0;
1087 }
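/*
 * Rough example of the generic path above: a PERF_TYPE_HARDWARE event
 * with attr.config == PERF_COUNT_HW_CPU_CYCLES picks up 0x003c from
 * intel_pmu_event_map() on Intel, so (with no exclude_* bits set)
 * hwc->config ends up as 0x003c | EVENTSEL_INT | EVENTSEL_USR |
 * EVENTSEL_OS, with the enable bit still clear.
 */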
1088
1089 static void p6_pmu_disable_all(void)
1090 {
1091         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1092         u64 val;
1093
1094         if (!cpuc->enabled)
1095                 return;
1096
1097         cpuc->enabled = 0;
1098         barrier();
1099
1100         /* p6 only has one enable register */
1101         rdmsrl(MSR_P6_EVNTSEL0, val);
1102         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1103         wrmsrl(MSR_P6_EVNTSEL0, val);
1104 }
1105
1106 static void intel_pmu_disable_all(void)
1107 {
1108         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1109
1110         if (!cpuc->enabled)
1111                 return;
1112
1113         cpuc->enabled = 0;
1114         barrier();
1115
1116         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1117
1118         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1119                 intel_pmu_disable_bts();
1120 }
1121
1122 static void amd_pmu_disable_all(void)
1123 {
1124         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1125         int idx;
1126
1127         if (!cpuc->enabled)
1128                 return;
1129
1130         cpuc->enabled = 0;
1131         /*
1132          * ensure we write the disable before we start disabling the
1133          * events proper, so that amd_pmu_enable_event() does the
1134          * right thing.
1135          */
1136         barrier();
1137
1138         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1139                 u64 val;
1140
1141                 if (!test_bit(idx, cpuc->active_mask))
1142                         continue;
1143                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
1144                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
1145                         continue;
1146                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1147                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1148         }
1149 }
1150
1151 void hw_perf_disable(void)
1152 {
1153         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1154
1155         if (!x86_pmu_initialized())
1156                 return;
1157
1158         if (cpuc->enabled)
1159                 cpuc->n_added = 0;
1160
1161         x86_pmu.disable_all();
1162 }
1163
1164 static void p6_pmu_enable_all(void)
1165 {
1166         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1167         unsigned long val;
1168
1169         if (cpuc->enabled)
1170                 return;
1171
1172         cpuc->enabled = 1;
1173         barrier();
1174
1175         /* p6 only has one enable register */
1176         rdmsrl(MSR_P6_EVNTSEL0, val);
1177         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1178         wrmsrl(MSR_P6_EVNTSEL0, val);
1179 }
1180
1181 static void intel_pmu_enable_all(void)
1182 {
1183         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1184
1185         if (cpuc->enabled)
1186                 return;
1187
1188         cpuc->enabled = 1;
1189         barrier();
1190
1191         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1192
1193         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1194                 struct perf_event *event =
1195                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
1196
1197                 if (WARN_ON_ONCE(!event))
1198                         return;
1199
1200                 intel_pmu_enable_bts(event->hw.config);
1201         }
1202 }
1203
1204 static void amd_pmu_enable_all(void)
1205 {
1206         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1207         int idx;
1208
1209         if (cpuc->enabled)
1210                 return;
1211
1212         cpuc->enabled = 1;
1213         barrier();
1214
1215         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1216                 struct perf_event *event = cpuc->events[idx];
1217                 u64 val;
1218
1219                 if (!test_bit(idx, cpuc->active_mask))
1220                         continue;
1221
1222                 val = event->hw.config;
1223                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1224                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1225         }
1226 }
1227
1228 static const struct pmu pmu;
1229
1230 static inline int is_x86_event(struct perf_event *event)
1231 {
1232         return event->pmu == &pmu;
1233 }
1234
1235 static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1236 {
1237         int i, j, w, num;
1238         int weight, wmax;
1239         unsigned long *c;
1240         unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
1241         unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
1242         struct hw_perf_event *hwc;
1243
1244         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1245
1246         for (i = 0; i < n; i++) {
1247                 x86_pmu.get_event_constraints(cpuc,
1248                                               cpuc->event_list[i],
1249                                               constraints[i]);
1250         }
1251
1252         /*
1253          * fastpath, try to reuse previous register
1254          */
1255         for (i = 0, num = n; i < n; i++, num--) {
1256                 hwc = &cpuc->event_list[i]->hw;
1257                 c = constraints[i];
1258
1259                 /* never assigned */
1260                 if (hwc->idx == -1)
1261                         break;
1262
1263                 /* constraint still honored */
1264                 if (!test_bit(hwc->idx, c))
1265                         break;
1266
1267                 /* not already used */
1268                 if (test_bit(hwc->idx, used_mask))
1269                         break;
1270
1271 #if 0
1272                 pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
1273                          smp_processor_id(),
1274                          hwc->config,
1275                          hwc->idx,
1276                          assign ? 'y' : 'n');
1277 #endif
1278
1279                 set_bit(hwc->idx, used_mask);
1280                 if (assign)
1281                         assign[i] = hwc->idx;
1282         }
1283         if (!num)
1284                 goto done;
1285
1286         /*
1287          * begin slow path
1288          */
1289
1290         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1291
1292         /*
1293          * weight = number of possible counters
1294          *
1295          * 1    = most constrained, only works on one counter
1296          * wmax = least constrained, works on any counter
1297          *
1298          * assign events to counters starting with most
1299          * constrained events.
1300          */
1301         wmax = x86_pmu.num_events;
1302
1303         /*
1304          * when fixed event counters are present,
1305          * wmax is incremented by 1 to account
1306          * for one more choice
1307          */
1308         if (x86_pmu.num_events_fixed)
1309                 wmax++;
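        /*
         * Illustrative example: given event A constrained to counter 0
         * only (weight 1), event B constrained to counters 0-1 (weight 2)
         * and an unconstrained event C, the pass below places A on
         * counter 0 first, then B on counter 1, then C on any remaining
         * counter; a naive in-order assignment could instead burn
         * counter 0 on C and fail.
         */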
1310
1311         for (w = 1, num = n; num && w <= wmax; w++) {
1312                 /* for each event */
1313                 for (i = 0; num && i < n; i++) {
1314                         c = constraints[i];
1315                         hwc = &cpuc->event_list[i]->hw;
1316
1317                         weight = bitmap_weight(c, X86_PMC_IDX_MAX);
1318                         if (weight != w)
1319                                 continue;
1320
1321                         for_each_bit(j, c, X86_PMC_IDX_MAX) {
1322                                 if (!test_bit(j, used_mask))
1323                                         break;
1324                         }
1325
1326                         if (j == X86_PMC_IDX_MAX)
1327                                 break;
1328
1329 #if 0
1330                         pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
1331                                 smp_processor_id(),
1332                                 hwc->config,
1333                                 j,
1334                                 assign ? 'y' : 'n');
1335 #endif
1336
1337                         set_bit(j, used_mask);
1338
1339                         if (assign)
1340                                 assign[i] = j;
1341                         num--;
1342                 }
1343         }
1344 done:
1345         /*
1346          * scheduling failed or is just a simulation,
1347          * free resources if necessary
1348          */
1349         if (!assign || num) {
1350                 for (i = 0; i < n; i++) {
1351                         if (x86_pmu.put_event_constraints)
1352                                 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
1353                 }
1354         }
1355         return num ? -ENOSPC : 0;
1356 }
1357
1358 /*
1359  * dogrp: true if we must collect sibling events (group)
1360  * returns the total number of events, or an error code
1361  */
1362 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1363 {
1364         struct perf_event *event;
1365         int n, max_count;
1366
1367         max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1368
1369         /* current number of events already accepted */
1370         n = cpuc->n_events;
1371
1372         if (is_x86_event(leader)) {
1373                 if (n >= max_count)
1374                         return -ENOSPC;
1375                 cpuc->event_list[n] = leader;
1376                 n++;
1377         }
1378         if (!dogrp)
1379                 return n;
1380
1381         list_for_each_entry(event, &leader->sibling_list, group_entry) {
1382                 if (!is_x86_event(event) ||
1383                     event->state <= PERF_EVENT_STATE_OFF)
1384                         continue;
1385
1386                 if (n >= max_count)
1387                         return -ENOSPC;
1388
1389                 cpuc->event_list[n] = event;
1390                 n++;
1391         }
1392         return n;
1393 }
1394
1395
1396 static inline void x86_assign_hw_event(struct perf_event *event,
1397                                 struct hw_perf_event *hwc, int idx)
1398 {
1399         hwc->idx = idx;
1400
1401         if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
1402                 hwc->config_base = 0;
1403                 hwc->event_base = 0;
1404         } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
1405                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1406                 /*
1407                  * We set it so that event_base + idx in wrmsr/rdmsr maps to
1408                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1409                  */
1410                 hwc->event_base =
1411                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1412         } else {
1413                 hwc->config_base = x86_pmu.eventsel;
1414                 hwc->event_base  = x86_pmu.perfctr;
1415         }
1416 }
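/*
 * Example of the fixed-counter case: with X86_PMC_IDX_FIXED at its
 * usual value of 32, an event assigned idx 33 is accessed as
 * event_base + idx == MSR_ARCH_PERFMON_FIXED_CTR0 + 1, i.e. through
 * MSR_ARCH_PERFMON_FIXED_CTR1.
 */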
1417
1418 void hw_perf_enable(void)
1419 {
1420         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1421         struct perf_event *event;
1422         struct hw_perf_event *hwc;
1423         int i;
1424
1425         if (!x86_pmu_initialized())
1426                 return;
1427         if (cpuc->n_added) {
1428                 /*
1429                  * apply assignment obtained either from
1430                  * hw_perf_group_sched_in() or x86_pmu_enable()
1431                  *
1432                  * step1: save events moving to new counters
1433                  * step2: reprogram moved events into new counters
1434                  */
1435                 for (i = 0; i < cpuc->n_events; i++) {
1436
1437                         event = cpuc->event_list[i];
1438                         hwc = &event->hw;
1439
1440                         if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
1441                                 continue;
1442
1443                         x86_pmu.disable(hwc, hwc->idx);
1444
1445                         clear_bit(hwc->idx, cpuc->active_mask);
1446                         barrier();
1447                         cpuc->events[hwc->idx] = NULL;
1448
1449                         x86_perf_event_update(event, hwc, hwc->idx);
1450
1451                         hwc->idx = -1;
1452                 }
1453
1454                 for (i = 0; i < cpuc->n_events; i++) {
1455
1456                         event = cpuc->event_list[i];
1457                         hwc = &event->hw;
1458
1459                         if (hwc->idx == -1) {
1460                                 x86_assign_hw_event(event, hwc, cpuc->assign[i]);
1461                                 x86_perf_event_set_period(event, hwc, hwc->idx);
1462                         }
1463                         /*
1464                          * need to mark as active because x86_pmu_disable()
1465                          * clears active_mask and events[] yet it preserves
1466                          * idx
1467                          */
1468                         set_bit(hwc->idx, cpuc->active_mask);
1469                         cpuc->events[hwc->idx] = event;
1470
1471                         x86_pmu.enable(hwc, hwc->idx);
1472                         perf_event_update_userpage(event);
1473                 }
1474                 cpuc->n_added = 0;
1475                 perf_events_lapic_init();
1476         }
1477         x86_pmu.enable_all();
1478 }
1479
1480 static inline u64 intel_pmu_get_status(void)
1481 {
1482         u64 status;
1483
1484         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1485
1486         return status;
1487 }
1488
1489 static inline void intel_pmu_ack_status(u64 ack)
1490 {
1491         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1492 }
1493
1494 static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1495 {
1496         (void)checking_wrmsrl(hwc->config_base + idx,
1497                               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1498 }
1499
1500 static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1501 {
1502         (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1503 }
1504
1505 static inline void
1506 intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1507 {
1508         int idx = __idx - X86_PMC_IDX_FIXED;
1509         u64 ctrl_val, mask;
1510
1511         mask = 0xfULL << (idx * 4);
1512
1513         rdmsrl(hwc->config_base, ctrl_val);
1514         ctrl_val &= ~mask;
1515         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1516 }
1517
1518 static inline void
1519 p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1520 {
1521         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1522         u64 val = P6_NOP_EVENT;
1523
1524         if (cpuc->enabled)
1525                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1526
1527         (void)checking_wrmsrl(hwc->config_base + idx, val);
1528 }
1529
1530 static inline void
1531 intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1532 {
1533         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1534                 intel_pmu_disable_bts();
1535                 return;
1536         }
1537
1538         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1539                 intel_pmu_disable_fixed(hwc, idx);
1540                 return;
1541         }
1542
1543         x86_pmu_disable_event(hwc, idx);
1544 }
1545
1546 static inline void
1547 amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1548 {
1549         x86_pmu_disable_event(hwc, idx);
1550 }
1551
1552 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1553
1554 /*
1555  * Set the next IRQ period, based on the hwc->period_left value.
1556  * To be called with the event disabled in hw:
1557  */
1558 static int
1559 x86_perf_event_set_period(struct perf_event *event,
1560                              struct hw_perf_event *hwc, int idx)
1561 {
1562         s64 left = atomic64_read(&hwc->period_left);
1563         s64 period = hwc->sample_period;
1564         int err, ret = 0;
1565
1566         if (idx == X86_PMC_IDX_FIXED_BTS)
1567                 return 0;
1568
1569         /*
1570          * If we are way outside a reasonable range then just skip forward:
1571          */
1572         if (unlikely(left <= -period)) {
1573                 left = period;
1574                 atomic64_set(&hwc->period_left, left);
1575                 hwc->last_period = period;
1576                 ret = 1;
1577         }
1578
1579         if (unlikely(left <= 0)) {
1580                 left += period;
1581                 atomic64_set(&hwc->period_left, left);
1582                 hwc->last_period = period;
1583                 ret = 1;
1584         }
1585         /*
1586          * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1587          */
1588         if (unlikely(left < 2))
1589                 left = 2;
1590
1591         if (left > x86_pmu.max_period)
1592                 left = x86_pmu.max_period;
1593
1594         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1595
1596         /*
1597          * The hw event starts counting from this event offset,
1598          * mark it to be able to compute future deltas:
1599          */
1600         atomic64_set(&hwc->prev_count, (u64)-left);
1601
1602         err = checking_wrmsrl(hwc->event_base + idx,
1603                              (u64)(-left) & x86_pmu.event_mask);
1604
1605         perf_event_update_userpage(event);
1606
1607         return ret;
1608 }
1609
1610 static inline void
1611 intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1612 {
1613         int idx = __idx - X86_PMC_IDX_FIXED;
1614         u64 ctrl_val, bits, mask;
1615         int err;
1616
1617         /*
1618          * Enable IRQ generation (0x8),
1619          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1620          * if requested:
1621          */
1622         bits = 0x8ULL;
1623         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1624                 bits |= 0x2;
1625         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1626                 bits |= 0x1;
1627         bits <<= (idx * 4);
1628         mask = 0xfULL << (idx * 4);
1629
1630         rdmsrl(hwc->config_base, ctrl_val);
1631         ctrl_val &= ~mask;
1632         ctrl_val |= bits;
1633         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1634 }
1635
1636 static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1637 {
1638         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1639         u64 val;
1640
1641         val = hwc->config;
1642         if (cpuc->enabled)
1643                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1644
1645         (void)checking_wrmsrl(hwc->config_base + idx, val);
1646 }
1647
1648
1649 static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1650 {
1651         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1652                 if (!__get_cpu_var(cpu_hw_events).enabled)
1653                         return;
1654
1655                 intel_pmu_enable_bts(hwc->config);
1656                 return;
1657         }
1658
1659         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1660                 intel_pmu_enable_fixed(hwc, idx);
1661                 return;
1662         }
1663
1664         x86_pmu_enable_event(hwc, idx);
1665 }
1666
1667 static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1668 {
1669         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1670
1671         if (cpuc->enabled)
1672                 x86_pmu_enable_event(hwc, idx);
1673 }
1674
1675 /*
1676  * activate a single event
1677  *
1678  * The event is added to the group of enabled events
1679  * but only if it can be scheduled with existing events.
1680  *
1681  * Called with the PMU disabled. If successful and the return value is 1,
1682  * then the caller is guaranteed to call perf_enable() and hw_perf_enable().
1683  */
1684 static int x86_pmu_enable(struct perf_event *event)
1685 {
1686         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1687         struct hw_perf_event *hwc;
1688         int assign[X86_PMC_IDX_MAX];
1689         int n, n0, ret;
1690
1691         hwc = &event->hw;
1692
1693         n0 = cpuc->n_events;
1694         n = collect_events(cpuc, event, false);
1695         if (n < 0)
1696                 return n;
1697
1698         ret = x86_schedule_events(cpuc, n, assign);
1699         if (ret)
1700                 return ret;
1701         /*
1702          * Copy the new assignment now that we know it is possible;
1703          * it will be used by hw_perf_enable().
1704          */
1705         memcpy(cpuc->assign, assign, n*sizeof(int));
1706
1707         cpuc->n_events = n;
1708         cpuc->n_added  = n - n0;
1709
1710         if (hwc->idx != -1)
1711                 x86_perf_event_set_period(event, hwc, hwc->idx);
1712
1713         return 0;
1714 }
1715
1716 static void x86_pmu_unthrottle(struct perf_event *event)
1717 {
1718         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1719         struct hw_perf_event *hwc = &event->hw;
1720
1721         if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1722                                 cpuc->events[hwc->idx] != event))
1723                 return;
1724
1725         x86_pmu.enable(hwc, hwc->idx);
1726 }
1727
1728 void perf_event_print_debug(void)
1729 {
1730         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1731         struct cpu_hw_events *cpuc;
1732         unsigned long flags;
1733         int cpu, idx;
1734
1735         if (!x86_pmu.num_events)
1736                 return;
1737
1738         local_irq_save(flags);
1739
1740         cpu = smp_processor_id();
1741         cpuc = &per_cpu(cpu_hw_events, cpu);
1742
1743         if (x86_pmu.version >= 2) {
1744                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1745                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1746                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1747                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1748
1749                 pr_info("\n");
1750                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
1751                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
1752                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
1753                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1754         }
1755         pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1756
1757         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1758                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1759                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
1760
1761                 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1762
1763                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
1764                         cpu, idx, pmc_ctrl);
1765                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
1766                         cpu, idx, pmc_count);
1767                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1768                         cpu, idx, prev_left);
1769         }
1770         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1771                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1772
1773                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1774                         cpu, idx, pmc_count);
1775         }
1776         local_irq_restore(flags);
1777 }
1778
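/*
 * Drain the BTS buffer: turn each branch record (from, to, flags) in the
 * debug store into a perf sample attributed to the BTS event, then reset
 * the buffer index back to the base.
 */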
1779 static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1780 {
1781         struct debug_store *ds = cpuc->ds;
1782         struct bts_record {
1783                 u64     from;
1784                 u64     to;
1785                 u64     flags;
1786         };
1787         struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1788         struct bts_record *at, *top;
1789         struct perf_output_handle handle;
1790         struct perf_event_header header;
1791         struct perf_sample_data data;
1792         struct pt_regs regs;
1793
1794         if (!event)
1795                 return;
1796
1797         if (!ds)
1798                 return;
1799
1800         at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1801         top = (struct bts_record *)(unsigned long)ds->bts_index;
1802
1803         if (top <= at)
1804                 return;
1805
1806         ds->bts_index = ds->bts_buffer_base;
1807
1808
1809         data.period     = event->hw.last_period;
1810         data.addr       = 0;
1811         data.raw        = NULL;
1812         regs.ip         = 0;
1813
1814         /*
1815          * Prepare a generic sample, i.e. fill in the invariant fields.
1816          * We will overwrite the from and to address before we output
1817          * the sample.
1818          */
1819         perf_prepare_sample(&header, &data, event, &regs);
1820
1821         if (perf_output_begin(&handle, event,
1822                               header.size * (top - at), 1, 1))
1823                 return;
1824
1825         for (; at < top; at++) {
1826                 data.ip         = at->from;
1827                 data.addr       = at->to;
1828
1829                 perf_output_sample(&handle, &header, &data, event);
1830         }
1831
1832         perf_output_end(&handle);
1833
1834         /* There's new data available. */
1835         event->hw.interrupts++;
1836         event->pending_kill = POLL_IN;
1837 }
1838
1839 static void x86_pmu_disable(struct perf_event *event)
1840 {
1841         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1842         struct hw_perf_event *hwc = &event->hw;
1843         int i, idx = hwc->idx;
1844
1845         /*
1846          * Must be done before we disable, otherwise the NMI handler
1847          * could re-enable it again:
1848          */
1849         clear_bit(idx, cpuc->active_mask);
1850         x86_pmu.disable(hwc, idx);
1851
1852         /*
1853          * Make sure the cleared pointer becomes visible before we
1854          * (potentially) free the event:
1855          */
1856         barrier();
1857
1858         /*
1859          * Drain the remaining delta count out of an event
1860          * that we are disabling:
1861          */
1862         x86_perf_event_update(event, hwc, idx);
1863
1864         /* Drain the remaining BTS records. */
1865         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1866                 intel_pmu_drain_bts_buffer(cpuc);
1867
1868         cpuc->events[idx] = NULL;
1869
1870         for (i = 0; i < cpuc->n_events; i++) {
1871                 if (event == cpuc->event_list[i]) {
1872
1873                         if (x86_pmu.put_event_constraints)
1874                                 x86_pmu.put_event_constraints(cpuc, event);
1875
1876                         while (++i < cpuc->n_events)
1877                                 cpuc->event_list[i-1] = cpuc->event_list[i];
1878
1879                         --cpuc->n_events;
1880                 }
1881         }
1882         perf_event_update_userpage(event);
1883 }
1884
1885 /*
1886  * Save and restart an expired event. Called from NMI context,
1887  * so it has to be careful about preempting normal event ops:
1888  */
1889 static int intel_pmu_save_and_restart(struct perf_event *event)
1890 {
1891         struct hw_perf_event *hwc = &event->hw;
1892         int idx = hwc->idx;
1893         int ret;
1894
1895         x86_perf_event_update(event, hwc, idx);
1896         ret = x86_perf_event_set_period(event, hwc, idx);
1897
1898         if (event->state == PERF_EVENT_STATE_ACTIVE)
1899                 intel_pmu_enable_event(hwc, idx);
1900
1901         return ret;
1902 }
1903
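/*
 * Last-resort cleanup: zero all generic and fixed counters and reset the
 * BTS buffer index. Used when the overflow handler appears to be stuck.
 */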
1904 static void intel_pmu_reset(void)
1905 {
1906         struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1907         unsigned long flags;
1908         int idx;
1909
1910         if (!x86_pmu.num_events)
1911                 return;
1912
1913         local_irq_save(flags);
1914
1915         pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
1916
1917         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1918                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1919                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1920         }
1921         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1922                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1923         }
1924         if (ds)
1925                 ds->bts_index = ds->bts_buffer_base;
1926
1927         local_irq_restore(flags);
1928 }
1929
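/*
 * P6 has no global overflow status register, so walk all active counters
 * and treat any counter whose value no longer has the sign bit set (it was
 * programmed to -left) as having overflowed.
 */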
1930 static int p6_pmu_handle_irq(struct pt_regs *regs)
1931 {
1932         struct perf_sample_data data;
1933         struct cpu_hw_events *cpuc;
1934         struct perf_event *event;
1935         struct hw_perf_event *hwc;
1936         int idx, handled = 0;
1937         u64 val;
1938
1939         data.addr = 0;
1940         data.raw = NULL;
1941
1942         cpuc = &__get_cpu_var(cpu_hw_events);
1943
1944         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1945                 if (!test_bit(idx, cpuc->active_mask))
1946                         continue;
1947
1948                 event = cpuc->events[idx];
1949                 hwc = &event->hw;
1950
1951                 val = x86_perf_event_update(event, hwc, idx);
1952                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1953                         continue;
1954
1955                 /*
1956                  * event overflow
1957                  */
1958                 handled         = 1;
1959                 data.period     = event->hw.last_period;
1960
1961                 if (!x86_perf_event_set_period(event, hwc, idx))
1962                         continue;
1963
1964                 if (perf_event_overflow(event, 1, &data, regs))
1965                         p6_pmu_disable_event(hwc, idx);
1966         }
1967
1968         if (handled)
1969                 inc_irq_stat(apic_perf_irqs);
1970
1971         return handled;
1972 }
1973
1974 /*
1975  * This handler is triggered by the local APIC, so the APIC IRQ handling
1976  * rules apply:
1977  */
1978 static int intel_pmu_handle_irq(struct pt_regs *regs)
1979 {
1980         struct perf_sample_data data;
1981         struct cpu_hw_events *cpuc;
1982         int bit, loops;
1983         u64 ack, status;
1984
1985         data.addr = 0;
1986         data.raw = NULL;
1987
1988         cpuc = &__get_cpu_var(cpu_hw_events);
1989
1990         perf_disable();
1991         intel_pmu_drain_bts_buffer(cpuc);
1992         status = intel_pmu_get_status();
1993         if (!status) {
1994                 perf_enable();
1995                 return 0;
1996         }
1997
1998         loops = 0;
1999 again:
2000         if (++loops > 100) {
2001                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
2002                 perf_event_print_debug();
2003                 intel_pmu_reset();
2004                 perf_enable();
2005                 return 1;
2006         }
2007
2008         inc_irq_stat(apic_perf_irqs);
2009         ack = status;
2010         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
2011                 struct perf_event *event = cpuc->events[bit];
2012
2013                 clear_bit(bit, (unsigned long *) &status);
2014                 if (!test_bit(bit, cpuc->active_mask))
2015                         continue;
2016
2017                 if (!intel_pmu_save_and_restart(event))
2018                         continue;
2019
2020                 data.period = event->hw.last_period;
2021
2022                 if (perf_event_overflow(event, 1, &data, regs))
2023                         intel_pmu_disable_event(&event->hw, bit);
2024         }
2025
2026         intel_pmu_ack_status(ack);
2027
2028         /*
2029          * Repeat if there is more work to be done:
2030          */
2031         status = intel_pmu_get_status();
2032         if (status)
2033                 goto again;
2034
2035         perf_enable();
2036
2037         return 1;
2038 }
2039
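/*
 * AMD (K7 and later) likewise has no global overflow status register, so
 * every active counter is polled, just as on P6.
 */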
2040 static int amd_pmu_handle_irq(struct pt_regs *regs)
2041 {
2042         struct perf_sample_data data;
2043         struct cpu_hw_events *cpuc;
2044         struct perf_event *event;
2045         struct hw_perf_event *hwc;
2046         int idx, handled = 0;
2047         u64 val;
2048
2049         data.addr = 0;
2050         data.raw = NULL;
2051
2052         cpuc = &__get_cpu_var(cpu_hw_events);
2053
2054         for (idx = 0; idx < x86_pmu.num_events; idx++) {
2055                 if (!test_bit(idx, cpuc->active_mask))
2056                         continue;
2057
2058                 event = cpuc->events[idx];
2059                 hwc = &event->hw;
2060
2061                 val = x86_perf_event_update(event, hwc, idx);
2062                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
2063                         continue;
2064
2065                 /*
2066                  * event overflow
2067                  */
2068                 handled         = 1;
2069                 data.period     = event->hw.last_period;
2070
2071                 if (!x86_perf_event_set_period(event, hwc, idx))
2072                         continue;
2073
2074                 if (perf_event_overflow(event, 1, &data, regs))
2075                         amd_pmu_disable_event(hwc, idx);
2076         }
2077
2078         if (handled)
2079                 inc_irq_stat(apic_perf_irqs);
2080
2081         return handled;
2082 }
2083
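/*
 * Handler for the self-IPI raised by set_perf_event_pending() below: ack
 * the APIC and run the pending perf_event work in IRQ context.
 */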
2084 void smp_perf_pending_interrupt(struct pt_regs *regs)
2085 {
2086         irq_enter();
2087         ack_APIC_irq();
2088         inc_irq_stat(apic_pending_irqs);
2089         perf_event_do_pending();
2090         irq_exit();
2091 }
2092
2093 void set_perf_event_pending(void)
2094 {
2095 #ifdef CONFIG_X86_LOCAL_APIC
2096         if (!x86_pmu.apic || !x86_pmu_initialized())
2097                 return;
2098
2099         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
2100 #endif
2101 }
2102
2103 void perf_events_lapic_init(void)
2104 {
2105 #ifdef CONFIG_X86_LOCAL_APIC
2106         if (!x86_pmu.apic || !x86_pmu_initialized())
2107                 return;
2108
2109         /*
2110          * Always use NMI for PMU
2111          */
2112         apic_write(APIC_LVTPC, APIC_DM_NMI);
2113 #endif
2114 }
2115
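/*
 * PMU NMI notifier: the LVT performance counter entry may be masked when
 * the PMI is delivered, so re-arm it before letting the model-specific
 * handler process the overflows.
 */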
2116 static int __kprobes
2117 perf_event_nmi_handler(struct notifier_block *self,
2118                          unsigned long cmd, void *__args)
2119 {
2120         struct die_args *args = __args;
2121         struct pt_regs *regs;
2122
2123         if (!atomic_read(&active_events))
2124                 return NOTIFY_DONE;
2125
2126         switch (cmd) {
2127         case DIE_NMI:
2128         case DIE_NMI_IPI:
2129                 break;
2130
2131         default:
2132                 return NOTIFY_DONE;
2133         }
2134
2135         regs = args->regs;
2136
2137 #ifdef CONFIG_X86_LOCAL_APIC
2138         apic_write(APIC_LVTPC, APIC_DM_NMI);
2139 #endif
2140         /*
2141          * Can't rely on the handled return value to say it was our NMI, two
2142          * events could trigger 'simultaneously' raising two back-to-back NMIs.
2143          *
2144          * If the first NMI handles both, the latter will be empty and daze
2145          * the CPU.
2146          */
2147         x86_pmu.handle_irq(regs);
2148
2149         return NOTIFY_STOP;
2150 }
2151
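/*
 * BTS tracing can only run on the fixed BTS pseudo-counter;
 * intel_special_constraints() steers branch-instruction events with a
 * sample period of 1 onto it.
 */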
2152 static struct event_constraint bts_constraint =
2153         EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
2154
2155 static int intel_special_constraints(struct perf_event *event,
2156                                      unsigned long *idxmsk)
2157 {
2158         unsigned int hw_event;
2159
2160         hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
2161
2162         if (unlikely((hw_event ==
2163                       x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
2164                      (event->hw.sample_period == 1))) {
2165
2166                 bitmap_copy((unsigned long *)idxmsk,
2167                             (unsigned long *)bts_constraint.idxmsk,
2168                             X86_PMC_IDX_MAX);
2169                 return 1;
2170         }
2171         return 0;
2172 }
2173
2174 static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
2175                                         struct perf_event *event,
2176                                         unsigned long *idxmsk)
2177 {
2178         const struct event_constraint *c;
2179
2180         /*
2181          * cleanup bitmask
2182          */
2183         bitmap_zero(idxmsk, X86_PMC_IDX_MAX);
2184
2185         if (intel_special_constraints(event, idxmsk))
2186                 return;
2187
2188         if (x86_pmu.event_constraints) {
2189                 for_each_event_constraint(c, x86_pmu.event_constraints) {
2190                         if ((event->hw.config & c->cmask) == c->code) {
2191                                 bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX);
2192                                 return;
2193                         }
2194                 }
2195         }
2196         /* no constraints means the event supports all generic counters */
2197         bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
2198 }
2199
2200 static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
2201                                       struct perf_event *event,
2202                                       unsigned long *idxmsk)
2203 {
2204         /* no constraints means the event supports all generic counters */
2205         bitmap_fill(idxmsk, x86_pmu.num_events);
2206 }
2207
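
/*
 * Mark an event as running/stopped for group scheduling. Only non-x86
 * (software sibling) events are enabled/disabled through their own pmu
 * here; x86 events are activated later by hw_perf_enable().
 */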
2208 static int x86_event_sched_in(struct perf_event *event,
2209                           struct perf_cpu_context *cpuctx, int cpu)
2210 {
2211         int ret = 0;
2212
2213         event->state = PERF_EVENT_STATE_ACTIVE;
2214         event->oncpu = cpu;
2215         event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2216
2217         if (!is_x86_event(event))
2218                 ret = event->pmu->enable(event);
2219
2220         if (!ret && !is_software_event(event))
2221                 cpuctx->active_oncpu++;
2222
2223         if (!ret && event->attr.exclusive)
2224                 cpuctx->exclusive = 1;
2225
2226         return ret;
2227 }
2228
2229 static void x86_event_sched_out(struct perf_event *event,
2230                             struct perf_cpu_context *cpuctx, int cpu)
2231 {
2232         event->state = PERF_EVENT_STATE_INACTIVE;
2233         event->oncpu = -1;
2234
2235         if (!is_x86_event(event))
2236                 event->pmu->disable(event);
2237
2238         event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2239
2240         if (!is_software_event(event))
2241                 cpuctx->active_oncpu--;
2242
2243         if (event->attr.exclusive || !cpuctx->active_oncpu)
2244                 cpuctx->exclusive = 0;
2245 }
2246
2247 /*
2248  * Called to enable a whole group of events.
2249  * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
2250  * Assumes the caller has disabled interrupts and has
2251  * frozen the PMU with hw_perf_save_disable.
2252  *
2253  * called with PMU disabled. If successful and return value 1,
2254  * Called with the PMU disabled. If successful and the return value is 1,
2255  * then the caller is guaranteed to call perf_enable() and hw_perf_enable().
2256 int hw_perf_group_sched_in(struct perf_event *leader,
2257                struct perf_cpu_context *cpuctx,
2258                struct perf_event_context *ctx, int cpu)
2259 {
2260         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
2261         struct perf_event *sub;
2262         int assign[X86_PMC_IDX_MAX];
2263         int n0, n1, ret;
2264
2265         /* n0 = total number of events */
2266         n0 = collect_events(cpuc, leader, true);
2267         if (n0 < 0)
2268                 return n0;
2269
2270         ret = x86_schedule_events(cpuc, n0, assign);
2271         if (ret)
2272                 return ret;
2273
2274         ret = x86_event_sched_in(leader, cpuctx, cpu);
2275         if (ret)
2276                 return ret;
2277
2278         n1 = 1;
2279         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
2280                 if (sub->state > PERF_EVENT_STATE_OFF) {
2281                         ret = x86_event_sched_in(sub, cpuctx, cpu);
2282                         if (ret)
2283                                 goto undo;
2284                         ++n1;
2285                 }
2286         }
2287         /*
2288          * Copy the new assignment now that we know it is possible;
2289          * it will be used by hw_perf_enable().
2290          */
2291         memcpy(cpuc->assign, assign, n0*sizeof(int));
2292
2293         cpuc->n_events  = n0;
2294         cpuc->n_added   = n1;
2295         ctx->nr_active += n1;
2296
2297         /*
2298          * A return value of 1 means the events are active.
2299          * This is not quite true because we defer
2300          * actual activation until hw_perf_enable(), but
2301          * this way we ensure the caller won't try to enable
2302          * individual events.
2303          */
2304         return 1;
2305 undo:
2306         x86_event_sched_out(leader, cpuctx, cpu);
2307         n0  = 1;
2308         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
2309                 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
2310                         x86_event_sched_out(sub, cpuctx, cpu);
2311                         if (++n0 == n1)
2312                                 break;
2313                 }
2314         }
2315         return ret;
2316 }
2317
2318 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
2319         .notifier_call          = perf_event_nmi_handler,
2320         .next                   = NULL,
2321         .priority               = 1
2322 };
2323
2324 static __initconst struct x86_pmu p6_pmu = {
2325         .name                   = "p6",
2326         .handle_irq             = p6_pmu_handle_irq,
2327         .disable_all            = p6_pmu_disable_all,
2328         .enable_all             = p6_pmu_enable_all,
2329         .enable                 = p6_pmu_enable_event,
2330         .disable                = p6_pmu_disable_event,
2331         .eventsel               = MSR_P6_EVNTSEL0,
2332         .perfctr                = MSR_P6_PERFCTR0,
2333         .event_map              = p6_pmu_event_map,
2334         .raw_event              = p6_pmu_raw_event,
2335         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
2336         .apic                   = 1,
2337         .max_period             = (1ULL << 31) - 1,
2338         .version                = 0,
2339         .num_events             = 2,
2340         /*
2341          * Events have 40 bits implemented. However they are designed such
2342          * that bits [32-39] are sign extensions of bit 31. As such the
2343          * effective width of an event for a P6-like PMU is 32 bits only.
2344          *
2345          * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B.
2346          */
2347         .event_bits             = 32,
2348         .event_mask             = (1ULL << 32) - 1,
2349         .get_event_constraints  = intel_get_event_constraints,
2350         .event_constraints      = intel_p6_event_constraints
2351 };
2352
2353 static __initconst struct x86_pmu intel_pmu = {
2354         .name                   = "Intel",
2355         .handle_irq             = intel_pmu_handle_irq,
2356         .disable_all            = intel_pmu_disable_all,
2357         .enable_all             = intel_pmu_enable_all,
2358         .enable                 = intel_pmu_enable_event,
2359         .disable                = intel_pmu_disable_event,
2360         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
2361         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
2362         .event_map              = intel_pmu_event_map,
2363         .raw_event              = intel_pmu_raw_event,
2364         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
2365         .apic                   = 1,
2366         /*
2367          * Intel PMCs cannot be accessed sanely above 32 bit width,
2368          * so we install an artificial 1<<31 period regardless of
2369          * the generic event period:
2370          */
2371         .max_period             = (1ULL << 31) - 1,
2372         .enable_bts             = intel_pmu_enable_bts,
2373         .disable_bts            = intel_pmu_disable_bts,
2374         .get_event_constraints  = intel_get_event_constraints
2375 };
2376
2377 static __initconst struct x86_pmu amd_pmu = {
2378         .name                   = "AMD",
2379         .handle_irq             = amd_pmu_handle_irq,
2380         .disable_all            = amd_pmu_disable_all,
2381         .enable_all             = amd_pmu_enable_all,
2382         .enable                 = amd_pmu_enable_event,
2383         .disable                = amd_pmu_disable_event,
2384         .eventsel               = MSR_K7_EVNTSEL0,
2385         .perfctr                = MSR_K7_PERFCTR0,
2386         .event_map              = amd_pmu_event_map,
2387         .raw_event              = amd_pmu_raw_event,
2388         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
2389         .num_events             = 4,
2390         .event_bits             = 48,
2391         .event_mask             = (1ULL << 48) - 1,
2392         .apic                   = 1,
2393         /* use highest bit to detect overflow */
2394         .max_period             = (1ULL << 47) - 1,
2395         .get_event_constraints  = amd_get_event_constraints
2396 };
2397
2398 static __init int p6_pmu_init(void)
2399 {
2400         switch (boot_cpu_data.x86_model) {
2401         case 1:
2402         case 3:  /* Pentium Pro */
2403         case 5:
2404         case 6:  /* Pentium II */
2405         case 7:
2406         case 8:
2407         case 11: /* Pentium III */
2408         case 9:
2409         case 13:
2410                 /* Pentium M */
2411                 break;
2412         default:
2413                 pr_cont("unsupported p6 CPU model %d ",
2414                         boot_cpu_data.x86_model);
2415                 return -ENODEV;
2416         }
2417
2418         x86_pmu = p6_pmu;
2419
2420         return 0;
2421 }
2422
2423 static __init int intel_pmu_init(void)
2424 {
2425         union cpuid10_edx edx;
2426         union cpuid10_eax eax;
2427         unsigned int unused;
2428         unsigned int ebx;
2429         int version;
2430
2431         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2432                 /* check for P6 processor family */
2433                 if (boot_cpu_data.x86 == 6) {
2434                         return p6_pmu_init();
2435                 } else {
2436                         return -ENODEV;
2437                 }
2438         }
2439
2440         /*
2441          * Check whether the Architectural PerfMon supports
2442          * Branch Misses Retired hw_event or not.
2443          */
2444         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2445         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2446                 return -ENODEV;
2447
2448         version = eax.split.version_id;
2449         if (version < 2)
2450                 return -ENODEV;
2451
2452         x86_pmu                         = intel_pmu;
2453         x86_pmu.version                 = version;
2454         x86_pmu.num_events              = eax.split.num_events;
2455         x86_pmu.event_bits              = eax.split.bit_width;
2456         x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
2457
2458         /*
2459          * Quirk: v2 perfmon does not report fixed-purpose events, so
2460          * assume at least 3 events:
2461          */
2462         x86_pmu.num_events_fixed        = max((int)edx.split.num_events_fixed, 3);
2463
2464         /*
2465          * Install the hw-cache-events table:
2466          */
2467         switch (boot_cpu_data.x86_model) {
2468         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2469         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2470         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2471         case 29: /* six-core 45 nm xeon "Dunnington" */
2472                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2473                        sizeof(hw_cache_event_ids));
2474
2475                 x86_pmu.event_constraints = intel_core_event_constraints;
2476                 pr_cont("Core2 events, ");
2477                 break;
2478         case 26:
2479                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2480                        sizeof(hw_cache_event_ids));
2481
2482                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
2483                 pr_cont("Nehalem/Corei7 events, ");
2484                 break;
2485         case 28:
2486                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2487                        sizeof(hw_cache_event_ids));
2488
2489                 x86_pmu.event_constraints = intel_gen_event_constraints;
2490                 pr_cont("Atom events, ");
2491                 break;
2492         default:
2493                 /*
2494                  * default constraints for v2 and up
2495                  */
2496                 x86_pmu.event_constraints = intel_gen_event_constraints;
2497                 pr_cont("generic architected perfmon, ");
2498         }
2499         return 0;
2500 }
2501
2502 static __init int amd_pmu_init(void)
2503 {
2504         /* Performance-monitoring supported from K7 and later: */
2505         if (boot_cpu_data.x86 < 6)
2506                 return -ENODEV;
2507
2508         x86_pmu = amd_pmu;
2509
2510         /* Events are common for all AMDs */
2511         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2512                sizeof(hw_cache_event_ids));
2513
2514         return 0;
2515 }
2516
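/*
 * Without a local APIC there is no PMU interrupt source, so clear
 * x86_pmu.apic and note that hardware sampling is unavailable.
 */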
2517 static void __init pmu_check_apic(void)
2518 {
2519         if (cpu_has_apic)
2520                 return;
2521
2522         x86_pmu.apic = 0;
2523         pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2524         pr_info("no hardware sampling interrupt available.\n");
2525 }
2526
2527 void __init init_hw_perf_events(void)
2528 {
2529         int err;
2530
2531         pr_info("Performance Events: ");
2532
2533         switch (boot_cpu_data.x86_vendor) {
2534         case X86_VENDOR_INTEL:
2535                 err = intel_pmu_init();
2536                 break;
2537         case X86_VENDOR_AMD:
2538                 err = amd_pmu_init();
2539                 break;
2540         default:
2541                 return;
2542         }
2543         if (err != 0) {
2544                 pr_cont("no PMU driver, software events only.\n");
2545                 return;
2546         }
2547
2548         pmu_check_apic();
2549
2550         pr_cont("%s PMU driver.\n", x86_pmu.name);
2551
2552         if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2553                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2554                      x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2555                 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2556         }
2557         perf_event_mask = (1 << x86_pmu.num_events) - 1;
2558         perf_max_events = x86_pmu.num_events;
2559
2560         if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2561                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2562                      x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2563                 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2564         }
2565
2566         perf_event_mask |=
2567                 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
2568         x86_pmu.intel_ctrl = perf_event_mask;
2569
2570         perf_events_lapic_init();
2571         register_die_notifier(&perf_event_nmi_notifier);
2572
2573         pr_info("... version:                %d\n",     x86_pmu.version);
2574         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
2575         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
2576         pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
2577         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
2578         pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
2579         pr_info("... event mask:             %016Lx\n", perf_event_mask);
2580 }
2581
2582 static inline void x86_pmu_read(struct perf_event *event)
2583 {
2584         x86_perf_event_update(event, &event->hw, event->hw.idx);
2585 }
2586
2587 static const struct pmu pmu = {
2588         .enable         = x86_pmu_enable,
2589         .disable        = x86_pmu_disable,
2590         .read           = x86_pmu_read,
2591         .unthrottle     = x86_pmu_unthrottle,
2592 };
2593
2594 /*
2595  * validate a single event group
2596  *
2597  * validation includes:
2598  *      - check the events are compatible with each other
2599  *      - events do not compete for the same counter
2600  *      - number of events <= number of counters
2601  *
2602  * validation ensures the group can be loaded onto the
2603  * PMU if it was the only group available.
2604  */
2605 static int validate_group(struct perf_event *event)
2606 {
2607         struct perf_event *leader = event->group_leader;
2608         struct cpu_hw_events *fake_cpuc;
2609         int ret, n;
2610
2611         ret = -ENOMEM;
2612         fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
2613         if (!fake_cpuc)
2614                 goto out;
2615
2616         /*
2617          * The event is not yet connected to its
2618          * siblings, therefore we must first collect the
2619          * existing siblings, then add the new event
2620          * before we can simulate the scheduling.
2621          */
2622         ret = -ENOSPC;
2623         n = collect_events(fake_cpuc, leader, true);
2624         if (n < 0)
2625                 goto out_free;
2626
2627         fake_cpuc->n_events = n;
2628         n = collect_events(fake_cpuc, event, false);
2629         if (n < 0)
2630                 goto out_free;
2631
2632         fake_cpuc->n_events = n;
2633
2634         ret = x86_schedule_events(fake_cpuc, n, NULL);
2635
2636 out_free:
2637         kfree(fake_cpuc);
2638 out:
2639         return ret;
2640 }
2641
2642 const struct pmu *hw_perf_event_init(struct perf_event *event)
2643 {
2644         const struct pmu *tmp;
2645         int err;
2646
2647         err = __hw_perf_event_init(event);
2648         if (!err) {
2649                 /*
2650                  * We temporarily connect the event to its pmu
2651                  * such that validate_group() can classify
2652                  * it as an x86 event using is_x86_event().
2653                  */
2654                 tmp = event->pmu;
2655                 event->pmu = &pmu;
2656
2657                 if (event->group_leader != event)
2658                         err = validate_group(event);
2659
2660                 event->pmu = tmp;
2661         }
2662         if (err) {
2663                 if (event->destroy)
2664                         event->destroy(event);
2665                 return ERR_PTR(err);
2666         }
2667
2668         return &pmu;
2669 }
2670
2671 /*
2672  * callchain support
2673  */
2674
2675 static inline
2676 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2677 {
2678         if (entry->nr < PERF_MAX_STACK_DEPTH)
2679                 entry->ip[entry->nr++] = ip;
2680 }
2681
2682 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2683 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2684
2685
2686 static void
2687 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
2688 {
2689         /* Ignore warnings */
2690 }
2691
2692 static void backtrace_warning(void *data, char *msg)
2693 {
2694         /* Ignore warnings */
2695 }
2696
2697 static int backtrace_stack(void *data, char *name)
2698 {
2699         return 0;
2700 }
2701
2702 static void backtrace_address(void *data, unsigned long addr, int reliable)
2703 {
2704         struct perf_callchain_entry *entry = data;
2705
2706         if (reliable)
2707                 callchain_store(entry, addr);
2708 }
2709
2710 static const struct stacktrace_ops backtrace_ops = {
2711         .warning                = backtrace_warning,
2712         .warning_symbol         = backtrace_warning_symbol,
2713         .stack                  = backtrace_stack,
2714         .address                = backtrace_address,
2715         .walk_stack             = print_context_stack_bp,
2716 };
2717
2718 #include "../dumpstack.h"
2719
2720 static void
2721 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2722 {
2723         callchain_store(entry, PERF_CONTEXT_KERNEL);
2724         callchain_store(entry, regs->ip);
2725
2726         dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2727 }
2728
2729 /*
2730  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
2731  */
2732 static unsigned long
2733 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2734 {
2735         unsigned long offset, addr = (unsigned long)from;
2736         int type = in_nmi() ? KM_NMI : KM_IRQ0;
2737         unsigned long size, len = 0;
2738         struct page *page;
2739         void *map;
2740         int ret;
2741
2742         do {
2743                 ret = __get_user_pages_fast(addr, 1, 0, &page);
2744                 if (!ret)
2745                         break;
2746
2747                 offset = addr & (PAGE_SIZE - 1);
2748                 size = min(PAGE_SIZE - offset, n - len);
2749
2750                 map = kmap_atomic(page, type);
2751                 memcpy(to, map+offset, size);
2752                 kunmap_atomic(map, type);
2753                 put_page(page);
2754
2755                 len  += size;
2756                 to   += size;
2757                 addr += size;
2758
2759         } while (len < n);
2760
2761         return len;
2762 }
2763
2764 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
2765 {
2766         unsigned long bytes;
2767
2768         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
2769
2770         return bytes == sizeof(*frame);
2771 }
2772
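/*
 * Walk the user stack by following saved frame pointers; each frame is
 * copied in with the NMI-safe copy_from_user_nmi() helper above.
 */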
2773 static void
2774 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2775 {
2776         struct stack_frame frame;
2777         const void __user *fp;
2778
2779         if (!user_mode(regs))
2780                 regs = task_pt_regs(current);
2781
2782         fp = (void __user *)regs->bp;
2783
2784         callchain_store(entry, PERF_CONTEXT_USER);
2785         callchain_store(entry, regs->ip);
2786
2787         while (entry->nr < PERF_MAX_STACK_DEPTH) {
2788                 frame.next_frame     = NULL;
2789                 frame.return_address = 0;
2790
2791                 if (!copy_stack_frame(fp, &frame))
2792                         break;
2793
2794                 if ((unsigned long)fp < regs->sp)
2795                         break;
2796
2797                 callchain_store(entry, frame.return_address);
2798                 fp = frame.next_frame;
2799         }
2800 }
2801
2802 static void
2803 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2804 {
2805         int is_user;
2806
2807         if (!regs)
2808                 return;
2809
2810         is_user = user_mode(regs);
2811
2812         if (is_user && current->state != TASK_RUNNING)
2813                 return;
2814
2815         if (!is_user)
2816                 perf_callchain_kernel(regs, entry);
2817
2818         if (current->mm)
2819                 perf_callchain_user(regs, entry);
2820 }
2821
2822 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2823 {
2824         struct perf_callchain_entry *entry;
2825
2826         if (in_nmi())
2827                 entry = &__get_cpu_var(pmc_nmi_entry);
2828         else
2829                 entry = &__get_cpu_var(pmc_irq_entry);
2830
2831         entry->nr = 0;
2832
2833         perf_do_callchain(regs, entry);
2834
2835         return entry;
2836 }
2837
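/*
 * CPU hotplug hook: (re)initialize the per-cpu debug store when a CPU
 * comes online.
 */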
2838 void hw_perf_event_setup_online(int cpu)
2839 {
2840         init_debug_store_on_cpu(cpu);
2841 }