1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * the Mozilla Foundation.
19 * Portions created by the Initial Developer are Copyright (C) 2010
20 * the Initial Developer. All Rights Reserved.
23 * Zack Weinberg <zweinberg@mozilla.com> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
42 /* This variant of nsIPerfMeasurement uses the perf_event interface
43 * added in Linux 2.6.31. We key compilation of this file off the
44 * existence of <linux/perf_event.h>. */
47 #include <linux/perf_event.h>
49 #include <sys/syscall.h>
50 #include <sys/ioctl.h>
55 // As of July 2010, this system call has not been added to the
56 // C library, so we have to provide our own wrapper function.
57 // If this code runs on a kernel that does not implement the
58 // system call (2.6.30 or older) nothing unpredictable will
59 // happen - it will just always fail and return -1.
//
// The wrapper adds no logic of its own: attr, pid, cpu, group_fd and
// flags are forwarded, in that order, to syscall(__NR_perf_event_open, ...)
// and the syscall's result is returned as-is. Callers in this file store
// that result in an int and use it as a file descriptor.
61 sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
62 int group_fd, unsigned long flags)
64 return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
// Local shorthands so the rest of this file can spell PerfMeasurement and
// its EventMask type without qualification.
69 using JS::PerfMeasurement;
70 typedef PerfMeasurement::EventMask EventMask;
72 // Additional state required by this implementation.
75 // Each active counter corresponds to an open file descriptor.
// The f_<event> members are the ones the kSlots table points at through
// &Impl::f_##fieldname. They are initialized to -1, and the rest of the
// file compares against -1 before treating a slot as an open descriptor.
78 int f_cache_references;
80 int f_branch_instructions;
84 int f_major_page_faults;
85 int f_context_switches;
88 // Counter group leader, for Start and Stop.
// This is the descriptor passed to the PERF_EVENT_IOC_ENABLE and
// PERF_EVENT_IOC_DISABLE ioctls; -1 is the "no group yet" value that
// init() asserts and start()/stop() test for.
91 // Whether counters are running.
// Returns an EventMask; the PerfMeasurement constructor stores that
// result as eventsMeasured.
97 EventMask init(EventMask toMeasure);
// Disables the group and adds each counter's reading into *counters.
99 void stop(PerfMeasurement* counters);
102 // Mapping from our event bitmask to codes passed into the kernel, and
103 // to fields in the PerfMeasurement and PerfMeasurement::impl structures.
//
// Each row is written with one of two helper macros:
//   HW(mask, constant, fieldname) - PERF_TYPE_HARDWARE, PERF_COUNT_HW_<constant>
//   SW(mask, constant, fieldname) - PERF_TYPE_SOFTWARE, PERF_COUNT_SW_<constant>
// In both, `mask` names the PerfMeasurement event bit and `fieldname`
// selects the paired members &PerfMeasurement::<fieldname> (the counter
// value) and &Impl::f_<fieldname> (that counter's file descriptor).
// The table is sized by NUM_MEASURABLE_EVENTS and is walked by index in
// the loops elsewhere in this file.
109 uint64 PerfMeasurement::* counter;
111 } kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
112 #define HW(mask, constant, fieldname) \
113 { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
114 &PerfMeasurement::fieldname, &Impl::f_##fieldname }
115 #define SW(mask, constant, fieldname) \
116 { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
117 &PerfMeasurement::fieldname, &Impl::f_##fieldname }
119 HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
120 HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
121 HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
122 HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
123 HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
124 HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
125 HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
126 SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
// On this row the kernel constant (PAGE_FAULTS_MAJ) is spelled differently
// from the event name (MAJOR_PAGE_FAULTS).
127 SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
128 SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
129 SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),
// Member initializers: each descriptor slot starts out as -1, the value
// the rest of this file tests for to mean "no descriptor open in this slot".
138 f_cache_references(-1),
140 f_branch_instructions(-1),
144 f_major_page_faults(-1),
145 f_context_switches(-1),
146 f_cpu_migrations(-1),
154 // Close all active counter descriptors. Take care to do the group
155 // leader last (this may not be necessary, but it's unclear what
156 // happens if you close the group leader out from under a group).
157 for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
// kSlots[i].fd is a pointer-to-member naming this slot's descriptor field.
158 int fd = this->*(kSlots[i].fd);
// Select every open descriptor except the group leader's.
159 if (fd != -1 && fd != group_leader)
// The leader, if one was ever set, is dealt with separately.
163 if (group_leader != -1)
// Opens perf_event counters for the events selected in toMeasure.
//
// toMeasure: bitmask of requested events, tested against each kSlots[i].bit.
// `measured` accumulates the bit of every slot whose descriptor gets
// stored; the PerfMeasurement constructor keeps this function's result as
// eventsMeasured.
168 Impl::init(EventMask toMeasure)
// Asserted precondition: no group leader has been set up yet.
170 JS_ASSERT(group_leader == -1);
174 EventMask measured = EventMask(0);
175 struct perf_event_attr attr;
176 for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
// Is this slot's event absent from the requested set?
177 if (!(toMeasure & kSlots[i].bit))
// Start each counter from an all-zero attr, then fill in the fields below.
180 memset(&attr, 0, sizeof(attr));
181 attr.size = sizeof(attr);
183 // Set the type and config fields to indicate the counter we
184 // want to enable. We want read format 0, and we're not using
185 // sampling, so leave those fields unset.
186 attr.type = kSlots[i].type;
187 attr.config = kSlots[i].config;
189 // If this will be the group leader it should start off
190 // disabled. Otherwise it should start off enabled (but blocked
191 // on the group leader).
192 if (group_leader == -1)
195 // The rest of the bit fields are really poorly documented.
196 // For instance, I have *no idea* whether we should be setting
197 // the inherit, inherit_stat, or task flags. I'm pretty sure
198 // we do want to set mmap and comm, and not any of the ones I
199 // haven't mentioned.
203 int fd = sys_perf_event_open(&attr,
207 0 /* no flags presently defined */);
// Bookkeeping for an opened counter: remember that this event is being
// measured and store the descriptor in the slot's Impl field.
211 measured = EventMask(measured | kSlots[i].bit);
212 this->*(kSlots[i].fd) = fd;
// NOTE(review): presumably the first descriptor opened becomes the group
// leader here - confirm against the full source.
213 if (group_leader == -1)
// Guard: counters are already running, or no counter group exists.
222 if (running || group_leader == -1)
// Start counting by enabling the group leader; the comment in Impl::init
// says the other counters are opened enabled but blocked on the leader.
226 ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
// Stops the counter group and folds the readings into *counters.
//
// counters: the PerfMeasurement whose per-event members (selected through
// kSlots[i].counter) receive the values read from each descriptor.
230 Impl::stop(PerfMeasurement* counters)
232 // This scratch buffer is to ensure that we have read all the
233 // available data, even if that's more than we expect.
234 unsigned char buf[1024];
// Guard: counters are not running, or no counter group exists.
236 if (!running || group_leader == -1)
// Stop counting by disabling the group leader.
239 ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
242 // read out and reset all the counter values
243 for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
244 int fd = this->*(kSlots[i].fd);
// A reading is accepted only when read() returns exactly sizeof(uint64)
// bytes; it is then copied out of the scratch buffer and added to (not
// assigned over) the matching PerfMeasurement counter.
248 if (read(fd, buf, sizeof(buf)) == sizeof(uint64)) {
250 memcpy(&cur, buf, sizeof(uint64));
251 counters->*(kSlots[i].counter) += cur;
254 // Reset the counter regardless of whether the read did what
// we expected.
256 ioctl(fd, PERF_EVENT_IOC_RESET, 0);
260 } // anonymous namespace
// initCtr(flag): initial value for one counter member - 0 when `flag` is
// set in eventsMeasured, -1 otherwise. The counter members are uint64 (see
// the `uint64 PerfMeasurement::* counter` column of kSlots), so the -1
// presumably ends up as the maximum unsigned value.
// NOTE(review): the macro reads eventsMeasured inside the initializer
// list, which is only safe if eventsMeasured is declared before the
// counter members in the class - confirm against the class declaration.
265 #define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)
// Allocates the private Impl with js_new, calls its init(toMeasure) only
// when that allocation produced a non-null pointer, and seeds every
// counter member through initCtr.
267 PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
268 : impl(js_new<Impl>()),
269 eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
271 cpu_cycles(initCtr(CPU_CYCLES)),
272 instructions(initCtr(INSTRUCTIONS)),
273 cache_references(initCtr(CACHE_REFERENCES)),
274 cache_misses(initCtr(CACHE_MISSES)),
275 branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
276 branch_misses(initCtr(BRANCH_MISSES)),
277 bus_cycles(initCtr(BUS_CYCLES)),
278 page_faults(initCtr(PAGE_FAULTS)),
279 major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
280 context_switches(initCtr(CONTEXT_SWITCHES)),
281 cpu_migrations(initCtr(CPU_MIGRATIONS))
// Releases the Impl object that the constructor allocated with js_new.
// impl is cast back to Impl* first so js_delete sees the concrete type.
287 PerfMeasurement::~PerfMeasurement()
289 js_delete(static_cast<Impl*>(impl));
// Delegates to Impl::start() on the private implementation object.
293 PerfMeasurement::start()
296 static_cast<Impl*>(impl)->start();
// Delegates to Impl::stop(), passing this object so the readings are
// accumulated into its own counter members.
300 PerfMeasurement::stop()
303 static_cast<Impl*>(impl)->stop(this);
// Re-seeds the counter members using the same 0 / -1 values as initCtr.
// It walks kSlots and writes through each slot's pointer-to-member.
307 PerfMeasurement::reset()
309 for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) {
// Events that are being measured restart from 0.
310 if (eventsMeasured & kSlots[i].bit)
311 this->*(kSlots[i].counter) = 0;
// The alternative assignment writes -1, the value used for counters that
// are not measured.
313 this->*(kSlots[i].counter) = -1;
// Probes whether the running kernel implements perf_event_open by
// issuing one deliberately bogus request and inspecting the outcome.
318 PerfMeasurement::canMeasureSomething()
320 // Find out if the kernel implements the performance measurement
321 // API. If it doesn't, syscall(__NR_perf_event_open, ...) is
322 // guaranteed to return -1 and set errno to ENOSYS.
324 // We set up input parameters that should provoke an EINVAL error
325 // from a kernel that does implement perf_event_open, but we can't
326 // be sure it will (newer kernels might add more event types), so
327 // we have to take care to close any valid fd it might return.
329 struct perf_event_attr attr;
330 memset(&attr, 0, sizeof(attr));
331 attr.size = sizeof(attr);
// PERF_TYPE_MAX is the event type chosen to provoke the EINVAL described
// above.
332 attr.type = PERF_TYPE_MAX;
// Arguments follow the wrapper's parameter order: pid 0, cpu -1,
// group_fd -1, flags 0.
334 int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
// Any errno other than ENOSYS is taken to mean the system call exists.
339 return errno != ENOSYS;