Compute Library  18.05
OpenCLTimer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "OpenCLTimer.h"
25 
26 #include "../Framework.h"
27 #include "../Utils.h"
28 
31 
32 #ifndef ARM_COMPUTE_CL
33 #error "You can't use OpenCLTimer without OpenCL"
34 #endif /* ARM_COMPUTE_CL */
35 
36 namespace arm_compute
37 {
38 namespace test
39 {
40 namespace framework
41 {
// Identifier of this instrument: always the fixed string "OpenCLTimer".
42 std::string OpenCLTimer::id() const
43 {
44  return "OpenCLTimer";
45 }
46 
// Constructor of OpenCLTimer. The signature (listing line 47) is absent from this extract;
// the cross-reference index at the end of the listing gives it as
// OpenCLTimer(ScaleFactor scale_factor).
//
// Starts with no recorded kernels, no saved callables, an empty name prefix and the timer
// disabled, then makes sure the scheduler's command queue has profiling enabled and
// derives the reporting divisor/unit from scale_factor.
48  : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
49 {
50  auto q = CLScheduler::get().queue();
51  cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
 // If the scheduler's current queue was created without profiling, replace it with a new
 // queue on the same context that keeps the existing properties and adds the profiling bit.
52  if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
53  {
54  CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
55  }
56 
 // _scale_factor is the divisor applied to (end - start) when measurements are built;
 // _unit is the label attached to each resulting Measurement.
 // NOTE(review): listing lines 63, 67 and 71 are absent from this extract; they presumably
 // hold the case labels for the "us", "ms" and "s" branches below. As shown, those three
 // groups of assignments have no label - confirm against the original file.
57  switch(scale_factor)
58  {
59  case ScaleFactor::NONE:
60  _scale_factor = 1.f;
61  _unit = "ns";
62  break;
64  _scale_factor = 1000.f;
65  _unit = "us";
66  break;
68  _scale_factor = 1000000.f;
69  _unit = "ms";
70  break;
72  _scale_factor = 1000000000.f;
73  _unit = "s";
74  break;
75  default:
76  ARM_COMPUTE_ERROR("Invalid scale");
77  }
78 }
79 
// Body of the function defined at listing line 80. The signature line is absent from this
// extract; the cross-reference index maps OpenCLTimer.cpp:80 to "void test_start() override".
//
// Saves the currently installed clEnqueueNDRangeKernel callable and the graph
// task-execution callable, then builds two wrappers around them:
//  - an enqueue wrapper that, while the timer is enabled, records a named event for every
//    kernel enqueue, and
//  - a task wrapper that sets the name prefix used by the enqueue wrapper for the duration
//    of one graph task.
81 {
82  // Start intercepting enqueues:
 // Both saved callables must still be null, i.e. no interception is already in place.
83  ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
84  ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
85  _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
86  _real_graph_function = graph::TaskExecutor::get().execute_function;
 // Wrapper taking the same parameters as the callable stored in clEnqueueNDRangeKernel_ptr.
87  auto interceptor = [this](
88  cl_command_queue command_queue,
89  cl_kernel kernel,
90  cl_uint work_dim,
91  const size_t *gwo,
92  const size_t *gws,
93  const size_t *lws,
94  cl_uint num_events_in_wait_list,
95  const cl_event * event_wait_list,
96  cl_event * event)
97  {
98  if(this->_timer_enabled)
99  {
 // While timing, the wrapper supplies its own event, so a caller-provided event
 // pointer is rejected.
100  ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
101  ARM_COMPUTE_UNUSED(event);
102 
103  OpenCLTimer::kernel_info info;
 // Wrap the raw handle to query the kernel's function name.
 // NOTE(review): the second argument is presumably the C++ bindings' "retain" flag, so
 // the temporary wrapper does not take over the caller's reference - confirm.
104  cl::Kernel cpp_kernel(kernel, true);
 // Recorded name: <prefix><kernel function name>[ GWS[x,y,z]][ LWS[x,y,z]]
105  std::stringstream ss;
106  ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
 // NOTE(review): three entries of gws / lws are read regardless of work_dim; this
 // assumes callers always pass arrays of at least three elements - confirm.
107  if(gws != nullptr)
108  {
109  ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
110  }
111  if(lws != nullptr)
112  {
113  ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
114  }
115  info.name = ss.str();
 // Forward to the saved callable with a local event and keep that event alongside the
 // name so it can be queried for profiling data later.
 // NOTE(review): tmp is not initialised and is stored even when retval reports a
 // failure - confirm this is acceptable.
116  cl_event tmp;
117  cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
118  info.event = tmp;
119  this->_kernels.push_back(std::move(info));
120  return retval;
121  }
122  else
123  {
 // Timer disabled: forward the call unchanged, including the caller's event pointer.
124  return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
125  }
126  };
127 
128  // Start intercepting tasks:
 // Sets _prefix to "<node name>/" while the saved task callable runs, when the task has a
 // node with a non-empty name; the prefix is cleared again once the task has run.
129  auto task_interceptor = [this](graph::ExecutionTask & task)
130  {
131  if(task.node != nullptr && !task.node->name().empty())
132  {
133  this->_prefix = task.node->name() + "/";
134  }
135  else
136  {
137  this->_prefix = "";
138  }
139  this->_real_graph_function(task);
140  this->_prefix = "";
141  };
142 
 // NOTE(review): listing line 143 is absent from this extract; presumably it assigns
 // `interceptor` to CLSymbols::get().clEnqueueNDRangeKernel_ptr. As shown, only the task
 // wrapper is installed - confirm against the original file.
144  graph::TaskExecutor::get().execute_function = task_interceptor;
145 }
146 
// Function body whose signature (listing line 147) is absent from this extract; presumably
// "void OpenCLTimer::start()", which the cross-reference index lists as "Start measuring"
// - confirm.
// Discards the kernel records collected so far and enables recording.
148 {
149  _kernels.clear();
150  _timer_enabled = true;
151 }
// Function body whose signature (listing line 152) is absent from this extract; presumably
// "void OpenCLTimer::stop()", which the cross-reference index lists as "Stop measuring"
// - confirm.
// Disables recording; the kernel records collected so far are kept.
153 {
154  _timer_enabled = false;
155 }
156 
// Function body whose signature (listing line 157) is absent from this extract; presumably
// "void OpenCLTimer::test_stop()", which the cross-reference index lists as "End of the
// test" - confirm.
// Puts the saved enqueue and graph task callables back in place and resets the saved
// copies to nullptr.
158 {
159  // Restore real function
160  CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
161  graph::TaskExecutor::get().execute_function = _real_graph_function;
162  _real_graph_function = nullptr;
163  _real_function = nullptr;
164 }
165 
// Function body whose signature (listing line 166) is absent from this extract; the
// cross-reference index lists "MeasurementsMap measurements() const override" ("Return
// the latest measurements"), with MeasurementsMap being std::map<std::string, Measurement>.
// NOTE(review): listing line 168 is also absent; presumably it declares the local
// `measurements` map that is filled and returned below - confirm against the original file.
//
// Produces one entry per recorded kernel:
//   key   = "<recorded name> #<running index>"
//   value = Measurement((end - start) / _scale_factor, _unit)
// where start and end are the event's CL_PROFILING_COMMAND_START / _END values.
167 {
169  unsigned int kernel_number = 0;
 // NOTE(review): `auto kernel` copies each record (name and event) on every iteration;
 // `const auto &kernel` would avoid the copy - consider changing.
170  for(auto kernel : _kernels)
171  {
172  cl_ulong start = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
173  cl_ulong end = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
174 
 // The running index keeps keys unique when the same kernel name is recorded repeatedly.
175  measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), Measurement((end - start) / _scale_factor, _unit));
176  }
177 
178  return measurements;
179 }
180 } // namespace framework
181 } // namespace test
182 } // namespace arm_compute
#define ARM_COMPUTE_ERROR(...)
Print the given message, then throw a std::runtime_error.
Definition: Error.h:260
static CLSymbols & get()
Get the static instance of CLSymbols.
void start() override
Start measuring.
std::string to_string(T &&value)
Convert integer and float values to string.
Generic measurement that stores values as either double or long long int.
Definition: Measurement.h:41
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:328
src info() -> set_format(Format::S16)
This file contains all available output stages for GEMMLowp on OpenCL.
std::string id() const override
Identifier for the instrument.
Definition: OpenCLTimer.cpp:42
std::function< decltype(clEnqueueNDRangeKernel)> clEnqueueNDRangeKernel_ptr
Definition: OpenCL.h:85
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:159
static CLScheduler & get()
Access the scheduler singleton.
void set_queue(cl::CommandQueue queue)
Accessor to set the CL command queue to be used by the scheduler.
Definition: CLScheduler.h:175
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Definition: CLScheduler.h:156
void test_stop() override
End of the test.
void stop() override
Stop measuring.
MeasurementsMap measurements() const override
Return the latest measurements.
void test_start() override
Start of the test.
Definition: OpenCLTimer.cpp:80
std::map< std::string, Measurement > MeasurementsMap
Map of measurements.
Definition: Instrument.h:109
static TaskExecutor & get()
Task executor accessor.
std::function< decltype(execute_task)> execute_function
Function that is responsible for executing tasks.
Definition: Workload.h:63
#define ARM_COMPUTE_ERROR_ON_MSG(cond,...)
Definition: Error.h:319
OpenCLTimer(ScaleFactor scale_factor)
Construct an OpenCL timer.
Definition: OpenCLTimer.cpp:47