From 4d138307f4b532e5cd2e3164f18b926fd0434281 Mon Sep 17 00:00:00 2001 From: Jedrzej Hajduczenia Date: Tue, 13 Oct 2020 15:40:30 +0200 Subject: [PATCH] [IE CLDNN] Revert fix for profiling of multiple kernel implementations (#2219) (#2595) --- .../thirdparty/clDNN/src/gpu/ocl_base_event.cpp | 87 +++++++--------------- .../thirdparty/clDNN/src/gpu/ocl_base_event.h | 26 +------ .../thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp | 30 ++------ .../thirdparty/clDNN/src/gpu/ocl_toolkit.cpp | 15 +--- .../thirdparty/clDNN/src/gpu/primitive_gpu_base.h | 13 ++-- 5 files changed, 44 insertions(+), 127 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp index 51ba350..915eb79 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp @@ -121,73 +121,40 @@ bool base_events::is_set_impl() { } bool base_events::get_profiling_info_impl(std::list& info) { - - // For every profiling period (i.e. submission / starting / executing), - // the goal is to sum up all disjoint durations of its projection on the time axis - - std::map>> all_durations; + cl_ulong min_queue = CL_ULONG_MAX; + cl_ulong min_sub = CL_ULONG_MAX; + cl_ulong min_start = CL_ULONG_MAX; + uint64_t execution_time = 0; for (size_t i = 0; i < _events.size(); i++) { auto be = dynamic_cast(_events[i].get()); if (!is_event_profiled(be->_event)) continue; - for (auto& period : profiling_periods) { - cl_ulong ev_start; - cl_ulong ev_end; - be->_event.getProfilingInfo(period.start, &ev_start); - be->_event.getProfilingInfo(period.stop, &ev_end); - auto ev_duration = std::make_pair(static_cast(ev_start), - static_cast(ev_end)); - - auto& durations = all_durations[period.name]; - bool ev_duration_merged = false; - auto it = durations.begin(); - - while (it != durations.end()) { - auto& duration = *it; - if ((duration.second >= ev_duration.first) && (duration.first <= ev_duration.second)) { - if ((duration.first == ev_duration.first) && (duration.second == ev_duration.second)) { - if (!ev_duration_merged) { - ev_duration_merged = true; - break; - } else { - it = durations.erase(it); - } - } else { - if (!ev_duration_merged) { - duration.first = std::min(duration.first, ev_duration.first); - duration.second = std::max(duration.second, ev_duration.second); - ev_duration = duration; - ev_duration_merged = true; - it++; - } else { - if (duration.second > ev_duration.second) { - ev_duration.second = duration.second; - it--; - it->second = ev_duration.second; - it++; - } - it = durations.erase(it); - } - } - } else { - it++; - } - } - - if (!ev_duration_merged) { - durations.insert(it, ev_duration); - } - } - } + cl_ulong curr_queue; + cl_ulong curr_sub; + cl_ulong curr_start; + cl_ulong curr_end; + be->_event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &curr_queue); + be->_event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &curr_sub); + be->_event.getProfilingInfo(CL_PROFILING_COMMAND_START, &curr_start); + be->_event.getProfilingInfo(CL_PROFILING_COMMAND_END, &curr_end); - for (auto& period : profiling_periods) { - unsigned long long sum = 0; - for (auto& duration : all_durations[period.name]) { - sum += (duration.second - duration.first); - } - info.push_back(get_profiling_interval(period.name, 0, sum)); + if (curr_queue < min_queue) + min_queue = curr_queue; + + if (curr_sub < min_sub) + min_sub = curr_sub; + + if (curr_start < min_start) + min_start = curr_start; + + execution_time += curr_end - curr_start; } + + info.push_back(get_profiling_interval(profiling_periods[0].name, min_sub, min_queue)); + info.push_back(get_profiling_interval(profiling_periods[1].name, min_start, min_sub)); + info.push_back(get_profiling_interval(profiling_periods[2].name, 0, execution_time)); + return true; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h index ffccbce..39e6f04 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h @@ -77,17 +77,7 @@ protected: struct base_events : virtual public ocl_base_event { public: base_events(std::shared_ptr ctx, std::vector const& ev) - : ocl_base_event(0, true), _ctx(ctx) { - for (size_t i = 0; i < ev.size(); i++) { - auto multiple_events = dynamic_cast(ev[i].get()); - if (multiple_events) { - for (size_t j = 0; j < multiple_events->_events.size(); j++) { - _events.push_back(multiple_events->_events[j]); - } - } else { - _events.push_back(ev[i]); - } - } + : ocl_base_event(0, true), _ctx(ctx), _events(ev) { set_queue_stamp(); } @@ -96,28 +86,18 @@ public: void attach_events(const std::vector& ev) { if (_attached) throw std::runtime_error("Trying to attach events to valid event object."); - for (size_t i = 0; i < ev.size(); i++) { - auto multiple_events = dynamic_cast(ev[i].get()); - if (multiple_events) { - for (size_t j = 0; j < multiple_events->_events.size(); j++) { - _events.push_back(multiple_events->_events[j]); - } - } else { - _events.push_back(ev[i]); - } - } + _events = ev; _attached = true; set_queue_stamp(); } std::shared_ptr get_context() const { return _ctx; } - const std::vector& get_events() { return _events; }; private: void set_queue_stamp() { uint64_t _queue_stamp_max = 0; for (size_t i = 0; i < _events.size(); i++) { - auto* _base_event = dynamic_cast(_events[i].get()); + auto* _base_event = dynamic_cast(_events[i].get()); if (_base_event->get_queue_stamp() > _queue_stamp_max) _queue_stamp_max = _base_event->get_queue_stamp(); } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp index 8e3b238..18bc5b1 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp @@ -79,18 +79,9 @@ event_impl::ptr gpu_queue::enqueue_kernel(kernels_cache::kernel_type const& kern std::vector dep_events; auto dep_events_ptr = &dep_events; if (!context()->get_configuration().host_out_of_order) { - for (auto& dep : deps) { - auto multiple_events = dynamic_cast(dep.get()); - if (multiple_events) { - for (size_t i = 0; i < multiple_events->get_events().size(); i++) { - if (auto base_ev = dynamic_cast(multiple_events->get_events()[i].get())) - dep_events.push_back(base_ev->get()); - } - } else { - if (auto base_ev = dynamic_cast(dep.get())) - dep_events.push_back(base_ev->get()); - } - } + for (auto& dep : deps) + if (auto ocl_ev = dynamic_cast(dep.get())) + dep_events.push_back(ocl_ev->get()); } else { dep_events_ptr = nullptr; @@ -122,18 +113,9 @@ event_impl::ptr gpu_queue::enqueue_marker(std::vector const& de cl::Event ret_ev; if (!enabled_single_kernel) { std::vector dep_events; - for (auto& dep : deps) { - auto multiple_events = dynamic_cast(dep.get()); - if (multiple_events) { - for (size_t i = 0; i < multiple_events->get_events().size(); i++) { - if (auto base_ev = dynamic_cast(multiple_events->get_events()[i].get())) - dep_events.push_back(base_ev->get()); - } - } else { - if (auto base_ev = dynamic_cast(dep.get())) - dep_events.push_back(base_ev->get()); - } - } + for (auto& dep : deps) + if (auto ocl_ev = dynamic_cast(dep.get())) + dep_events.push_back(ocl_ev->get()); try { _command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp index 5299d58..0d1f2d3 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp @@ -229,18 +229,9 @@ void gpu_toolkit::release_pending_memory(uint32_t queue_id) { get_command_queue( void gpu_toolkit::wait_for_events(std::vector const& events) { std::vector clevents; - for (auto& ev : events) { - auto multiple_events = dynamic_cast(ev.get()); - if (multiple_events) { - for (size_t i = 0; i < multiple_events->get_events().size(); i++) { - if (auto base_ev = dynamic_cast(multiple_events->get_events()[i].get())) - clevents.push_back(base_ev->get()); - } - } else { - if (auto base_ev = dynamic_cast(ev.get())) - clevents.push_back(base_ev->get()); - } - } + for (auto& ev : events) + if (auto ocl_ev = dynamic_cast(ev.get())) + clevents.push_back(ocl_ev->get()); try { cl::WaitForEvents(clevents); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h index d5f7fa6..0c01641 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h @@ -107,11 +107,12 @@ protected: virtual bool get_depthwise_sep_opt() const { return false; } event_impl::ptr aggregate_events(const std::vector& events, - uint32_t net_id) const { + uint32_t net_id, + bool group = false) const { if (events.size() == 1) return events[0]; - if (events.size() > 1) + if (group) return _outer.get_program().get_engine().get_context()->group_events(net_id, events); return events_waiter(_outer.get_program().get_engine().get_context()).run(net_id, events); @@ -160,7 +161,6 @@ protected: } std::vector tmp_events(events); - std::vector all_events; // TODO - split should be handle in kernel selector by providing multiple kernels. auto split = get_split(); @@ -181,16 +181,13 @@ protected: auto event = _kernels[k].run(net_id, _kernel_data.kernels[k], tmp_events); new_events.push_back(event); - all_events.push_back(event); } tmp_events = new_events; } - if ((all_events.size() == 0) && (tmp_events.size() > 0)) - return aggregate_events(tmp_events, net_id); - - return aggregate_events(all_events, net_id); + bool group_events = split > 1 ? true : false; + return aggregate_events(tmp_events, net_id, group_events); } }; -- 2.7.4