From: 장지섭/동작제어Lab(SR)/Engineer/삼성전자 Date: Thu, 25 Oct 2018 09:43:05 +0000 (+0900) Subject: Support HashtableLookup greater than 2 dimensions (#3210) X-Git-Tag: 0.3~522 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8310159b824692d20cf9fff5f0c02a47de1372d5;p=platform%2Fcore%2Fml%2Fnnfw.git Support HashtableLookup greater than 2 dimensions (#3210) This commit supports HashtableLookup greater than 2 dimensions. Signed-off-by: jiseob.jang --- diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index 2fcdd8b..9071c8b 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -4391,9 +4391,7 @@ void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node) const auto &values_shape = values_obj.shape(); const auto &output_shape = output_obj.shape(); - /* TODO: Support dimensions greater than two */ - assert(values_shape.rank() == 2); - assert(output_shape.rank() == 2); + assert(values_shape.rank() == output_shape.rank()); assert(lookups_shape.rank() == 1); assert(keys_shape.rank() == 1); diff --git a/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.cc index c60378f..01a6689 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.cc @@ -17,6 +17,8 @@ #include "internal/layers/HashtableLookupLayer.h" #include +#include +#include void HashtableLookupLayer::configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys, ::arm_compute::ITensor *values, ::arm_compute::ITensor *output, @@ -27,6 +29,7 @@ void HashtableLookupLayer::configure(::arm_compute::ITensor *lookups, ::arm_comp _values = values; _output = output; _hits = hits; + _lookup_indices.resize(lookups->info()->dimension(0), -1); } void HashtableLookupLayer::run() @@ -43,8 +46,6 @@ void HashtableLookupLayer::run() const int32_t *lookups_buf = reinterpret_cast(_lookups->buffer()); const int32_t *keys_buf = reinterpret_cast(_keys->buffer()); - const auto values_buf = _values->buffer(); - auto output_buf = _output->buffer(); uint8_t *hits_buf = reinterpret_cast(_hits->buffer()); const auto lookups_info = _lookups->info(); @@ -52,47 +53,81 @@ void HashtableLookupLayer::run() const auto keys_info = _keys->info(); const auto output_info = _output->info(); - const size_t num_rows = values_info->dimension(1); - const size_t row_bytes = values_info->total_size() / num_rows; + // NOTE The first dimension's position must be always at the end of dimensions. + const auto first_dim_pos = values_info->num_dimensions() - 1; + const size_t first_dim = values_info->dimension(first_dim_pos); - int number_of_keys = keys_info->dimension(0); + std::map key_map; + const int keys_num = keys_info->dimension(0); + for (size_t key_index = 0; key_index < keys_num; key_index++) + { + key_map[keys_buf[key_index]] = key_index; + } - for (size_t i = 0; i < lookups_info->dimension(0); ++i) + const int lookups_num = lookups_info->dimension(0); + for (size_t i = 0; i < lookups_num; ++i) { - int idx = -1; - auto lookup_value = reinterpret_cast(lookups_buf) + i; - for (int key_index = 0; key_index < number_of_keys; key_index++) + const auto lookup_value = lookups_buf[i]; + const auto it = key_map.find(lookup_value); + if (it != key_map.end()) { - auto current_key = reinterpret_cast(keys_buf) + key_index; - if (*lookup_value == *current_key) - { - idx = key_index; - break; - } + if (it->second >= first_dim) + throw std::runtime_error("HashTable Lookup: index out of bounds."); + _lookup_indices[i] = it->second; } + } - if (idx >= num_rows || idx < 0) // Miss - { - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - memset(sink_addr, 0, row_bytes); + // If each strides of values and output are different, applied padding size of the two tensors are + // different, therefore, it can not be copied at once. + auto can_copy_at_once = [&]() -> bool { + const auto &values_strides = values_info->strides_in_bytes(); + const auto &output_strides = output_info->strides_in_bytes(); - hits_buf[i] = 0; - } - else // Hit + for (size_t i = 0; i < first_dim_pos; ++i) { - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i}); + if (values_strides[i] != values_strides[i]) + return false; + } - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); + return true; + }; - hits_buf[i] = 1; - } + using ::arm_compute::Window; + using ::arm_compute::Iterator; + using ::arm_compute::Coordinates; + + size_t copy_bytes; + Window window; + if (can_copy_at_once()) + { + copy_bytes = values_info->total_size() / first_dim; + window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos); } + else + { + copy_bytes = values_info->dimension(0) * values_info->element_size(); + window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY); + } + + Iterator it(_output, window); + execute_window_loop(window, + [&](const Coordinates &id) { + Coordinates values_id = id; + const int idx = id[first_dim_pos]; + const int lookup_index = _lookup_indices[idx]; + if (lookup_index >= 0) + { + values_id.set(first_dim_pos, lookup_index); + memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes); + hits_buf[lookup_index] = 1; + } + else + { + memset(it.ptr(), 0, copy_bytes); + hits_buf[lookup_index] = 0; + } + }, + it); if (::internal::arm_compute::isGpuMode()) { diff --git a/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.h index e6bff19..053bbd8 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/HashtableLookupLayer.h @@ -20,6 +20,7 @@ #include "internal/arm_compute.h" #include #include +#include class HashtableLookupLayer : public ::arm_compute::IFunction { @@ -42,6 +43,7 @@ private: ::arm_compute::ITensor *_values; ::arm_compute::ITensor *_output; ::arm_compute::ITensor *_hits; + std::vector _lookup_indices; }; #endif /*__HASHTABLE_LOOKUP_H__ */