From: 장지섭/동작제어Lab(SR)/Engineer/삼성전자 Date: Thu, 25 Oct 2018 10:42:59 +0000 (+0900) Subject: Fix padding bug of EmbeddingLookup (#3184) X-Git-Tag: 0.3~521 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=10626a3009ad615546f6bddb3652bf9ff0ec647d;p=platform%2Fcore%2Fml%2Fnnfw.git Fix padding bug of EmbeddingLookup (#3184) This commit fixes a padding bug of EmbeddingLookup. Signed-off-by: jiseob.jang --- diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc index 5f6069c..ae740bb 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc @@ -41,8 +41,6 @@ void SimpleEmbeddingLookup::run() // type of elements of lookups is always integer const int32_t *lookups_buf = reinterpret_cast<const int32_t *>(_lookups->buffer()); - const auto values_buf = _values->buffer(); - auto output_buf = _output->buffer(); const auto lookups_info = _lookups->info(); const auto values_info = _values->info(); @@ -50,29 +48,54 @@ void SimpleEmbeddingLookup::run() // NOTE The first dimension's position is always at the end of dimensions. const auto first_dim_pos = values_info->num_dimensions() - 1; - ::arm_compute::Coordinates offset_coord{}; - for (size_t i = 0; i < first_dim_pos; ++i) - { - offset_coord.set(i, 0); - } const size_t first_dim = values_info->dimension(first_dim_pos); - const size_t copy_bytes = values_info->total_size() / first_dim; for (size_t i = 0; i < lookups_info->dimension(0); ++i) { if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim) throw std::runtime_error("Embedding Lookup: index out of bounds."); + } + + // If the strides of values and output differ, the padding applied to the two tensors differs, + // so the data cannot be copied at once. 
+  auto can_copy_at_once = [&]() -> bool { // true iff strides match for every dim < first_dim_pos
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i]) // compare values against output, not itself
+        return false;
+    }
 
-    size_t idx = lookups_buf[i];
-    offset_coord.set(first_dim_pos, idx);
-    size_t values_offset = values_info->offset_element_in_bytes(offset_coord);
-    offset_coord.set(first_dim_pos, i);
-    size_t output_offset = output_info->offset_element_in_bytes(offset_coord);
+    return true;
+  };
 
-    unsigned char *sink_addr = output_buf + output_offset;
-    unsigned char *source_addr = values_buf + values_offset;
-    memcpy(sink_addr, source_addr, copy_bytes);
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim; // bytes per index of the first dimension
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
   }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size(); // one dim-0 row
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const ::arm_compute::Coordinates &id) {
+                        ::arm_compute::Coordinates values_id = id;
+                        const int idx = id[first_dim_pos]; // position in the lookups buffer
+                        values_id.set(first_dim_pos, lookups_buf[idx]); // source = looked-up entry
+                        memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                      },
+                      it);
 
   if (::internal::arm_compute::isGpuMode())
   {