From: Parichay Kapoor
Date: Thu, 28 Oct 2021 08:03:02 +0000 (+0900)
Subject: [tensor] Optimize element wise operator functions
X-Git-Tag: accepted/tizen/unified/20220323.062643~239
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=52a2c78b11dba315a4bd05860bf8af767bd71631;p=platform%2Fcore%2Fml%2Fnntrainer.git

[tensor] Optimize element wise operator functions

This patch optimizes element-wise operator functions for multiply,
divide, add and subtract by using std::transform rather than a regular
loop when the stride along the last dimension is 1.

Signed-off-by: Parichay Kapoor
---

diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index ccff3f1..c868c3d 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -424,11 +424,17 @@ Tensor &Tensor::multiply(Tensor const &m, Tensor &output,
    */
   auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                float *out_buf) {
-    for (unsigned int i = 0; i < e.buffer_size; ++i) {
-      *out_buf = *buf * *m_buf + beta * *out_buf;
-      buf += strides[3];
-      m_buf += e.strides[3];
-      out_buf += output.strides[3];
+    if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1 &&
+        beta == 0.0) {
+      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
+                     std::multiplies<float>());
+    } else {
+      for (unsigned int i = 0; i < e.buffer_size; ++i) {
+        *out_buf = *buf * *m_buf + beta * *out_buf;
+        buf += strides[3];
+        m_buf += e.strides[3];
+        out_buf += output.strides[3];
+      }
     }
   };
 
@@ -485,11 +491,16 @@ Tensor Tensor::divide(Tensor const &m) const {
 Tensor &Tensor::divide(Tensor const &m, Tensor &output) const {
   auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                float *out_buf) {
-    for (unsigned int i = 0; i < e.buffer_size; ++i) {
-      *out_buf = *buf / *m_buf;
-      buf += strides[3];
-      m_buf += e.strides[3];
-      out_buf += strides[3];
+    if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1) {
+      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
+                     std::divides<float>());
+    } else {
+      for (unsigned int i = 0; i < e.buffer_size; ++i) {
+        *out_buf = *buf / *m_buf;
+        buf += strides[3];
+        m_buf += e.strides[3];
+        out_buf += output.strides[3];
+      }
     }
   };
 
@@ -536,20 +547,25 @@ Tensor Tensor::add(Tensor const &m, float const alpha) const {
   return this->add(m, t, alpha);
 }
 
-Tensor &Tensor::add(Tensor const &m, Tensor &out, float const alpha) const {
+Tensor &Tensor::add(Tensor const &m, Tensor &output, float const alpha) const {
   auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                float *out_buf) {
-    for (unsigned int i = 0; i < e.buffer_size; ++i) {
-      *out_buf = *buf + *m_buf * alpha;
-      buf += strides[3];
-      m_buf += e.strides[3];
-      out_buf += strides[3];
+    if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1 &&
+        alpha == 1.0) {
+      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
+                     std::plus<float>());
+    } else {
+      for (unsigned int i = 0; i < e.buffer_size; ++i) {
+        *out_buf = *buf + *m_buf * alpha;
+        buf += strides[3];
+        m_buf += e.strides[3];
+        out_buf += output.strides[3];
+      }
     }
   };
 
-  apply_broadcast(m, f, out);
+  apply_broadcast(m, f, output);
 
-  return out;
+  return output;
 }
 
 int Tensor::subtract_i(float const &value) {