Add OpenMP parallelism to l2norm()

author Jihoon Lee <jhoon.it.lee@samsung.com>

Thu, 11 Jun 2020 08:59:26 +0000 (17:59 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Fri, 12 Jun 2020 02:41:02 +0000 (11:41 +0900)
author Jihoon Lee <jhoon.it.lee@samsung.com>
Thu, 11 Jun 2020 08:59:26 +0000 (17:59 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 12 Jun 2020 02:41:02 +0000 (11:41 +0900)
diff --git a/jni/Android.mk b/jni/Android.mk

index 23c5ee3..6c0645b 100644 (file)
--- a/jni/Android.mk
+++ b/jni/Android.mk
@@ -47,7 +47,7 @@ INIPARSER_INCLUDES := $(INIPARSER_ROOT)/src
  LOCAL_ARM_NEON      := true
  LOCAL_CFLAGS        += -pthread -fopenmp -fexceptions
  LOCAL_CXXFLAGS      += -std=c++11 -frtti -fexceptions
-LOCAL_LDFLAGS       += -fuse-ld=bfd
+LOCAL_LDFLAGS       += -fuse-ld=bfd -fopenmp
  LOCAL_MODULE_TAGS   := optional
  
  LOCAL_LDLIBS        := -llog
diff --git a/meson.build b/meson.build

index edf7fc4..6ae82df 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,10 @@ nntrainer_conf.set('EXEC_PREFIX', nntrainer_bindir)
  nntrainer_conf.set('LIB_INSTALL_DIR', nntrainer_libdir)
  nntrainer_conf.set('INCLUDE_INSTALL_DIR', nntrainer_includedir)
  
+dummy_dep = dependency('', required: false)
+openmp_dep = dependency('openmp')
+
+blas_dep = dummy_dep
  # Dependencies
  if get_option('enable-blas')
    add_project_arguments('-DUSE_BLAS=1', language:['c','cpp'])
diff --git a/nntrainer/meson.build b/nntrainer/meson.build

index a9bff85..49d68e3 100644 (file)
--- a/nntrainer/meson.build
+++ b/nntrainer/meson.build
@@ -2,6 +2,7 @@ nntrainer_inc = include_directories('./include')
  
  # Dependencies
  nntrainer_base_deps=[
+  openmp_dep,
    blas_dep,
    iniparser_dep,
    libm_dep,
diff --git a/nntrainer/src/tensor.cpp b/nntrainer/src/tensor.cpp

index 1a66e5b..57abf47 100644 (file)
--- a/nntrainer/src/tensor.cpp
+++ b/nntrainer/src/tensor.cpp
@@ -435,11 +435,10 @@ Tensor Tensor::sum() const {
  #else
    unsigned int i;
    for (k = 0; k < dim.batch(); ++k) {
-
      unsigned int id = k * dim.getFeatureLen();
-    ret.data[id] = 0.0;
+    ret.data[k] = 0.0;
      for (i = 0; i < dim.getFeatureLen(); ++i) {
-      ret.data[id] += data[id + i];
+      ret.data[k] += data[id + i];
      }
    }
  #endif
@@ -778,7 +777,7 @@ Tensor &Tensor::copy(const Tensor &from) {
  #ifdef USE_BLAS
      cblas_scopy(dim.getDataLen(), from.data.data(), 1, this->data.data(), 1);
  #else
-    for (int i = 0; i < dim.getDataLen(); ++i)
+    for (unsigned int i = 0; i < dim.getDataLen(); ++i)
        data[i] = from.data[i];
  #endif
    }
@@ -845,8 +844,9 @@ float Tensor::l2norm() const {
    float tmp;
    unsigned int len = dim.getDataLen();
  
+  #pragma omp parallel for private(tmp) reduction(+:sum)
    for (unsigned int i = 0; i < len; i++) {
-    tmp = this->data[i];
+    tmp = data[i];
      sum += tmp * tmp;
    }
author	Jihoon Lee <jhoon.it.lee@samsung.com>
	Thu, 11 Jun 2020 08:59:26 +0000 (17:59 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Fri, 12 Jun 2020 02:41:02 +0000 (11:41 +0900)
jni/Android.mk		patch \| blob \| history
meson.build		patch \| blob \| history
nntrainer/meson.build		patch \| blob \| history
nntrainer/src/tensor.cpp		patch \| blob \| history