From 0dc78a5859eddedb311fa0fc009bcc5977c51578 Mon Sep 17 00:00:00 2001 From: Parichay Kapoor Date: Fri, 22 Oct 2021 16:25:08 +0900 Subject: [PATCH] [build] Add openmp dependency for improved performance This patch adds OpenMP as a build dependency to improve performance. The Android.mk builds now pass -fopenmp via LOCAL_CFLAGS and LOCAL_LDLIBS. The meson build gains an 'enable-openmp' option (enabled by default) that adds the OpenMP dependency to nntrainer_base_deps; the blas/cublas options are regrouped with it under a "backend options" section in meson_options.txt. Signed-off-by: Parichay Kapoor --- Applications/Custom/LayerClient/jni/Android.mk | 4 ++-- Applications/KNN/jni/Android.mk | 4 ++-- Applications/LogisticRegression/jni/Android.mk | 4 ++-- Applications/MNIST/jni/Android.mk | 4 ++-- Applications/ProductRatings/jni/Android.mk | 4 ++-- Applications/ReinforcementLearning/DeepQ/jni/Android.mk | 4 ++-- Applications/Resnet/jni/Android.mk | 4 ++-- .../TransferLearning/CIFAR_Classification/jni/Android.mk | 8 ++++---- .../TransferLearning/Draw_Classification/jni/Android.mk | 4 ++-- Applications/VGG/jni/Android.mk | 4 ++-- Applications/utils/jni/Android.mk | 4 ++-- jni/Android.mk | 12 ++++++------ meson.build | 5 +++++ meson_options.txt | 8 ++++++-- nntrainer/meson.build | 3 ++- 15 files changed, 43 insertions(+), 33 deletions(-) diff --git a/Applications/Custom/LayerClient/jni/Android.mk b/Applications/Custom/LayerClient/jni/Android.mk index eb7b296..2a6330a 100644 --- a/Applications/Custom/LayerClient/jni/Android.mk +++ b/Applications/Custom/LayerClient/jni/Android.mk @@ -55,12 +55,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_layer_client_example -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp $(LOCAL_PATH)/../../pow.cpp $(LOCAL_PATH)/../../mae_loss.cpp diff --git a/Applications/KNN/jni/Android.mk b/Applications/KNN/jni/Android.mk index b0b001e..8e57ca6 
100644 --- a/Applications/KNN/jni/Android.mk +++ b/Applications/KNN/jni/Android.mk @@ -68,12 +68,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/arm64-v8a/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := knn_sample -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main_sample.cpp diff --git a/Applications/LogisticRegression/jni/Android.mk b/Applications/LogisticRegression/jni/Android.mk index 73576dd..cdda9ea 100644 --- a/Applications/LogisticRegression/jni/Android.mk +++ b/Applications/LogisticRegression/jni/Android.mk @@ -42,12 +42,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_logistic -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp diff --git a/Applications/MNIST/jni/Android.mk b/Applications/MNIST/jni/Android.mk index 3f3fb7b..6bb33ac 100644 --- a/Applications/MNIST/jni/Android.mk +++ b/Applications/MNIST/jni/Android.mk @@ -35,12 +35,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_mnist -LOCAL_LDLIBS := -llog -landroid 
+LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp diff --git a/Applications/ProductRatings/jni/Android.mk b/Applications/ProductRatings/jni/Android.mk index d9c8b35..e349988 100644 --- a/Applications/ProductRatings/jni/Android.mk +++ b/Applications/ProductRatings/jni/Android.mk @@ -43,12 +43,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_product_ratings -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp diff --git a/Applications/ReinforcementLearning/DeepQ/jni/Android.mk b/Applications/ReinforcementLearning/DeepQ/jni/Android.mk index bbd9293..7d74e0e 100644 --- a/Applications/ReinforcementLearning/DeepQ/jni/Android.mk +++ b/Applications/ReinforcementLearning/DeepQ/jni/Android.mk @@ -38,12 +38,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib -fexceptions -DUSING_CUSTOM_ENV LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/arm64-v8a/ LOCAL_CXXFLAGS += -std=c++17 -DUSING_CUSTOM_ENV -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_deepq -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp $(ENVDIR)/CartPole/cartpole.cpp diff --git a/Applications/Resnet/jni/Android.mk b/Applications/Resnet/jni/Android.mk index dc2b015..1d11aa4 100644 --- a/Applications/Resnet/jni/Android.mk +++ b/Applications/Resnet/jni/Android.mk @@ -42,12 +42,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 
-Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_resnet -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp cifar_dataloader.cpp diff --git a/Applications/TransferLearning/CIFAR_Classification/jni/Android.mk b/Applications/TransferLearning/CIFAR_Classification/jni/Android.mk index 98a4ac4..98f7da4 100644 --- a/Applications/TransferLearning/CIFAR_Classification/jni/Android.mk +++ b/Applications/TransferLearning/CIFAR_Classification/jni/Android.mk @@ -82,12 +82,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_classification -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp @@ -106,12 +106,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_classification_func -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main_func.cpp diff --git a/Applications/TransferLearning/Draw_Classification/jni/Android.mk b/Applications/TransferLearning/Draw_Classification/jni/Android.mk index 57b62d8..3c82957 100644 
--- a/Applications/TransferLearning/Draw_Classification/jni/Android.mk +++ b/Applications/TransferLearning/Draw_Classification/jni/Android.mk @@ -81,12 +81,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_training -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp diff --git a/Applications/VGG/jni/Android.mk b/Applications/VGG/jni/Android.mk index 550f3d6..01354a4 100644 --- a/Applications/VGG/jni/Android.mk +++ b/Applications/VGG/jni/Android.mk @@ -50,12 +50,12 @@ LOCAL_ARM_NEON := true LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ LOCAL_CXXFLAGS += -std=c++17 -frtti -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_LDFLAGS += -fexceptions LOCAL_MODULE_TAGS := optional LOCAL_ARM_MODE := arm LOCAL_MODULE := nntrainer_vgg -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_SRC_FILES := main.cpp diff --git a/Applications/utils/jni/Android.mk b/Applications/utils/jni/Android.mk index 3cb6505..31205f0 100644 --- a/Applications/utils/jni/Android.mk +++ b/Applications/utils/jni/Android.mk @@ -13,12 +13,12 @@ UTILS_SRCS := $(NNTRAINER_APPLICATION)/utils/jni/bitmap_helpers.cpp UTILS_INCLUDES := $(NNTRAINER_APPLICATION)/utils/jni/includes LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions LOCAL_LDFLAGS += -fuse-ld=bfd LOCAL_MODULE_TAGS := optional -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp 
LOCAL_MODULE := app_utils LOCAL_SRC_FILES := $(UTILS_SRCS) diff --git a/jni/Android.mk b/jni/Android.mk index e2b41a2..4b4a286 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -229,12 +229,12 @@ INIPARSER_SRCS := $(INIPARSER_ROOT)/src/iniparser.c \ INIPARSER_INCLUDES := $(INIPARSER_ROOT)/src LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions LOCAL_LDFLAGS += -fuse-ld=bfd LOCAL_MODULE_TAGS := optional -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_MODULE := nntrainer LOCAL_SRC_FILES := $(NNTRAINER_SRCS) $(INIPARSER_SRCS) @@ -279,12 +279,12 @@ CCAPI_NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \ LOCAL_SHARED_LIBRARIES := nntrainer LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions LOCAL_LDFLAGS += -fuse-ld=bfd LOCAL_MODULE_TAGS := optional -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_MODULE := ccapi-nntrainer LOCAL_SRC_FILES := $(CCAPI_NNTRAINER_SRCS) @@ -304,12 +304,12 @@ CAPI_NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \ LOCAL_SHARED_LIBRARIES := ccapi-nntrainer ml-api-inference nntrainer LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -pthread -fexceptions +LOCAL_CFLAGS += -pthread -fexceptions -fopenmp LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions LOCAL_LDFLAGS += -fuse-ld=bfd LOCAL_MODULE_TAGS := optional -LOCAL_LDLIBS := -llog -landroid +LOCAL_LDLIBS := -llog -landroid -fopenmp LOCAL_MODULE := capi-nntrainer LOCAL_SRC_FILES := $(CAPI_NNTRAINER_SRCS) diff --git a/meson.build b/meson.build index d84d4cf..30062f4 100644 --- a/meson.build +++ b/meson.build @@ -116,6 +116,11 @@ if get_option('enable-blas') endif endif +openmp_dep = dummy_dep +if get_option('enable-openmp') + openmp_dep = dependency('openmp') +endif + if get_option('enable-profile') 
add_project_arguments('-DPROFILE=1', language:['c', 'cpp']) endif diff --git a/meson_options.txt b/meson_options.txt index cb4022e..28f180b 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,6 +1,4 @@ option('platform', type: 'combo', choices: ['none', 'tizen', 'yocto'], value: 'none') -option('enable-blas', type: 'boolean', value: true) -option('enable-cublas', type: 'boolean', value: false) option('enable-app', type: 'boolean', value: true) option('install-app', type: 'boolean', value: true) option('use_gym', type: 'boolean', value: false) @@ -26,3 +24,9 @@ option('capi-ml-common-actual', type: 'string', value: 'capi-ml-common', # test related option option('reduce-tolerance', type: 'boolean', value: true) option('enable-long-test', type: 'boolean', value: false) + +# backend options +option('enable-blas', type: 'boolean', value: true) +option('enable-cublas', type: 'boolean', value: false) +option('enable-openmp', type: 'boolean', value: true) + diff --git a/nntrainer/meson.build b/nntrainer/meson.build index 9b396c2..a2b947d 100644 --- a/nntrainer/meson.build +++ b/nntrainer/meson.build @@ -15,7 +15,8 @@ nntrainer_base_deps=[ ml_api_common_dep, libm_dep, libdl_dep, - thread_dep + thread_dep, + openmp_dep ] if get_option('platform') == 'tizen' -- 2.7.4