From: 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 Date: Mon, 18 Mar 2019 05:08:11 +0000 (+0900) Subject: Fix format in contrib (#4762) X-Git-Tag: submit/tizen/20190325.013700~41 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a33b0b25155e06bbb7ad050e4798985dc900ad7c;p=platform%2Fcore%2Fml%2Fnnfw.git Fix format in contrib (#4762) Fix format in contrib except benchmark_acl (bencmark_acl is ported codes from arm compute) Signed-off-by: Hyeongseok Oh --- diff --git a/contrib/.FORMATCHECKED b/contrib/.FORMATCHECKED new file mode 100644 index 0000000..e69de29 diff --git a/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h index cf51219..405ca98 100644 --- a/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h +++ b/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h @@ -34,28 +34,32 @@ extern "C" { #define LOG_TAG "TFLITE_NATIVE" #define TFLITE_NATIVE_LOG(log_level, format, args...) \ - do { \ - switch (log_level) { \ - case ERROR: \ - LOGE(format, ## args); \ - case WARNING: \ - LOGE(format, ## args); \ - default: \ - LOGI(format, ## args); \ - } \ - } while (0) + do \ + { \ + switch (log_level) \ + { \ + case ERROR: \ + LOGE(format, ##args); \ + case WARNING: \ + LOGE(format, ##args); \ + default: \ + LOGI(format, ##args); \ + } \ + } while (0) #else // __TIZEN__ -#define LEVEL_TO_STR(level) (\ - ((level) == ERROR) ? "ERROR" : \ - ((level) == WARNING) ? "WARNING" : \ - ((level) == INFO) ? "INFO": \ - ((level) == DEBUG) ? "DEBUG" : "DEFAULT") -#define TFLITE_NATIVE_LOG(log_level, format, args...) \ - do { \ - printf("%s: %s: ", LEVEL_TO_STR(log_level), __FILE__); \ - printf(format, ## args); \ - printf("\n"); \ - }while (0) +#define LEVEL_TO_STR(level) \ + (((level) == ERROR) \ + ? "ERROR" \ + : ((level) == WARNING) \ + ? "WARNING" \ + : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT") +#define TFLITE_NATIVE_LOG(log_level, format, args...) \ + do \ + { \ + printf("%s: %s: ", LEVEL_TO_STR(log_level), __FILE__); \ + printf(format, ##args); \ + printf("\n"); \ + } while (0) #endif // __TIZEN__ #ifdef __cplusplus diff --git a/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h index 7fddb54..c5a5375 100644 --- a/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h +++ b/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h @@ -26,28 +26,27 @@ extern "C" { #endif /*__cplusplus*/ -typedef enum -{ - /** 32-bit signed integer. */ - INT32 = 1, +typedef enum { + /** 32-bit signed integer. */ + INT32 = 1, - /** 32-bit single precision floating point. */ - FLOAT32 = 2, + /** 32-bit single precision floating point. */ + FLOAT32 = 2, - /** 8-bit unsigned integer. */ - UINT8 = 3, + /** 8-bit unsigned integer. */ + UINT8 = 3, - /** 64-bit signed integer. */ - INT64 = 4 + /** 64-bit signed integer. 
*/ + INT64 = 4 } TFLiteNativeType; -void tflite_interpreter_setNumThreads(long* interpreterHandle, int numThreads); +void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads); -long long tflite_flatbuffermodel_BuildFromFile(char* modelPath); +long long tflite_flatbuffermodel_BuildFromFile(char *modelPath); -long long tflite_builder_interpreterBuilder(long* modelHandle); +long long tflite_builder_interpreterBuilder(long *modelHandle); -void* tflite_interpreter_run(long* interpreterHandle, void* values, int inputLength, int dataType); +void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType); #ifdef __cplusplus } diff --git a/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp b/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp index 4133046..0304720 100644 --- a/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp +++ b/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp @@ -24,18 +24,19 @@ int getNumBytes(TFLiteNativeType dataType) { - switch (dataType) { + switch (dataType) + { case INT32: - return 4; + return 4; case FLOAT32: - return 4; + return 4; case UINT8: - return 1; + return 1; case INT64: - return 8; + return 8; default: - return 1; - } + return 1; + } } /// @@ -43,63 +44,63 @@ int getNumBytes(TFLiteNativeType dataType) /// /// Handle of the interpreter instance. /// Number of threads. -void tflite_interpreter_setNumThreads(long* interpreterHandle, int numThreads) +void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads) { - assert(interpreterHandle != nullptr); - tflite::Interpreter* interpreter = reinterpret_cast(*interpreterHandle); + assert(interpreterHandle != nullptr); + tflite::Interpreter *interpreter = reinterpret_cast(*interpreterHandle); - interpreter->SetNumThreads(numThreads); + interpreter->SetNumThreads(numThreads); - TFLITE_NATIVE_LOG(DEBUG, "Number of threads: %d", numThreads); - return; + TFLITE_NATIVE_LOG(DEBUG, "Number of threads: %d", numThreads); + return; } /// /// Creates a Flat Buffer Model from the given .tflite model. /// /// Path of the model. -long long -tflite_flatbuffermodel_BuildFromFile(char* modelPath) +long long tflite_flatbuffermodel_BuildFromFile(char *modelPath) { - if (modelPath == nullptr) { - TFLITE_NATIVE_LOG(ERROR, "Invalid parameter"); - return 0; - } - TFLITE_NATIVE_LOG(ERROR, "Model Path: %s", modelPath); - - if (access(modelPath, F_OK) == -1) { - TFLITE_NATIVE_LOG(ERROR, "Failed to access model [%s]", - strerror(errno)); - return 0; - } - - auto model = tflite::FlatBufferModel::BuildFromFile(modelPath); - - TFLITE_NATIVE_LOG(DEBUG, "Successfully loaded model"); - return reinterpret_cast(model.release()); + if (modelPath == nullptr) + { + TFLITE_NATIVE_LOG(ERROR, "Invalid parameter"); + return 0; + } + TFLITE_NATIVE_LOG(ERROR, "Model Path: %s", modelPath); + + if (access(modelPath, F_OK) == -1) + { + TFLITE_NATIVE_LOG(ERROR, "Failed to access model [%s]", strerror(errno)); + return 0; + } + + auto model = tflite::FlatBufferModel::BuildFromFile(modelPath); + + TFLITE_NATIVE_LOG(DEBUG, "Successfully loaded model"); + return reinterpret_cast(model.release()); } /// /// Creates an interpreter instance taking the flatbuffer model as input. /// /// Address of the flatbuffer model. 
-long long -tflite_builder_interpreterBuilder(long* modelHandle) +long long tflite_builder_interpreterBuilder(long *modelHandle) { - assert(modelHandle != nullptr); - tflite::FlatBufferModel* model = reinterpret_cast(*modelHandle); + assert(modelHandle != nullptr); + tflite::FlatBufferModel *model = reinterpret_cast(*modelHandle); - tflite::ops::builtin::BuiltinOpResolver resolver; - std::unique_ptr interpreter; + tflite::ops::builtin::BuiltinOpResolver resolver; + std::unique_ptr interpreter; - TfLiteStatus status = tflite::InterpreterBuilder (*model, resolver)(&interpreter); + TfLiteStatus status = tflite::InterpreterBuilder(*model, resolver)(&interpreter); - if (status != kTfLiteOk) { - TFLITE_NATIVE_LOG(DEBUG, "Cannot create interpreter"); - return 0; - } - TFLITE_NATIVE_LOG(DEBUG, "CheckPoint interpreter"); - return reinterpret_cast(interpreter.release()); + if (status != kTfLiteOk) + { + TFLITE_NATIVE_LOG(DEBUG, "Cannot create interpreter"); + return 0; + } + TFLITE_NATIVE_LOG(DEBUG, "CheckPoint interpreter"); + return reinterpret_cast(interpreter.release()); } /// @@ -109,34 +110,33 @@ tflite_builder_interpreterBuilder(long* modelHandle) /// Input values for the model. /// Length of the input. /// Data type key of the input. -void* tflite_interpreter_run(long* interpreterHandle, void* values, int inputLength, - int dataType) +void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType) { - assert(interpreterHandle != nullptr); - tflite::Interpreter* interpreter = reinterpret_cast(*interpreterHandle); + assert(interpreterHandle != nullptr); + tflite::Interpreter *interpreter = reinterpret_cast(*interpreterHandle); - int inputTensorIndex = interpreter->inputs()[0]; + int inputTensorIndex = interpreter->inputs()[0]; - //TODO:: input tensor size will be passed as a parameter. It is hardcoded for now. - interpreter->ResizeInputTensor(inputTensorIndex, - { 1, 224, 224, 3 }); + // TODO:: input tensor size will be passed as a parameter. It is hardcoded for now. 
+ interpreter->ResizeInputTensor(inputTensorIndex, {1, 224, 224, 3}); - if (interpreter->AllocateTensors() != kTfLiteOk) { - TFLITE_NATIVE_LOG(ERROR, "Failed to allocate tensors!"); - return nullptr; - } + if (interpreter->AllocateTensors() != kTfLiteOk) + { + TFLITE_NATIVE_LOG(ERROR, "Failed to allocate tensors!"); + return nullptr; + } - float* inputTensorPointer = interpreter->typed_tensor(inputTensorIndex); + float *inputTensorPointer = interpreter->typed_tensor(inputTensorIndex); - int numBytes = getNumBytes((TFLiteNativeType) dataType); + int numBytes = getNumBytes((TFLiteNativeType)dataType); - memcpy(inputTensorPointer, values, inputLength * numBytes); + memcpy(inputTensorPointer, values, inputLength * numBytes); - if (interpreter->Invoke() != kTfLiteOk) { - TFLITE_NATIVE_LOG(ERROR, "Failed to invoke"); - } + if (interpreter->Invoke() != kTfLiteOk) + { + TFLITE_NATIVE_LOG(ERROR, "Failed to invoke"); + } - float* output = interpreter->typed_output_tensor(0); - return output; + float *output = interpreter->typed_output_tensor(0); + return output; } - diff --git a/contrib/benchmark_acl/.FORMATDENY b/contrib/benchmark_acl/.FORMATDENY new file mode 100644 index 0000000..e69de29 diff --git a/contrib/detection/detection.cpp b/contrib/detection/detection.cpp index 8a988cc..8fe78ca 100644 --- a/contrib/detection/detection.cpp +++ b/contrib/detection/detection.cpp @@ -24,12 +24,13 @@ #include "misc/benchmark.h" -#define CHECK_TF(e) { \ - if(!(e).ok()) \ - { \ - throw std::runtime_error{"'" #e "' FAILED"}; \ - } \ -} +#define CHECK_TF(e) \ + { \ + if (!(e).ok()) \ + { \ + throw std::runtime_error{"'" #e "' FAILED"}; \ + } \ + } int main(int argc, char **argv) { @@ -46,7 +47,7 @@ int main(int argc, char **argv) output_nodes.emplace_back(argv[argn]); } - tensorflow::Session* sess; + tensorflow::Session *sess; CHECK_TF(tensorflow::NewSession(tensorflow::SessionOptions(), &sess)); @@ -62,7 +63,7 @@ int main(int argc, char **argv) { std::chrono::milliseconds elapsed(0); - nnfw::misc::benchmark::measure(elapsed) << [&] (void) { + nnfw::misc::benchmark::measure(elapsed) << [&](void) { CHECK_TF(sess->Run({{"input_node", input}}, output_nodes, {}, &outputs)); }; diff --git a/contrib/labs/jniacl/src/io_accessor.cc b/contrib/labs/jniacl/src/io_accessor.cc index 1036607..076c93f 100644 --- a/contrib/labs/jniacl/src/io_accessor.cc +++ b/contrib/labs/jniacl/src/io_accessor.cc @@ -47,13 +47,12 @@ bool InputAccessor::access_tensor(arm_compute::ITensor &tensor) arm_compute::Window window; window.use_tensor_dimensions(tensor.info()->tensor_shape()); - execute_window_loop(window, [&](const arm_compute::Coordinates& id) - { + execute_window_loop(window, [&](const arm_compute::Coordinates &id) { *reinterpret_cast(tensor.ptr_to_element(id)) = _test_input; _test_input += _inc ? 
1.0 : 0.0; - __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n", - id.y(), id.x(), *reinterpret_cast(tensor.ptr_to_element(id))); + __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n", id.y(), id.x(), + *reinterpret_cast(tensor.ptr_to_element(id))); }); return true; } @@ -64,10 +63,9 @@ bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor) arm_compute::Window window; window.use_tensor_dimensions(tensor.info()->tensor_shape()); - execute_window_loop(window, [&](const arm_compute::Coordinates& id) - { - __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n", - id.y(), id.x(), *reinterpret_cast(tensor.ptr_to_element(id))); + execute_window_loop(window, [&](const arm_compute::Coordinates &id) { + __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n", id.y(), id.x(), + *reinterpret_cast(tensor.ptr_to_element(id))); }); return false; // end the network } @@ -78,8 +76,7 @@ bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor) arm_compute::Window window; window.use_tensor_dimensions(tensor.info()->tensor_shape()); - execute_window_loop(window, [&](const arm_compute::Coordinates& id) - { + execute_window_loop(window, [&](const arm_compute::Coordinates &id) { *reinterpret_cast(tensor.ptr_to_element(id)) = _test_weight; _test_weight += _inc ? 1.0 : 0.0; }); @@ -92,8 +89,7 @@ bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor) arm_compute::Window window; window.use_tensor_dimensions(tensor.info()->tensor_shape()); - execute_window_loop(window, [&](const arm_compute::Coordinates& id) - { + execute_window_loop(window, [&](const arm_compute::Coordinates &id) { *reinterpret_cast(tensor.ptr_to_element(id)) = 0.0; }); return true; diff --git a/contrib/labs/jniacl/src/io_accessor.h b/contrib/labs/jniacl/src/io_accessor.h index 4033020..bc43766 100644 --- a/contrib/labs/jniacl/src/io_accessor.h +++ b/contrib/labs/jniacl/src/io_accessor.h @@ -45,49 +45,49 @@ class InputAccessor : public arm_compute::graph::ITensorAccessor { public: - InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; } - InputAccessor(InputAccessor&&) = default; + InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; } + InputAccessor(InputAccessor &&) = default; - // Inherited methods overriden: - bool access_tensor(arm_compute::ITensor& tensor) override; + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor &tensor) override; private: - bool _inc; - float _test_input; + bool _inc; + float _test_input; }; class OutputAccessor : public arm_compute::graph::ITensorAccessor { public: - OutputAccessor() = default; - OutputAccessor(OutputAccessor&&) = default; + OutputAccessor() = default; + OutputAccessor(OutputAccessor &&) = default; - // Inherited methods overriden: - bool access_tensor(arm_compute::ITensor& tensor) override; + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor &tensor) override; }; class WeightAccessor : public arm_compute::graph::ITensorAccessor { public: - WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; } - WeightAccessor(WeightAccessor&&) = default; + WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; } + WeightAccessor(WeightAccessor &&) = default; - // Inherited methods overriden: - bool access_tensor(arm_compute::ITensor& tensor) override; + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor &tensor) override; private: - bool _inc; - float _test_weight; + bool _inc; + float _test_weight; }; class BiasAccessor : 
public arm_compute::graph::ITensorAccessor { public: - BiasAccessor() = default; - BiasAccessor(BiasAccessor&&) = default; + BiasAccessor() = default; + BiasAccessor(BiasAccessor &&) = default; - // Inherited methods overriden: - bool access_tensor(arm_compute::ITensor& tensor) override; + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor &tensor) override; }; #endif // __IO_ACCESSOR_H__ diff --git a/contrib/labs/jniacl/src/jniacl_main.cc b/contrib/labs/jniacl/src/jniacl_main.cc index 515f287..4e5f10d 100644 --- a/contrib/labs/jniacl/src/jniacl_main.cc +++ b/contrib/labs/jniacl/src/jniacl_main.cc @@ -9,31 +9,29 @@ extern "C" JNIEXPORT jstring JNICALL Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject) { - using arm_compute::DataType; - using arm_compute::graph::Tensor; - using arm_compute::graph::TargetHint; - using arm_compute::graph::Graph; - using arm_compute::TensorInfo; - using arm_compute::TensorShape; - - arm_compute::graph::Graph graph; - TargetHint target_hint = TargetHint::OPENCL; - bool autoinc = true; - - graph << target_hint - << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32), - std::unique_ptr(new InputAccessor(autoinc))) + using arm_compute::DataType; + using arm_compute::graph::Tensor; + using arm_compute::graph::TargetHint; + using arm_compute::graph::Graph; + using arm_compute::TensorInfo; + using arm_compute::TensorShape; + + arm_compute::graph::Graph graph; + TargetHint target_hint = TargetHint::OPENCL; + bool autoinc = true; + + graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32), + std::unique_ptr(new InputAccessor(autoinc))) << arm_compute::graph::ConvolutionLayer( - 3U, 3U, 1U, - std::unique_ptr(new WeightAccessor(autoinc)), - std::unique_ptr(new BiasAccessor()), - arm_compute::PadStrideInfo(1, 1, 0, 0)) + 3U, 3U, 1U, std::unique_ptr(new WeightAccessor(autoinc)), + std::unique_ptr(new BiasAccessor()), + arm_compute::PadStrideInfo(1, 1, 0, 0)) << Tensor(std::unique_ptr(new OutputAccessor())); - ; + ; - graph.run(); + graph.run(); - std::string hello = "SoftMax Run OK"; + std::string hello = "SoftMax Run OK"; - return env->NewStringUTF(hello.c_str()); + return env->NewStringUTF(hello.c_str()); } diff --git a/contrib/labs/opencl_test/src/opencl_test.cc b/contrib/labs/opencl_test/src/opencl_test.cc index 93994ae..78a20bb 100644 --- a/contrib/labs/opencl_test/src/opencl_test.cc +++ b/contrib/labs/opencl_test/src/opencl_test.cc @@ -70,73 +70,71 @@ void printDeviceInfo(int n, cl::Device &device, cl::Device &default_device) std::cout << "\n"; } - class OpenCLGpu { - public: - cl::Platform platform_; - cl::Context context_; - cl::vector devices_; - std::vector q_; - cl::Program program_; - - OpenCLGpu() - { - cl_int cl_error; - - platform_ = cl::Platform::getDefault(); +public: + cl::Platform platform_; + cl::Context context_; + cl::vector devices_; + std::vector q_; + cl::Program program_; + + OpenCLGpu() + { + cl_int cl_error; - try - { - cl_context_properties properties[3] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)platform_(), 0 - }; + platform_ = cl::Platform::getDefault(); - context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error); - } - catch (cl::Error &err) // thrown when there is no Context for this platform - { - std::cout << "\t\t No Context Found\n"; - return; - } - - devices_ = context_.getInfo(); + try + { + cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)platform_(), 0}; - for (int dev_id = 0; 
dev_id < devices_.size(); dev_id++) - { - cl::CommandQueue* que = new cl::CommandQueue(context_, devices_[dev_id]); - q_.emplace_back(que); - } + context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error); } - - ~OpenCLGpu() + catch (cl::Error &err) // thrown when there is no Context for this platform { - for (auto each_q : q_) - delete each_q; + std::cout << "\t\t No Context Found\n"; + return; } - void buildProgram(std::string& kernel_source_code) + devices_ = context_.getInfo(); + + for (int dev_id = 0; dev_id < devices_.size(); dev_id++) { - std::vector programStrings {kernel_source_code}; + cl::CommandQueue *que = new cl::CommandQueue(context_, devices_[dev_id]); + q_.emplace_back(que); + } + } - program_ = cl::Program(context_, programStrings); + ~OpenCLGpu() + { + for (auto each_q : q_) + delete each_q; + } - try - { - program_.build("-cl-std=CL1.2"); - } - catch (cl::Error &err) + void buildProgram(std::string &kernel_source_code) + { + std::vector programStrings{kernel_source_code}; + + program_ = cl::Program(context_, programStrings); + + try + { + program_.build("-cl-std=CL1.2"); + } + catch (cl::Error &err) + { + cl_int buildErr = CL_SUCCESS; + auto buildInfo = program_.getBuildInfo(&buildErr); + for (auto &pair : buildInfo) { - cl_int buildErr = CL_SUCCESS; - auto buildInfo = program_.getBuildInfo(&buildErr); - for (auto &pair : buildInfo) { - std::cerr << pair.second << std::endl << std::endl; - } + std::cerr << pair.second << std::endl << std::endl; } } + } }; - void checkContextMem() { cl_int cl_error; @@ -174,51 +172,46 @@ void checkContextMem() for (int i = 0; i < length; i++) input[i] = i; - cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), input.data(), &cl_error); - cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); + cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T), + input.data(), &cl_error); + cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T), + output.data(), &cl_error); // compile test cl code - std::string kernel_source { - "typedef int T; \n" \ - "kernel void memory_test( \n" \ - " const int dev_id, \n" \ - " global T* input, \n" \ - " global T* output, \n" \ - " const int start_idx, \n" \ - " const int count) \n" \ - "{ \n" \ - " int input_idx = get_global_id(0); \n" \ - " if(input_idx < count) \n" \ - " { \n" \ - " int output_idx = start_idx + input_idx; \n" \ - " output[output_idx] = input[input_idx] + dev_id; \n" \ - " } \n" \ - "} \n" - }; + std::string kernel_source{"typedef int T; \n" + "kernel void memory_test( \n" + " const int dev_id, \n" + " global T* input, \n" + " global T* output, \n" + " const int start_idx, \n" + " const int count) \n" + "{ \n" + " int input_idx = get_global_id(0); \n" + " if(input_idx < count) \n" + " { \n" + " int output_idx = start_idx + input_idx; \n" + " output[output_idx] = input[input_idx] + dev_id; \n" + " } \n" + "} \n"}; gpu.buildProgram(kernel_source); try { - auto kernel_functor = cl::KernelFunctor - (gpu.program_, "memory_test"); // name should be same as cl function name + auto kernel_functor = cl::KernelFunctor( + gpu.program_, "memory_test"); // name should be same as cl function name // create a queue per device and queue a kernel job for (int dev_id = 0; dev_id < devices.size(); dev_id++) { - kernel_functor( - cl::EnqueueArgs( - *(gpu.q_[dev_id]), - cl::NDRange(items_per_device)), - (cl_int)dev_id, // dev id - 
input_buf, - output_buf, - (cl_int)(items_per_device * dev_id), // start index - (cl_int)(items_per_device), // count - cl_error - ); + kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)), + (cl_int)dev_id, // dev id + input_buf, output_buf, + (cl_int)(items_per_device * dev_id), // start index + (cl_int)(items_per_device), // count + cl_error); } // sync @@ -240,8 +233,8 @@ void checkContextMem() if (output[output_idx] != input[i] + dev_id) { std::cout << "Output[" << output_idx << "] : " - << "expected = " << input[i] + dev_id - << "; actual = " << output[output_idx] << "\n"; + << "expected = " << input[i] + dev_id << "; actual = " << output[output_idx] + << "\n"; use_same_memory = false; break; } @@ -261,10 +254,11 @@ void checkContextMem() void printHelp() { - std::cout << "opencl information: \n\n"; - std::cout << "\t -h : help\n"; - std::cout << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; - std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n"; + std::cout << "opencl information: \n\n"; + std::cout << "\t -h : help\n"; + std::cout + << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; + std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n"; } #include @@ -274,24 +268,25 @@ void printHelp() #define MAX_DEVICE_NUM 8 // just for testing -int kernel_idx[MAX_DEVICE_NUM]; +int kernel_idx[MAX_DEVICE_NUM]; unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed. -unsigned char kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed -int device_num; +unsigned char + kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed +int device_num; std::mutex kernel_complete_handler_mutex; std::condition_variable wakeup_main; std::mutex wakeup_main_mutex; -void notifyKernelFinished(cl_event ev, cl_int ev_info, void * device_idx) +void notifyKernelFinished(cl_event ev, cl_int ev_info, void *device_idx) { - std::cout << "callback from device[" << *((int*)device_idx) << "] : ==> completed.\n"; + std::cout << "callback from device[" << *((int *)device_idx) << "] : ==> completed.\n"; std::unique_lock lock(kernel_complete_handler_mutex); - kernel_completed |= 0x01 << *((int*)device_idx); + kernel_completed |= 0x01 << *((int *)device_idx); if (kernel_completed == kernel_completed_flag) - wakeup_main.notify_one(); + wakeup_main.notify_one(); } void testSync() @@ -300,33 +295,32 @@ void testSync() cl_int cl_error; typedef cl_int T; - const int items_per_device = 1024*768; + const int items_per_device = 1024 * 768; const int length = items_per_device * gpu.devices_.size(); std::vector output(length, 0); - cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); - - std::string kernel_source { - "kernel void test(global float* output, const int count) \n" \ - "{ \n" \ - " int idx = get_global_id(0); \n" \ - " if(idx < count) \n" \ - " { \n" \ - " float x = hypot(idx/1.111, idx*1.111); \n" \ - " for (int y = 0; y < 200; y++) \n" \ - " x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5); \n" \ - " output[idx] = x; \n" \ - " } \n" \ - "} \n" - }; + cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T), + output.data(), &cl_error); + + std::string kernel_source{"kernel void test(global float* output, const int count) \n" + "{ \n" + " int idx = 
get_global_id(0); \n" + " if(idx < count) \n" + " { \n" + " float x = hypot(idx/1.111, idx*1.111); \n" + " for (int y = 0; y < 200; y++) \n" + " x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5); \n" + " output[idx] = x; \n" + " } \n" + "} \n"}; gpu.buildProgram(kernel_source); try { - auto kernel_functor = cl::KernelFunctor - (gpu.program_, "test"); // name should be same as cl function name + auto kernel_functor = cl::KernelFunctor( + gpu.program_, "test"); // name should be same as cl function name // variable init cl::Event ev[MAX_DEVICE_NUM]; @@ -345,20 +339,17 @@ void testSync() // queueing with callback function for (int dev_id = 0; dev_id < gpu.devices_.size(); dev_id++) { - ev[dev_id] = kernel_functor( - cl::EnqueueArgs( - *(gpu.q_[dev_id]), - cl::NDRange(items_per_device)), - output_buf, - (cl_int)(items_per_device), // count - cl_error - ); - ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void*)(kernel_idx+dev_id)); + ev[dev_id] = kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)), + output_buf, + (cl_int)(items_per_device), // count + cl_error); + ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void *)(kernel_idx + dev_id)); // how to check kernel execution status // // auto status = ev[dev_id].getInfo(); - // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status == CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown") + // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status == + // CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown") // << std::endl; // std::cout << "Event status code = " << status << std::endl; } @@ -366,7 +357,7 @@ void testSync() // long wait until kernels are over { std::unique_lock lk(wakeup_main_mutex); - wakeup_main.wait(lk, []{ return (kernel_completed == kernel_completed_flag); }); + wakeup_main.wait(lk, [] { return (kernel_completed == kernel_completed_flag); }); std::cout << "all devices were completed.\n"; } @@ -375,7 +366,6 @@ void testSync() { std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl; } - } int main(const int argc, char **argv) diff --git a/contrib/tflite_test/tflite_test.cpp b/contrib/tflite_test/tflite_test.cpp index 4ee9e5a..3a27789 100644 --- a/contrib/tflite_test/tflite_test.cpp +++ b/contrib/tflite_test/tflite_test.cpp @@ -30,12 +30,13 @@ #include #include -#define TF_ENSURE(e) { \ - if(!(e).ok()) \ - { \ - throw std::runtime_error{"'" #e "' FAILED"}; \ - } \ -} +#define TF_ENSURE(e) \ + { \ + if (!(e).ok()) \ + { \ + throw std::runtime_error{"'" #e "' FAILED"}; \ + } \ + } using namespace tflite; using namespace tflite::ops::builtin; @@ -130,7 +131,7 @@ int main(int argc, char **argv) // const std::string full_model_path{argv[2]}; - tensorflow::Session* full_sess; + tensorflow::Session *full_sess; tensorflow::GraphDef full_model; TF_ENSURE(tensorflow::NewSession(tensorflow::SessionOptions(), &full_sess)); @@ -191,7 +192,7 @@ int main(int argc, char **argv) // // Compare Output // - auto equals = [] (float lhs, float rhs) { + auto equals = [](float lhs, float rhs) { // TODO Allow users to set tolerance if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs)) { @@ -213,7 +214,8 @@ int main(int argc, char **argv) const auto element_count = count_elements(tensor); - std::cout << "Compare output #" << n << "(" << tensor->name << ", " << element_count << " elements)" << std::endl; + std::cout << "Compare output #" << n << "(" << 
tensor->name << ", " << element_count + << " elements)" << std::endl; for (uint32_t index = 0; index < element_count; ++index) { const auto full_value = output_nodes.at(n).flat().data()[index]; @@ -221,7 +223,8 @@ int main(int argc, char **argv) if (!equals(full_value, lite_value)) { - std::cerr << full_value << " is expected, but " << lite_value << " is obtaeind (at " << n << ":" << index << ")" << std::endl; + std::cerr << full_value << " is expected, but " << lite_value << " is obtaeind (at " << n + << ":" << index << ")" << std::endl; matched = false; } } diff --git a/contrib/uben/Convolution.cpp b/contrib/uben/Convolution.cpp index a5a4e8c..ad69f1c 100644 --- a/contrib/uben/Convolution.cpp +++ b/contrib/uben/Convolution.cpp @@ -50,14 +50,15 @@ using namespace arm_compute; namespace { -enum Layout { NCHW, NHWC }; +enum Layout +{ + NCHW, + NHWC +}; struct Initializer { - Initializer() - { - CLScheduler::get().default_init(); - } + Initializer() { CLScheduler::get().default_init(); } }; Initializer initializer; @@ -68,9 +69,9 @@ TensorInfo make_info(uint32_t N) return TensorInfo{shape, 1, DataType::F32}; } -template TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W); +template TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W); -template<> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W) +template <> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W) { TensorShape shape{W, H, C, N}; TensorInfo info{shape, 1, DataType::F32}; @@ -78,7 +79,7 @@ template<> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32 return info; } -template<> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W) +template <> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W) { TensorShape shape{C, W, H, N}; TensorInfo info{shape, 1, DataType::F32}; @@ -180,18 +181,35 @@ struct Configuration left_padding = pad_W / 2; right_padding = pad_W / 2; - if (is_odd(pad_H)) top_padding += 1; - if (is_odd(pad_W)) left_padding += 1; + if (is_odd(pad_H)) + top_padding += 1; + if (is_odd(pad_W)) + left_padding += 1; } - template TensorInfo src_info() const { return make_info(ifm_N, ifm_C, ifm_H, ifm_W); } - template TensorInfo dst_info() const { return make_info(ofm_N, ofm_C, ofm_H, ofm_W); } - template TensorInfo ker_info() const { return make_info(ker_N, ker_C, ker_H, ker_W); } + template TensorInfo src_info() const + { + return make_info(ifm_N, ifm_C, ifm_H, ifm_W); + } + template TensorInfo dst_info() const + { + return make_info(ofm_N, ofm_C, ofm_H, ofm_W); + } + template TensorInfo ker_info() const + { + return make_info(ker_N, ker_C, ker_H, ker_W); + } TensorInfo bias_info(void) const { return make_info(ker_N); } PadStrideInfo pad_stride_info(void) const { - return PadStrideInfo{horizontal_stride, vertical_stride, left_padding, right_padding, top_padding, bottom_padding, DimensionRoundingType::FLOOR}; + return PadStrideInfo{horizontal_stride, + vertical_stride, + left_padding, + right_padding, + top_padding, + bottom_padding, + DimensionRoundingType::FLOOR}; } }; @@ -205,8 +223,7 @@ struct Configuration #endif // CL_DIRECT_CONVOLUTION #if CL_DIRECT_CONVOLUTION -NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) { CLDirectConvolutionLayer conv; // Configure @@ -222,7 +239,8 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) 
ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -231,11 +249,13 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) dst_tensor.allocator()->allocate(); // Run! - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) -NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) { CLDirectConvolutionLayer conv; // Configure @@ -251,7 +271,8 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -260,7 +281,10 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) dst_tensor.allocator()->allocate(); // Run! - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) #endif // CL_DIRECT_CONVOLUTION @@ -269,8 +293,7 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) #endif // CL_GEMM_CONVOLUTION #if CL_GEMM_CONVOLUTION -NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) { CLGEMMConvolutionLayer conv; // Configure @@ -286,7 +309,8 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -295,11 +319,13 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) dst_tensor.allocator()->allocate(); // Run - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) -NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) { CLGEMMConvolutionLayer conv; // Configure @@ -315,7 +341,8 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), 
dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -324,7 +351,10 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) dst_tensor.allocator()->allocate(); // Run - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) #endif // CL_GEMM_CONVOLUTION @@ -333,8 +363,7 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) #endif // CL_WINOGRAD_CONVOLUTION #if CL_WINOGRAD_CONVOLUTION -NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer meter) { CLWinogradConvolutionLayer conv; // Configure @@ -350,7 +379,8 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer mete ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -359,11 +389,13 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer mete dst_tensor.allocator()->allocate(); // Run - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) -NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer meter) -{ +NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer meter) { CLWinogradConvolutionLayer conv; // Configure @@ -379,7 +411,8 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer mete ker_tensor.allocator()->init(p.ker_info()); bias_tensor.allocator()->init(p.bias_info()); - check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info())); + check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), + p.pad_stride_info())); conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info()); src_tensor.allocator()->allocate(); @@ -388,6 +421,9 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer mete dst_tensor.allocator()->allocate(); // Run - meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); }); + meter.measure([&](int) { + conv.run(); + CLScheduler::get().sync(); + }); }) #endif // CL_WINOGRAD_CONVOLUTION
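
Note: this commit only applies the project's common style to the contrib sources; the format configuration itself is not part of the patch. Judging from the reformatted hunks -- Allman braces, two-space indentation, a roughly 100-column limit, and right-aligned pointers (`char *p` instead of `char* p`) -- a .clang-format file along the following lines would approximate the style. The option values below are an illustrative guess, not the repository's checked-in configuration:

  ---
  Language:                         Cpp
  BasedOnStyle:                     LLVM
  IndentWidth:                      2
  ColumnLimit:                      100
  BreakBeforeBraces:                Allman
  PointerAlignment:                 Right
  AccessModifierOffset:             -2
  AllowShortFunctionsOnASingleLine: All
  SortIncludes:                     false

The empty contrib/.FORMATCHECKED marker added by this patch presumably opts the contrib directory in to the project's format check, while contrib/benchmark_acl/.FORMATDENY excludes benchmark_acl, whose code is ported from ARM Compute and is therefore left in its upstream formatting.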