Fix format in contrib (#4762)
author오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Mon, 18 Mar 2019 05:08:11 +0000 (14:08 +0900)
committer이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Mon, 18 Mar 2019 05:08:11 +0000 (14:08 +0900)
Fix format in contrib except benchmark_acl
(benchmark_acl is ported codes from arm compute)

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
12 files changed:
contrib/.FORMATCHECKED [new file with mode: 0644]
contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
contrib/benchmark_acl/.FORMATDENY [new file with mode: 0644]
contrib/detection/detection.cpp
contrib/labs/jniacl/src/io_accessor.cc
contrib/labs/jniacl/src/io_accessor.h
contrib/labs/jniacl/src/jniacl_main.cc
contrib/labs/opencl_test/src/opencl_test.cc
contrib/tflite_test/tflite_test.cpp
contrib/uben/Convolution.cpp

diff --git a/contrib/.FORMATCHECKED b/contrib/.FORMATCHECKED
new file mode 100644 (file)
index 0000000..e69de29
index cf51219..405ca98 100644 (file)
@@ -34,28 +34,32 @@ extern "C" {
 #define LOG_TAG "TFLITE_NATIVE"
 
 #define TFLITE_NATIVE_LOG(log_level, format, args...) \
-       do { \
-               switch (log_level) { \
-               case ERROR: \
-                       LOGE(format, ## args); \
-               case WARNING: \
-                       LOGE(format, ## args); \
-               default: \
-                       LOGI(format, ## args); \
-               } \
-       } while (0)
+  do                                                  \
+  {                                                   \
+    switch (log_level)                                \
+    {                                                 \
+      case ERROR:                                     \
+        LOGE(format, ##args);                         \
+      case WARNING:                                   \
+        LOGE(format, ##args);                         \
+      default:                                        \
+        LOGI(format, ##args);                         \
+    }                                                 \
+  } while (0)
 #else // __TIZEN__
-#define LEVEL_TO_STR(level) (\
-       ((level) == ERROR) ? "ERROR" : \
-       ((level) == WARNING) ? "WARNING" : \
-       ((level) == INFO) ? "INFO": \
-       ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
-#define TFLITE_NATIVE_LOG(log_level, format, args...) \
-       do { \
-               printf("%s: %s: ", LEVEL_TO_STR(log_level), __FILE__); \
-               printf(format, ## args); \
-               printf("\n"); \
-       }while (0)
+#define LEVEL_TO_STR(level)   \
+  (((level) == ERROR)         \
+       ? "ERROR"              \
+       : ((level) == WARNING) \
+             ? "WARNING"      \
+             : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
+#define TFLITE_NATIVE_LOG(log_level, format, args...)      \
+  do                                                       \
+  {                                                        \
+    printf("%s: %s: ", LEVEL_TO_STR(log_level), __FILE__); \
+    printf(format, ##args);                                \
+    printf("\n");                                          \
+  } while (0)
 #endif // __TIZEN__
 
 #ifdef __cplusplus
index 7fddb54..c5a5375 100644 (file)
 extern "C" {
 #endif /*__cplusplus*/
 
-typedef enum
-{
-    /** 32-bit signed integer. */
-    INT32 = 1,
+typedef enum {
+  /** 32-bit signed integer. */
+  INT32 = 1,
 
-    /** 32-bit single precision floating point. */
-    FLOAT32 = 2,
+  /** 32-bit single precision floating point. */
+  FLOAT32 = 2,
 
-    /** 8-bit unsigned integer. */
-    UINT8 = 3,
+  /** 8-bit unsigned integer. */
+  UINT8 = 3,
 
-    /** 64-bit signed integer. */
-    INT64 = 4
+  /** 64-bit signed integer. */
+  INT64 = 4
 } TFLiteNativeType;
 
-void tflite_interpreter_setNumThreads(longinterpreterHandle, int numThreads);
+void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads);
 
-long long tflite_flatbuffermodel_BuildFromFile(charmodelPath);
+long long tflite_flatbuffermodel_BuildFromFile(char *modelPath);
 
-long long tflite_builder_interpreterBuilder(longmodelHandle);
+long long tflite_builder_interpreterBuilder(long *modelHandle);
 
-void* tflite_interpreter_run(long* interpreterHandle, void* values, int inputLength, int dataType);
+void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType);
 
 #ifdef __cplusplus
 }
index 4133046..0304720 100644 (file)
 
 int getNumBytes(TFLiteNativeType dataType)
 {
-    switch (dataType) {
+  switch (dataType)
+  {
     case INT32:
-        return 4;
+      return 4;
     case FLOAT32:
-        return 4;
+      return 4;
     case UINT8:
-        return 1;
+      return 1;
     case INT64:
-        return 8;
+      return 8;
     default:
-        return 1;
-    }
+      return 1;
+  }
 }
 
 /// <summary>
@@ -43,63 +44,63 @@ int getNumBytes(TFLiteNativeType dataType)
 /// </summary>
 /// <param name="interpreterHandle">Handle of the interpreter instance.</param>
 /// <param name="numThreads">Number of threads.</param>
-void tflite_interpreter_setNumThreads(longinterpreterHandle, int numThreads)
+void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads)
 {
-    assert(interpreterHandle != nullptr);
-    tflite::Interpreter* interpreter = reinterpret_cast<tflite::Interpreter*>(*interpreterHandle);
+  assert(interpreterHandle != nullptr);
+  tflite::Interpreter *interpreter = reinterpret_cast<tflite::Interpreter *>(*interpreterHandle);
 
-    interpreter->SetNumThreads(numThreads);
+  interpreter->SetNumThreads(numThreads);
 
-    TFLITE_NATIVE_LOG(DEBUG, "Number of threads: %d", numThreads);
-    return;
+  TFLITE_NATIVE_LOG(DEBUG, "Number of threads: %d", numThreads);
+  return;
 }
 
 /// <summary>
 /// Creates a Flat Buffer Model from the given .tflite model.
 /// </summary>
 /// <param name="modelPath">Path of the model.</param>
-long long
-tflite_flatbuffermodel_BuildFromFile(char* modelPath)
+long long tflite_flatbuffermodel_BuildFromFile(char *modelPath)
 {
-    if (modelPath == nullptr) {
-        TFLITE_NATIVE_LOG(ERROR, "Invalid parameter");
-        return 0;
-    }
-    TFLITE_NATIVE_LOG(ERROR, "Model Path: %s", modelPath);
-
-    if (access(modelPath, F_OK) == -1) {
-        TFLITE_NATIVE_LOG(ERROR, "Failed to access model [%s]",
-            strerror(errno));
-        return 0;
-    }
-
-    auto model = tflite::FlatBufferModel::BuildFromFile(modelPath);
-
-    TFLITE_NATIVE_LOG(DEBUG, "Successfully loaded model");
-    return reinterpret_cast<long>(model.release());
+  if (modelPath == nullptr)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Invalid parameter");
+    return 0;
+  }
+  TFLITE_NATIVE_LOG(ERROR, "Model Path: %s", modelPath);
+
+  if (access(modelPath, F_OK) == -1)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Failed to access model [%s]", strerror(errno));
+    return 0;
+  }
+
+  auto model = tflite::FlatBufferModel::BuildFromFile(modelPath);
+
+  TFLITE_NATIVE_LOG(DEBUG, "Successfully loaded model");
+  return reinterpret_cast<long>(model.release());
 }
 
 /// <summary>
 /// Creates an interpreter instance taking the flatbuffer model as input.
 /// </summary>
 /// <param name="modelHandle">Address of the flatbuffer model.</param>
-long long
-tflite_builder_interpreterBuilder(long* modelHandle)
+long long tflite_builder_interpreterBuilder(long *modelHandle)
 {
-    assert(modelHandle != nullptr);
-    tflite::FlatBufferModel* model = reinterpret_cast<tflite::FlatBufferModel*>(*modelHandle);
+  assert(modelHandle != nullptr);
+  tflite::FlatBufferModel *model = reinterpret_cast<tflite::FlatBufferModel *>(*modelHandle);
 
-    tflite::ops::builtin::BuiltinOpResolver resolver;
-    std::unique_ptr<tflite::Interpreter> interpreter;
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  std::unique_ptr<tflite::Interpreter> interpreter;
 
-    TfLiteStatus status = tflite::InterpreterBuilder (*model, resolver)(&interpreter);
+  TfLiteStatus status = tflite::InterpreterBuilder(*model, resolver)(&interpreter);
 
-    if (status != kTfLiteOk) {
-        TFLITE_NATIVE_LOG(DEBUG, "Cannot create interpreter");
-        return 0;
-    }
-    TFLITE_NATIVE_LOG(DEBUG, "CheckPoint interpreter");
-    return reinterpret_cast<long>(interpreter.release());
+  if (status != kTfLiteOk)
+  {
+    TFLITE_NATIVE_LOG(DEBUG, "Cannot create interpreter");
+    return 0;
+  }
+  TFLITE_NATIVE_LOG(DEBUG, "CheckPoint interpreter");
+  return reinterpret_cast<long>(interpreter.release());
 }
 
 /// <summary>
@@ -109,34 +110,33 @@ tflite_builder_interpreterBuilder(long* modelHandle)
 /// <param name="values">Input values for the model.</param>
 /// <param name="inpLength">Length of the input.</param>
 /// <param name="dataType">Data type key of the input.</param>
-void* tflite_interpreter_run(long* interpreterHandle, void* values, int inputLength,
-    int dataType)
+void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType)
 {
-    assert(interpreterHandle != nullptr);
-    tflite::Interpreter* interpreter = reinterpret_cast<tflite::Interpreter*>(*interpreterHandle);
+  assert(interpreterHandle != nullptr);
+  tflite::Interpreter *interpreter = reinterpret_cast<tflite::Interpreter *>(*interpreterHandle);
 
-    int inputTensorIndex = interpreter->inputs()[0];
+  int inputTensorIndex = interpreter->inputs()[0];
 
-    //TODO:: input tensor size will be passed as a parameter. It is hardcoded for now.
-    interpreter->ResizeInputTensor(inputTensorIndex,
-        { 1, 224, 224, 3 });
+  // TODO:: input tensor size will be passed as a parameter. It is hardcoded for now.
+  interpreter->ResizeInputTensor(inputTensorIndex, {1, 224, 224, 3});
 
-    if (interpreter->AllocateTensors() != kTfLiteOk) {
-        TFLITE_NATIVE_LOG(ERROR, "Failed to allocate tensors!");
-        return nullptr;
-    }
+  if (interpreter->AllocateTensors() != kTfLiteOk)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Failed to allocate tensors!");
+    return nullptr;
+  }
 
-    float* inputTensorPointer = interpreter->typed_tensor<float>(inputTensorIndex);
+  float *inputTensorPointer = interpreter->typed_tensor<float>(inputTensorIndex);
 
-    int numBytes = getNumBytes((TFLiteNativeType) dataType);
+  int numBytes = getNumBytes((TFLiteNativeType)dataType);
 
-    memcpy(inputTensorPointer, values, inputLength * numBytes);
+  memcpy(inputTensorPointer, values, inputLength * numBytes);
 
-    if (interpreter->Invoke() != kTfLiteOk) {
-        TFLITE_NATIVE_LOG(ERROR, "Failed to invoke");
-    }
+  if (interpreter->Invoke() != kTfLiteOk)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Failed to invoke");
+  }
 
-    float* output = interpreter->typed_output_tensor<float>(0);
-    return output;
+  float *output = interpreter->typed_output_tensor<float>(0);
+  return output;
 }
-
diff --git a/contrib/benchmark_acl/.FORMATDENY b/contrib/benchmark_acl/.FORMATDENY
new file mode 100644 (file)
index 0000000..e69de29
index 8a988cc..8fe78ca 100644 (file)
 
 #include "misc/benchmark.h"
 
-#define CHECK_TF(e) {                             \
-  if(!(e).ok())                                   \
-  {                                               \
-    throw std::runtime_error{"'" #e "' FAILED"};  \
-  }                                               \
-}
+#define CHECK_TF(e)                                \
+  {                                                \
+    if (!(e).ok())                                 \
+    {                                              \
+      throw std::runtime_error{"'" #e "' FAILED"}; \
+    }                                              \
+  }
 
 int main(int argc, char **argv)
 {
@@ -46,7 +47,7 @@ int main(int argc, char **argv)
     output_nodes.emplace_back(argv[argn]);
   }
 
-  tensorflow::Sessionsess;
+  tensorflow::Session *sess;
 
   CHECK_TF(tensorflow::NewSession(tensorflow::SessionOptions(), &sess));
 
@@ -62,7 +63,7 @@ int main(int argc, char **argv)
   {
     std::chrono::milliseconds elapsed(0);
 
-    nnfw::misc::benchmark::measure(elapsed) << [&] (void) {
+    nnfw::misc::benchmark::measure(elapsed) << [&](void) {
       CHECK_TF(sess->Run({{"input_node", input}}, output_nodes, {}, &outputs));
     };
 
index 1036607..076c93f 100644 (file)
@@ -47,13 +47,12 @@ bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
   arm_compute::Window window;
   window.use_tensor_dimensions(tensor.info()->tensor_shape());
 
-  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
-  {
+  execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
     *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_input;
     _test_input += _inc ? 1.0 : 0.0;
 
-    __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n",
-        id.y(), id.x(), *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
+    __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n", id.y(), id.x(),
+                        *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
   });
   return true;
 }
@@ -64,10 +63,9 @@ bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
   arm_compute::Window window;
   window.use_tensor_dimensions(tensor.info()->tensor_shape());
 
-  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
-  {
-    __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n",
-      id.y(), id.x(), *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
+  execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
+    __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n", id.y(), id.x(),
+                        *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
   });
   return false; // end the network
 }
@@ -78,8 +76,7 @@ bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
   arm_compute::Window window;
   window.use_tensor_dimensions(tensor.info()->tensor_shape());
 
-  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
-  {
+  execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
     *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_weight;
     _test_weight += _inc ? 1.0 : 0.0;
   });
@@ -92,8 +89,7 @@ bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
   arm_compute::Window window;
   window.use_tensor_dimensions(tensor.info()->tensor_shape());
 
-  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
-  {
+  execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
     *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = 0.0;
   });
   return true;
index 4033020..bc43766 100644 (file)
 class InputAccessor : public arm_compute::graph::ITensorAccessor
 {
 public:
-    InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; }
-    InputAccessor(InputAccessor&&) = default;
+  InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; }
+  InputAccessor(InputAccessor &&) = default;
 
-    // Inherited methods overriden:
-    bool access_tensor(arm_compute::ITensor& tensor) override;
+  // Inherited methods overriden:
+  bool access_tensor(arm_compute::ITensor &tensor) override;
 
 private:
-    bool _inc;
-    float _test_input;
+  bool _inc;
+  float _test_input;
 };
 
 class OutputAccessor : public arm_compute::graph::ITensorAccessor
 {
 public:
-    OutputAccessor() = default;
-    OutputAccessor(OutputAccessor&&) = default;
+  OutputAccessor() = default;
+  OutputAccessor(OutputAccessor &&) = default;
 
-    // Inherited methods overriden:
-    bool access_tensor(arm_compute::ITensor& tensor) override;
+  // Inherited methods overriden:
+  bool access_tensor(arm_compute::ITensor &tensor) override;
 };
 
 class WeightAccessor : public arm_compute::graph::ITensorAccessor
 {
 public:
-    WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; }
-    WeightAccessor(WeightAccessor&&) = default;
+  WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; }
+  WeightAccessor(WeightAccessor &&) = default;
 
-    // Inherited methods overriden:
-    bool access_tensor(arm_compute::ITensor& tensor) override;
+  // Inherited methods overriden:
+  bool access_tensor(arm_compute::ITensor &tensor) override;
 
 private:
-    bool _inc;
-    float _test_weight;
+  bool _inc;
+  float _test_weight;
 };
 
 class BiasAccessor : public arm_compute::graph::ITensorAccessor
 {
 public:
-    BiasAccessor() = default;
-    BiasAccessor(BiasAccessor&&) = default;
+  BiasAccessor() = default;
+  BiasAccessor(BiasAccessor &&) = default;
 
-    // Inherited methods overriden:
-    bool access_tensor(arm_compute::ITensor& tensor) override;
+  // Inherited methods overriden:
+  bool access_tensor(arm_compute::ITensor &tensor) override;
 };
 
 #endif // __IO_ACCESSOR_H__
index 515f287..4e5f10d 100644 (file)
@@ -9,31 +9,29 @@
 extern "C" JNIEXPORT jstring JNICALL
 Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
 {
-    using arm_compute::DataType;
-    using arm_compute::graph::Tensor;
-    using arm_compute::graph::TargetHint;
-    using arm_compute::graph::Graph;
-    using arm_compute::TensorInfo;
-    using arm_compute::TensorShape;
-
-    arm_compute::graph::Graph graph;
-    TargetHint target_hint = TargetHint::OPENCL;
-    bool autoinc = true;
-
-    graph << target_hint
-        << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
-                  std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
+  using arm_compute::DataType;
+  using arm_compute::graph::Tensor;
+  using arm_compute::graph::TargetHint;
+  using arm_compute::graph::Graph;
+  using arm_compute::TensorInfo;
+  using arm_compute::TensorShape;
+
+  arm_compute::graph::Graph graph;
+  TargetHint target_hint = TargetHint::OPENCL;
+  bool autoinc = true;
+
+  graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+                                 std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
         << arm_compute::graph::ConvolutionLayer(
-              3U, 3U, 1U,
-              std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
-              std::unique_ptr<BiasAccessor>(new BiasAccessor()),
-              arm_compute::PadStrideInfo(1, 1, 0, 0))
+               3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
+               std::unique_ptr<BiasAccessor>(new BiasAccessor()),
+               arm_compute::PadStrideInfo(1, 1, 0, 0))
         << Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
-        ;
+  ;
 
-    graph.run();
+  graph.run();
 
-    std::string hello = "SoftMax Run OK";
+  std::string hello = "SoftMax Run OK";
 
-    return env->NewStringUTF(hello.c_str());
+  return env->NewStringUTF(hello.c_str());
 }
index 93994ae..78a20bb 100644 (file)
@@ -70,73 +70,71 @@ void printDeviceInfo(int n, cl::Device &device, cl::Device &default_device)
   std::cout << "\n";
 }
 
-
 class OpenCLGpu
 {
-  public:
-    cl::Platform platform_;
-    cl::Context context_;
-    cl::vector<cl::Device> devices_;
-    std::vector<cl::CommandQueue*> q_;
-    cl::Program program_;
-
-    OpenCLGpu()
-    {
-      cl_int cl_error;
-
-      platform_ = cl::Platform::getDefault();
+public:
+  cl::Platform platform_;
+  cl::Context context_;
+  cl::vector<cl::Device> devices_;
+  std::vector<cl::CommandQueue *> q_;
+  cl::Program program_;
+
+  OpenCLGpu()
+  {
+    cl_int cl_error;
 
-      try
-      {
-        cl_context_properties properties[3] = {
-            CL_CONTEXT_PLATFORM, (cl_context_properties)platform_(), 0
-          };
+    platform_ = cl::Platform::getDefault();
 
-        context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error);
-      }
-      catch (cl::Error &err) // thrown when there is no Context for this platform
-      {
-        std::cout << "\t\t No Context Found\n";
-        return;
-      }
-
-      devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
+    try
+    {
+      cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM,
+                                             (cl_context_properties)platform_(), 0};
 
-      for (int dev_id = 0; dev_id < devices_.size(); dev_id++)
-      {
-        cl::CommandQueue* que = new cl::CommandQueue(context_, devices_[dev_id]);
-        q_.emplace_back(que);
-      }
+      context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error);
     }
-
-    ~OpenCLGpu()
+    catch (cl::Error &err) // thrown when there is no Context for this platform
     {
-      for (auto each_q : q_)
-        delete each_q;
+      std::cout << "\t\t No Context Found\n";
+      return;
     }
 
-    void buildProgram(std::string& kernel_source_code)
+    devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
+
+    for (int dev_id = 0; dev_id < devices_.size(); dev_id++)
     {
-      std::vector<std::string> programStrings {kernel_source_code};
+      cl::CommandQueue *que = new cl::CommandQueue(context_, devices_[dev_id]);
+      q_.emplace_back(que);
+    }
+  }
 
-      program_ = cl::Program(context_, programStrings);
+  ~OpenCLGpu()
+  {
+    for (auto each_q : q_)
+      delete each_q;
+  }
 
-      try
-      {
-        program_.build("-cl-std=CL1.2");
-      }
-      catch (cl::Error &err)
+  void buildProgram(std::string &kernel_source_code)
+  {
+    std::vector<std::string> programStrings{kernel_source_code};
+
+    program_ = cl::Program(context_, programStrings);
+
+    try
+    {
+      program_.build("-cl-std=CL1.2");
+    }
+    catch (cl::Error &err)
+    {
+      cl_int buildErr = CL_SUCCESS;
+      auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
+      for (auto &pair : buildInfo)
       {
-        cl_int buildErr = CL_SUCCESS;
-        auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
-        for (auto &pair : buildInfo) {
-            std::cerr << pair.second << std::endl << std::endl;
-        }
+        std::cerr << pair.second << std::endl << std::endl;
       }
     }
+  }
 };
 
-
 void checkContextMem()
 {
   cl_int cl_error;
@@ -174,51 +172,46 @@ void checkContextMem()
   for (int i = 0; i < length; i++)
     input[i] = i;
 
-  cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), input.data(), &cl_error);
-  cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error);
+  cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+                       input.data(), &cl_error);
+  cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+                        output.data(), &cl_error);
 
   // compile test cl code
 
-  std::string kernel_source {
-    "typedef int T;                                                 \n" \
-    "kernel void memory_test(                                       \n" \
-    "   const int dev_id,                                           \n" \
-    "   global T* input,                                            \n" \
-    "   global T* output,                                           \n" \
-    "   const int start_idx,                                        \n" \
-    "   const int count)                                            \n" \
-    "{                                                              \n" \
-    "   int input_idx = get_global_id(0);                           \n" \
-    "   if(input_idx < count)                                       \n" \
-    "   {                                                           \n" \
-    "       int output_idx = start_idx + input_idx;                 \n" \
-    "       output[output_idx] = input[input_idx] + dev_id;         \n" \
-    "   }                                                           \n" \
-    "}                                                              \n"
-    };
+  std::string kernel_source{"typedef int T;                                                 \n"
+                            "kernel void memory_test(                                       \n"
+                            "   const int dev_id,                                           \n"
+                            "   global T* input,                                            \n"
+                            "   global T* output,                                           \n"
+                            "   const int start_idx,                                        \n"
+                            "   const int count)                                            \n"
+                            "{                                                              \n"
+                            "   int input_idx = get_global_id(0);                           \n"
+                            "   if(input_idx < count)                                       \n"
+                            "   {                                                           \n"
+                            "       int output_idx = start_idx + input_idx;                 \n"
+                            "       output[output_idx] = input[input_idx] + dev_id;         \n"
+                            "   }                                                           \n"
+                            "}                                                              \n"};
 
   gpu.buildProgram(kernel_source);
 
   try
   {
-    auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>
-                              (gpu.program_, "memory_test"); // name should be same as cl function name
+    auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>(
+        gpu.program_, "memory_test"); // name should be same as cl function name
 
     // create a queue per device and queue a kernel job
 
     for (int dev_id = 0; dev_id < devices.size(); dev_id++)
     {
-      kernel_functor(
-          cl::EnqueueArgs(
-              *(gpu.q_[dev_id]),
-              cl::NDRange(items_per_device)),
-          (cl_int)dev_id, // dev id
-          input_buf,
-          output_buf,
-          (cl_int)(items_per_device * dev_id), // start index
-          (cl_int)(items_per_device), // count
-          cl_error
-          );
+      kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)),
+                     (cl_int)dev_id, // dev id
+                     input_buf, output_buf,
+                     (cl_int)(items_per_device * dev_id), // start index
+                     (cl_int)(items_per_device),          // count
+                     cl_error);
     }
 
     // sync
@@ -240,8 +233,8 @@ void checkContextMem()
         if (output[output_idx] != input[i] + dev_id)
         {
           std::cout << "Output[" << output_idx << "] : "
-                    << "expected = "  << input[i] + dev_id
-                    << "; actual = " << output[output_idx] << "\n";
+                    << "expected = " << input[i] + dev_id << "; actual = " << output[output_idx]
+                    << "\n";
           use_same_memory = false;
           break;
         }
@@ -261,10 +254,11 @@ void checkContextMem()
 
 void printHelp()
 {
-    std::cout << "opencl information: \n\n";
-    std::cout << "\t -h : help\n";
-    std::cout << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
-    std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
+  std::cout << "opencl information: \n\n";
+  std::cout << "\t -h : help\n";
+  std::cout
+      << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
+  std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
 }
 
 #include <mutex>
@@ -274,24 +268,25 @@ void printHelp()
 
 #define MAX_DEVICE_NUM 8 // just for testing
 
-int  kernel_idx[MAX_DEVICE_NUM];
+int kernel_idx[MAX_DEVICE_NUM];
 unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed.
-unsigned char kernel_completed_flag;   // if comparing kernel_completed with this var, all kernels are completed
-int  device_num;
+unsigned char
+    kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
+int device_num;
 std::mutex kernel_complete_handler_mutex;
 
 std::condition_variable wakeup_main;
 std::mutex wakeup_main_mutex;
 
-void notifyKernelFinished(cl_event ev, cl_int ev_info, void * device_idx)
+void notifyKernelFinished(cl_event ev, cl_int ev_info, void *device_idx)
 {
-  std::cout << "callback from device[" << *((int*)device_idx) << "] : ==> completed.\n";
+  std::cout << "callback from device[" << *((int *)device_idx) << "] : ==> completed.\n";
 
   std::unique_lock<std::mutex> lock(kernel_complete_handler_mutex);
 
-  kernel_completed |= 0x01 << *((int*)device_idx);
+  kernel_completed |= 0x01 << *((int *)device_idx);
   if (kernel_completed == kernel_completed_flag)
-     wakeup_main.notify_one();
+    wakeup_main.notify_one();
 }
 
 void testSync()
@@ -300,33 +295,32 @@ void testSync()
 
   cl_int cl_error;
   typedef cl_int T;
-  const int items_per_device = 1024*768;
+  const int items_per_device = 1024 * 768;
   const int length = items_per_device * gpu.devices_.size();
 
   std::vector<T> output(length, 0);
 
-  cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error);
-
-  std::string kernel_source {
-    "kernel void test(global float* output, const int count)  \n" \
-    "{                                                        \n" \
-    "   int idx = get_global_id(0);                           \n" \
-    "   if(idx < count)                                       \n" \
-    "   {                                                     \n" \
-    "       float x = hypot(idx/1.111, idx*1.111);            \n" \
-    "       for (int y = 0; y < 200; y++)                     \n" \
-    "         x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5);  \n" \
-    "       output[idx] = x;                                  \n" \
-    "   }                                                     \n" \
-    "}                                                        \n"
-    };
+  cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+                        output.data(), &cl_error);
+
+  std::string kernel_source{"kernel void test(global float* output, const int count)  \n"
+                            "{                                                        \n"
+                            "   int idx = get_global_id(0);                           \n"
+                            "   if(idx < count)                                       \n"
+                            "   {                                                     \n"
+                            "       float x = hypot(idx/1.111, idx*1.111);            \n"
+                            "       for (int y = 0; y < 200; y++)                     \n"
+                            "         x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5);  \n"
+                            "       output[idx] = x;                                  \n"
+                            "   }                                                     \n"
+                            "}                                                        \n"};
 
   gpu.buildProgram(kernel_source);
 
   try
   {
-    auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>
-                              (gpu.program_, "test"); // name should be same as cl function name
+    auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>(
+        gpu.program_, "test"); // name should be same as cl function name
 
     // variable init
     cl::Event ev[MAX_DEVICE_NUM];
@@ -345,20 +339,17 @@ void testSync()
     // queueing with callback function
     for (int dev_id = 0; dev_id < gpu.devices_.size(); dev_id++)
     {
-      ev[dev_id] = kernel_functor(
-                          cl::EnqueueArgs(
-                              *(gpu.q_[dev_id]),
-                              cl::NDRange(items_per_device)),
-                          output_buf,
-                          (cl_int)(items_per_device), // count
-                          cl_error
-                          );
-      ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void*)(kernel_idx+dev_id));
+      ev[dev_id] = kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)),
+                                  output_buf,
+                                  (cl_int)(items_per_device), // count
+                                  cl_error);
+      ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void *)(kernel_idx + dev_id));
 
       // how to check kernel execution status
       //
       // auto status  = ev[dev_id].getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>();
-      // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status == CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown")
+      // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status ==
+      // CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown")
       //           << std::endl;
       // std::cout << "Event status code = " << status << std::endl;
     }
@@ -366,7 +357,7 @@ void testSync()
     // long wait until kernels are over
     {
       std::unique_lock<std::mutex> lk(wakeup_main_mutex);
-      wakeup_main.wait(lk, []{ return (kernel_completed == kernel_completed_flag); });
+      wakeup_main.wait(lk, [] { return (kernel_completed == kernel_completed_flag); });
 
       std::cout << "all devices were completed.\n";
     }
@@ -375,7 +366,6 @@ void testSync()
   {
     std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl;
   }
-
 }
 
 int main(const int argc, char **argv)
index 4ee9e5a..3a27789 100644 (file)
 #include <string>
 #include <vector>
 
-#define TF_ENSURE(e) {                            \
-  if(!(e).ok())                                   \
-  {                                               \
-    throw std::runtime_error{"'" #e "' FAILED"};  \
-  }                                               \
-}
+#define TF_ENSURE(e)                               \
+  {                                                \
+    if (!(e).ok())                                 \
+    {                                              \
+      throw std::runtime_error{"'" #e "' FAILED"}; \
+    }                                              \
+  }
 
 using namespace tflite;
 using namespace tflite::ops::builtin;
@@ -130,7 +131,7 @@ int main(int argc, char **argv)
   //
   const std::string full_model_path{argv[2]};
 
-  tensorflow::Sessionfull_sess;
+  tensorflow::Session *full_sess;
   tensorflow::GraphDef full_model;
 
   TF_ENSURE(tensorflow::NewSession(tensorflow::SessionOptions(), &full_sess));
@@ -191,7 +192,7 @@ int main(int argc, char **argv)
   //
   // Compare Output
   //
-  auto equals = [] (float lhs, float rhs) {
+  auto equals = [](float lhs, float rhs) {
     // TODO Allow users to set tolerance
     if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
     {
@@ -213,7 +214,8 @@ int main(int argc, char **argv)
 
     const auto element_count = count_elements(tensor);
 
-    std::cout << "Compare output #" << n << "(" << tensor->name << ", " << element_count << " elements)" << std::endl;
+    std::cout << "Compare output #" << n << "(" << tensor->name << ", " << element_count
+              << " elements)" << std::endl;
     for (uint32_t index = 0; index < element_count; ++index)
     {
       const auto full_value = output_nodes.at(n).flat<float>().data()[index];
@@ -221,7 +223,8 @@ int main(int argc, char **argv)
 
       if (!equals(full_value, lite_value))
       {
-        std::cerr << full_value << " is expected, but " << lite_value << " is obtaeind (at " << n << ":" << index << ")" << std::endl;
+        std::cerr << full_value << " is expected, but " << lite_value << " is obtaeind (at " << n
+                  << ":" << index << ")" << std::endl;
         matched = false;
       }
     }
index a5a4e8c..ad69f1c 100644 (file)
@@ -50,14 +50,15 @@ using namespace arm_compute;
 namespace
 {
 
-enum Layout { NCHW, NHWC };
+enum Layout
+{
+  NCHW,
+  NHWC
+};
 
 struct Initializer
 {
-  Initializer()
-  {
-    CLScheduler::get().default_init();
-  }
+  Initializer() { CLScheduler::get().default_init(); }
 };
 
 Initializer initializer;
@@ -68,9 +69,9 @@ TensorInfo make_info(uint32_t N)
   return TensorInfo{shape, 1, DataType::F32};
 }
 
-template<enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
+template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
 
-template<> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
 {
   TensorShape shape{W, H, C, N};
   TensorInfo info{shape, 1, DataType::F32};
@@ -78,7 +79,7 @@ template<> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32
   return info;
 }
 
-template<> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
 {
   TensorShape shape{C, W, H, N};
   TensorInfo info{shape, 1, DataType::F32};
@@ -180,18 +181,35 @@ struct Configuration
     left_padding = pad_W / 2;
     right_padding = pad_W / 2;
 
-    if (is_odd(pad_H)) top_padding += 1;
-    if (is_odd(pad_W)) left_padding += 1;
+    if (is_odd(pad_H))
+      top_padding += 1;
+    if (is_odd(pad_W))
+      left_padding += 1;
   }
 
-  template<Layout L> TensorInfo src_info() const { return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W); }
-  template<Layout L> TensorInfo dst_info() const { return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W); }
-  template<Layout L> TensorInfo ker_info() const { return make_info<L>(ker_N, ker_C, ker_H, ker_W); }
+  template <Layout L> TensorInfo src_info() const
+  {
+    return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
+  }
+  template <Layout L> TensorInfo dst_info() const
+  {
+    return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
+  }
+  template <Layout L> TensorInfo ker_info() const
+  {
+    return make_info<L>(ker_N, ker_C, ker_H, ker_W);
+  }
   TensorInfo bias_info(void) const { return make_info(ker_N); }
 
   PadStrideInfo pad_stride_info(void) const
   {
-    return PadStrideInfo{horizontal_stride, vertical_stride, left_padding, right_padding, top_padding, bottom_padding, DimensionRoundingType::FLOOR};
+    return PadStrideInfo{horizontal_stride,
+                         vertical_stride,
+                         left_padding,
+                         right_padding,
+                         top_padding,
+                         bottom_padding,
+                         DimensionRoundingType::FLOOR};
   }
 };
 
@@ -205,8 +223,7 @@ struct Configuration
 #endif // CL_DIRECT_CONVOLUTION
 
 #if CL_DIRECT_CONVOLUTION
-NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
   CLDirectConvolutionLayer conv;
 
   // Configure
@@ -222,7 +239,8 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter)
   ker_tensor.allocator()->init(p.ker_info<NCHW>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -231,11 +249,13 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter)
   dst_tensor.allocator()->allocate();
 
   // Run!
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 
-NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
   CLDirectConvolutionLayer conv;
 
   // Configure
@@ -251,7 +271,8 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter)
   ker_tensor.allocator()->init(p.ker_info<NHWC>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -260,7 +281,10 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter)
   dst_tensor.allocator()->allocate();
 
   // Run!
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 #endif // CL_DIRECT_CONVOLUTION
 
@@ -269,8 +293,7 @@ NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter)
 #endif // CL_GEMM_CONVOLUTION
 
 #if CL_GEMM_CONVOLUTION
-NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
   CLGEMMConvolutionLayer conv;
 
   // Configure
@@ -286,7 +309,8 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter)
   ker_tensor.allocator()->init(p.ker_info<NCHW>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -295,11 +319,13 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter)
   dst_tensor.allocator()->allocate();
 
   // Run
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 
-NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
   CLGEMMConvolutionLayer conv;
 
   // Configure
@@ -315,7 +341,8 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter)
   ker_tensor.allocator()->init(p.ker_info<NHWC>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -324,7 +351,10 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter)
   dst_tensor.allocator()->allocate();
 
   // Run
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 #endif // CL_GEMM_CONVOLUTION
 
@@ -333,8 +363,7 @@ NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter)
 #endif // CL_WINOGRAD_CONVOLUTION
 
 #if CL_WINOGRAD_CONVOLUTION
-NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
   CLWinogradConvolutionLayer conv;
 
   // Configure
@@ -350,7 +379,8 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer mete
   ker_tensor.allocator()->init(p.ker_info<NCHW>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -359,11 +389,13 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer mete
   dst_tensor.allocator()->allocate();
 
   // Run
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 
-NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer meter)
-{
+NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
   CLWinogradConvolutionLayer conv;
 
   // Configure
@@ -379,7 +411,8 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer mete
   ker_tensor.allocator()->init(p.ker_info<NHWC>());
   bias_tensor.allocator()->init(p.bias_info());
 
-  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));
+  check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+                      p.pad_stride_info()));
   conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
 
   src_tensor.allocator()->allocate();
@@ -388,6 +421,9 @@ NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer mete
   dst_tensor.allocator()->allocate();
 
   // Run
-  meter.measure([&](int) { conv.run(); CLScheduler::get().sync(); });
+  meter.measure([&](int) {
+    conv.run();
+    CLScheduler::get().sync();
+  });
 })
 #endif // CL_WINOGRAD_CONVOLUTION