--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_RELU6_H__
+#define __NNFW_KERNEL_ACL_RELU6_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+// Applies ReLU6 (clamp each element to [0, 6]) on the OpenCL backend using
+// ACL's BOUNDED_RELU activation. Supports rank-2 and rank-4 float32 tensors;
+// returns false for any other rank.
+bool relu6Float32(const float* inputData, const nnfw::rt::Shape& inputShape,
+                  float* outputData, const nnfw::rt::Shape& outputShape);
+
+// Same clamp expressed with ACL's LU_BOUNDED_RELU (lower bound 0, upper
+// bound 6) on the OpenCL backend.
+bool relu6LUFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+                    float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+// NEON-backend counterparts of the functions above; same contract.
+bool relu6Float32(const float* inputData, const nnfw::rt::Shape& inputShape,
+                  float* outputData, const nnfw::rt::Shape& outputShape);
+
+bool relu6LUFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+                    float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_RELU6_H__
"src/cl/Concatenation.cpp"
"src/cl/Add.cpp"
"src/cl/ReLU.cpp"
+ "src/cl/ReLU6.cpp"
"src/neon/Mul.cpp"
"src/neon/Conv2D.cpp"
"src/neon/DepthwiseConv2D.cpp"
"src/neon/Concatenation.cpp"
"src/neon/Add.cpp"
"src/neon/ReLU.cpp"
+ "src/neon/ReLU6.cpp"
)
add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS})
"src/cl/Concatenation.test.cpp"
"src/cl/Add.test.cpp"
"src/cl/ReLU.test.cpp"
+ "src/cl/ReLU6.test.cpp"
"src/neon/Mul.test.cpp"
"src/neon/Conv2D.test.cpp"
"src/neon/DepthwiseConv2D.test.cpp"
"src/neon/Concatenation.test.cpp"
"src/neon/Add.test.cpp"
"src/neon/ReLU.test.cpp"
+ "src/neon/ReLU6.test.cpp"
)
add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS})
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../UniqueTensor.h"
+#include "../util.h"
+
+namespace nnfw
+{
+namespace kernel
+{
+namespace acl
+{
+
+// ReLU6 on the OpenCL backend: clamps every element of inputData to [0, 6]
+// and writes the result to outputData. Only rank-2 (matrix) and rank-4
+// tensors are supported; returns false otherwise.
+bool relu6Float32(const float *inputData, const nnfw::rt::Shape &inputShape, float *outputData,
+                  const nnfw::rt::Shape &outputShape)
+{
+  // Reject unsupported ranks up front, before configuring the CL function and
+  // allocating device tensors (the original did this work even on failure).
+  const size_t rank = inputShape.dimensions.size();
+  if (rank != 2 && rank != 4)
+  {
+    assert("undefined dimension of input" && 0);
+    return false;
+  }
+
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+  CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  // BOUNDED_RELU with a = 6.0f computes min(6, max(0, x)), i.e. ReLU6.
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+
+  auto relu_f = std::make_shared<arm_compute::CLActivationLayer>();
+  relu_f->configure(input.ptr(), output.ptr(), act_info);
+
+  input.allocate();
+  output.allocate();
+
+  // Upload, run and download exactly once; only the accessor type depends on
+  // the tensor rank (the original duplicated run()/sync() in both branches).
+  if (rank == 4)
+  {
+    TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+  }
+
+  relu_f->run();
+  arm_compute::CLScheduler::get().sync(); // wait for the CL queue before reading back
+
+  if (rank == 4)
+  {
+    TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+  }
+
+  return true;
+}
+
+// Same ReLU6 clamp as relu6Float32, expressed with LU_BOUNDED_RELU
+// (explicit lower bound 0, upper bound 6) on the OpenCL backend.
+bool relu6LUFloat32(const float *inputData, const nnfw::rt::Shape &inputShape, float *outputData,
+                    const nnfw::rt::Shape &outputShape)
+{
+  // Reject unsupported ranks before any CL configuration/allocation work.
+  const size_t rank = inputShape.dimensions.size();
+  if (rank != 2 && rank != 4)
+  {
+    assert("undefined dimension of input" && 0);
+    return false;
+  }
+
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+  CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  // LU_BOUNDED_RELU with a = 6.0f, b = 0.0f computes min(6, max(0, x)).
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+
+  auto relu_f = std::make_shared<arm_compute::CLActivationLayer>();
+  relu_f->configure(input.ptr(), output.ptr(), act_info);
+
+  input.allocate();
+  output.allocate();
+
+  // Single run/sync; only the accessor type depends on the tensor rank.
+  if (rank == 4)
+  {
+    TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+  }
+
+  relu_f->run();
+  arm_compute::CLScheduler::get().sync(); // wait for the CL queue before reading back
+
+  if (rank == 4)
+  {
+    TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+  }
+
+  return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/ReLU6.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+// CL-backend ReLU6 on a rank-2 (3x3) tensor: -10 clamps to 0, 3 passes
+// through, 10 clamps to 6.
+TEST(KernelACL_TC, relu6Float32_2d)
+{
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_h, input_w});
+  util::TensorWrapper output({output_h, output_w});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  // Upper triangle negative, diagonal in-range, lower triangle above the bound.
+  input.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret = relu6Float32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_h, output_w});
+  expected.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// CL-backend LU_BOUNDED_RELU variant on a rank-2 (3x3) tensor; expected
+// output is identical to the BOUNDED_RELU test (clamp to [0, 6]).
+TEST(KernelACL_TC, relu6LUFloat32_2d)
+{
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_h, input_w});
+  util::TensorWrapper output({output_h, output_w});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  // Upper triangle negative, diagonal in-range, lower triangle above the bound.
+  input.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      relu6LUFloat32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_h, output_w});
+  expected.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// CL-backend ReLU6 on a rank-4 (1x3x3x3) tensor; element values depend only
+// on (h, w) so the expected pattern matches the 2d test per channel.
+TEST(KernelACL_TC, relu6Float32_4d)
+{
+  uint32_t input_n = 1;
+  uint32_t input_c = 3;
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_n = input_n;
+  uint32_t output_c = input_c;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_n, input_h, input_w, input_c});
+  util::TensorWrapper output({output_n, output_h, output_w, output_c});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret = relu6Float32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_n, output_h, output_w, output_c});
+  expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// CL-backend LU_BOUNDED_RELU variant on a rank-4 (1x3x3x3) tensor; same
+// expected clamp-to-[0, 6] pattern as the BOUNDED_RELU 4d test.
+TEST(KernelACL_TC, relu6LUFloat32_4d)
+{
+  uint32_t input_n = 1;
+  uint32_t input_c = 3;
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_n = input_n;
+  uint32_t output_c = input_c;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_n, input_h, input_w, input_c});
+  util::TensorWrapper output({output_n, output_h, output_w, output_c});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      relu6LUFloat32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_n, output_h, output_w, output_c});
+  expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../UniqueTensor.h"
+#include "../util.h"
+
+namespace nnfw
+{
+namespace kernel
+{
+namespace acl
+{
+namespace neon
+{
+
+// ReLU6 on the NEON backend: clamps every element of inputData to [0, 6].
+// Only rank-2 (matrix) and rank-4 tensors are supported; returns false
+// otherwise.
+bool relu6Float32(const float *inputData, const nnfw::rt::Shape &inputShape, float *outputData,
+                  const nnfw::rt::Shape &outputShape)
+{
+  // Reject unsupported ranks before configuring the function.
+  const size_t rank = inputShape.dimensions.size();
+  if (rank != 2 && rank != 4)
+  {
+    assert("undefined dimension of input" && 0);
+    return false;
+  }
+
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+  // This is the NEON backend, so it must use NE* tensors and functions.
+  // The original used CLUniqueTensor/CLActivationLayer plus a CL scheduler
+  // sync, which silently executed this "neon" kernel on the OpenCL backend.
+  NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  // BOUNDED_RELU with a = 6.0f computes min(6, max(0, x)), i.e. ReLU6.
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+
+  auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
+  relu_f->configure(input.ptr(), output.ptr(), act_info);
+
+  input.allocate();
+  output.allocate();
+
+  // NEON functions execute synchronously, so no scheduler sync is required
+  // between run() and reading the output back.
+  if (rank == 4)
+  {
+    TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+    relu_f->run();
+    TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+    relu_f->run();
+    TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+  }
+
+  return true;
+}
+
+// Same ReLU6 clamp as neon::relu6Float32, expressed with LU_BOUNDED_RELU
+// (explicit lower bound 0, upper bound 6) on the NEON backend.
+bool relu6LUFloat32(const float *inputData, const nnfw::rt::Shape &inputShape, float *outputData,
+                    const nnfw::rt::Shape &outputShape)
+{
+  // Reject unsupported ranks before configuring the function.
+  const size_t rank = inputShape.dimensions.size();
+  if (rank != 2 && rank != 4)
+  {
+    assert("undefined dimension of input" && 0);
+    return false;
+  }
+
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+  // NEON backend must use NE* tensors/functions; the original mistakenly
+  // used CLUniqueTensor/CLActivationLayer and a CL scheduler sync.
+  NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  // LU_BOUNDED_RELU with a = 6.0f, b = 0.0f computes min(6, max(0, x)).
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+
+  auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
+  relu_f->configure(input.ptr(), output.ptr(), act_info);
+
+  input.allocate();
+  output.allocate();
+
+  // NEON runs synchronously; no scheduler sync needed after run().
+  if (rank == 4)
+  {
+    TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+    relu_f->run();
+    TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else
+  {
+    TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+    relu_f->run();
+    TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+  }
+
+  return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/ReLU6.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+// NEON-backend ReLU6 on a rank-2 (3x3) tensor: -10 clamps to 0, 3 passes
+// through, 10 clamps to 6. Mirrors the CL test via neon::relu6Float32.
+TEST(KernelACL_TC, neon_relu6Float32_2d)
+{
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_h, input_w});
+  util::TensorWrapper output({output_h, output_w});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  // Upper triangle negative, diagonal in-range, lower triangle above the bound.
+  input.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      neon::relu6Float32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_h, output_w});
+  expected.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// NEON-backend LU_BOUNDED_RELU variant on a rank-2 (3x3) tensor; expected
+// output is the same clamp-to-[0, 6] pattern as the BOUNDED_RELU test.
+TEST(KernelACL_TC, neon_relu6LUFloat32_2d)
+{
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_h, input_w});
+  util::TensorWrapper output({output_h, output_w});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  // Upper triangle negative, diagonal in-range, lower triangle above the bound.
+  input.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      neon::relu6LUFloat32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_h, output_w});
+  expected.initValue([&](uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// NEON-backend ReLU6 on a rank-4 (1x3x3x3) tensor; element values depend
+// only on (h, w) so the expected pattern matches the 2d test per channel.
+TEST(KernelACL_TC, neon_relu6Float32_4d)
+{
+  uint32_t input_n = 1;
+  uint32_t input_c = 3;
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_n = input_n;
+  uint32_t output_c = input_c;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_n, input_h, input_w, input_c});
+  util::TensorWrapper output({output_n, output_h, output_w, output_c});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      neon::relu6Float32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_n, output_h, output_w, output_c});
+  expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}
+
+// NEON-backend LU_BOUNDED_RELU variant on a rank-4 (1x3x3x3) tensor; same
+// expected clamp-to-[0, 6] pattern as the BOUNDED_RELU 4d test.
+TEST(KernelACL_TC, neon_relu6LUFloat32_4d)
+{
+  uint32_t input_n = 1;
+  uint32_t input_c = 3;
+  uint32_t input_h = 3;
+  uint32_t input_w = 3;
+  uint32_t output_n = input_n;
+  uint32_t output_c = input_c;
+  uint32_t output_h = input_h;
+  uint32_t output_w = input_w;
+
+  util::TensorWrapper input({input_n, input_h, input_w, input_c});
+  util::TensorWrapper output({output_n, output_h, output_w, output_c});
+
+  float nv = -10.f;
+  float p1v = 3.f;
+  float p2v = 10.f;
+
+  input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return nv;
+    else if (h == w)
+      return p1v;
+    else
+      return p2v;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { return 0.f; });
+
+  bool bret =
+      neon::relu6LUFloat32(input.ptr<float>(), input.shape(), output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  // Expected: negatives clamped to 0, over-bound values clamped to 6.
+  util::TensorWrapper expected({output_n, output_h, output_w, output_c});
+  expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    if (h < w)
+      return 0.f;
+    else if (h == w)
+      return p1v;
+    else
+      return 6.f;
+  });
+
+  bret = util::compareData(output.ptr<float>(), expected.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+}