#include <arm_compute/graph/Graph.h>
#include <arm_compute/graph/Nodes.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/Utils.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
+
#include "io_accessor.h"
#include "util/environment.h"
using arm_compute::TensorInfo;
using arm_compute::TensorShape;
+namespace acl_graph {
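+
+// Convolution implemented through the ACL graph API.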
+
bool convFloat32(const float* inputData, const Shape& inputShape,
const float* filterData, const Shape& filterShape,
const float* biasData, const Shape& biasShape,
                 int32_t padding_left, int32_t padding_right,
                 int32_t padding_top, int32_t padding_bottom,
int32_t stride_width, int32_t stride_height,
int32_t activation,
- float* outputData, const Shape& outputShape,
- bool useACL) {
- if (!useACL)
- return convFloat32(inputData, inputShape, filterData, filterShape,
- biasData, biasShape, padding_left, padding_right,
- padding_top, padding_bottom, stride_width, stride_height,
- activation, outputData, outputShape);
-
+ float* outputData, const Shape& outputShape)
+{
  // Try a simple build-and-run with the ACL graph API. For now this is a
  // stub: it only constructs an empty graph and reports success; the
  // convolution node has yet to be added.
  arm_compute::graph::Graph graph;
  return true;
}
+} // namespace acl_graph
+
+//-----------------------------------------------------------------------------
+
+namespace acl_runtime {
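+
+// Convolution implemented directly on the ACL CL runtime (CLConvolutionLayer).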
+
+TensorShape calculate_convolution_layer_output_shape(
+ const arm_compute::TensorShape &input_shape,
+ const arm_compute::TensorShape &weights_shape,
+ const arm_compute::PadStrideInfo &conv_info)
+{
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
+
+ // Get output width and height
+ std::tie(output_width, output_height) =
+ arm_compute::scaled_dimensions(
+ input_shape.x(), input_shape.y(),
+ weights_shape.x(), weights_shape.y(),
+ conv_info);
+
+  // Create output shape: ACL tensors are laid out W,H,C,N, so the channel
+  // dimension (index 2) of the output is the number of kernels, weights_shape[3]
+  TensorShape output_shape = input_shape;
+  output_shape.set(0, output_width);
+  output_shape.set(1, output_height);
+  output_shape.set(2, weights_shape[3]);
+
+ return output_shape;
+}
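+// e.g. a 3x3 input with a 3x3 kernel, stride 1 and no padding yields a
+// 1x1 output (the test_3x3_1x1_one case below).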
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape)
+{
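+  // Set up the default OpenCL context and command queue for the CL scheduler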
+ arm_compute::CLScheduler::get().default_init();
+
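+  // NOTE: this assumes the dimension order getSizeOfDimension() yields for
+  // these Shape objects; if the shapes are NHWC, the batch/channel indices
+  // (0 and 3) would need to be swapped.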
+ uint32_t tsi_c = getSizeOfDimension(inputShape, 0);
+ uint32_t tsi_h = getSizeOfDimension(inputShape, 1);
+ uint32_t tsi_w = getSizeOfDimension(inputShape, 2);
+ uint32_t tsi_n = getSizeOfDimension(inputShape, 3);
+
+ uint32_t tsk_h = getSizeOfDimension(filterShape, 1);
+ uint32_t tsk_w = getSizeOfDimension(filterShape, 2);
+ uint32_t tsk_n = getSizeOfDimension(filterShape, 3);
+
+  // ACL CL tensors are laid out W,H,C,N
+  TensorShape input_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n);
+  TensorShape filter_shape = TensorShape(tsk_w, tsk_h, tsi_c, tsk_n);
+  arm_compute::PadStrideInfo conv_info =
+      arm_compute::PadStrideInfo(stride_width, stride_height,
+                                 padding_left, padding_right,
+                                 padding_top, padding_bottom,
+                                 arm_compute::DimensionRoundingType::FLOOR);
+
+ TensorShape output_shape = calculate_convolution_layer_output_shape(
+ input_shape, filter_shape, conv_info);
+
+  uint32_t tso_w = output_shape[0];
+  uint32_t tso_h = output_shape[1];
+  uint32_t tso_c = output_shape[2];
+  uint32_t tso_n = output_shape[3];
+
+ arm_compute::CLTensor input, output, bias, filter;
+
+  // Initialise with the full 4D shapes; bias is 1D, one value per output channel
+  input.allocator()->init(TensorInfo(input_shape, arm_compute::Format::F32));
+  output.allocator()->init(TensorInfo(output_shape, arm_compute::Format::F32));
+  bias.allocator()->init(TensorInfo(TensorShape(tso_c), arm_compute::Format::F32));
+  filter.allocator()->init(TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ input.allocator()->allocate();
+ output.allocator()->allocate();
+ bias.allocator()->allocate();
+ filter.allocator()->allocate();
+
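+  // Copy host buffers into the CL tensors; map()/unmap() give host-visible
+  // access to the underlying OpenCL buffers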
+ input.map();
+ InputAccessor ia(inputData, inputShape);
+ ia.access_tensor(input);
+ input.unmap();
+
+ bias.map();
+ BiasAccessor ba(biasData, biasShape);
+ ba.access_tensor(bias);
+ bias.unmap();
+
+ filter.map();
+ WeightAccessor fa(filterData, filterShape);
+ fa.access_tensor(filter);
+ filter.unmap();
+
+  arm_compute::CLConvolutionLayer conv_f;
+  conv_f.configure(&input, &filter, &bias, &output, conv_info);
+
+  conv_f.run();
+
+  // Block until the OpenCL queue has finished before reading the output back
+  arm_compute::CLScheduler::get().sync();
+
+ output.map();
+ OutputAccessor oa(outputData, outputShape);
+ oa.access_tensor(output);
+ output.unmap();
+
+ return true;
+}
+
+} // namespace acl_runtime
+
+//-----------------------------------------------------------------------------
+
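+// Selects which backend runs the convolution.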
+enum COMPUTE_TYPE {
+ COMPUTE_DEFAULT = 0,
+ COMPUTE_ACLGRAPH,
+ COMPUTE_ACLRT
+};
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape,
+ COMPUTE_TYPE compType) {
+
+ switch (compType)
+ {
+ case COMPUTE_DEFAULT :
+ return convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLGRAPH :
+ return acl_graph::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLRT :
+ return acl_runtime::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+ }
+ return false;
+}
+
//-----------------------------------------------------------------------------
void dumpData(const char* name, const float* data, const Shape& shape)
return true;
}
-int test_3x3_1x1_one(void)
+int test_3x3_1x1_one(COMPUTE_TYPE comptype)
{
float inputData[9];
const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ comptype);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ COMPUTE_ACLGRAPH);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
stride_width, stride_height,
activation,
expectData, outputShape,
- false);
+ COMPUTE_DEFAULT);
bret = convFloat32(inputData, inputShape,
filterData, filterShape,
stride_width, stride_height,
activation,
outputData, outputShape,
- true);
+ COMPUTE_ACLGRAPH);
dumpData("Input ", inputData, inputShape);
dumpData("Filter ", filterData, filterShape);
int result;
// input 3x3, output 1x1, all data 1.0
- result = test_3x3_1x1_one();
+ result = test_3x3_1x1_one(COMPUTE_ACLGRAPH);
+ if (result) return result;
+ result = test_3x3_1x1_one(COMPUTE_ACLRT);
if (result) return result;
// input 3x3, output 3x3, all data 1.0