variables = [
BoolVariable("benchmark_examples", "Build benchmark examples programs", True),
BoolVariable("validate_examples", "Build validate examples programs", True),
+ BoolVariable("reference_openmp", "Build reference validation with openmp", True),
#FIXME Switch the following two options to False before releasing
BoolVariable("validation_tests", "Build validation test programs", True),
BoolVariable("benchmark_tests", "Build benchmark test programs", True),
if test_env['linker_script']:
bm_link_flags += ['-Wl,--build-id=none', '-T', env['linker_script']]
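+# OpenMP needs a runtime library, which bare metal builds lack, so -fopenmp is only added for OS targets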
+if test_env['reference_openmp'] and env['os'] != 'bare_metal':
+ test_env['CXXFLAGS'].append('-fopenmp')
+ test_env['LINKFLAGS'].append('-fopenmp')
+
if test_env['validation_tests']:
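+# Forward CXXFLAGS/LINKFLAGS so the validation framework objects pick up -fopenmp when it is enabled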
- arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LIBS= [ arm_compute_test_framework, arm_compute_core_a])
+ arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework, arm_compute_core_a])
Depends(arm_compute_validation_framework , arm_compute_test_framework)
Depends(arm_compute_validation_framework , arm_compute_core_a)
{
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
-
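+// _OPENMP is only defined when the compiler is invoked in OpenMP mode (e.g. with -fopenmp), so this guard compiles the pragma out otherwise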
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = dequantize_qasymm8(src[i], quantization_info);
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = dequantize_qasymm16(src[i], quantization_info);
SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qasymm8(src[i], qinfo);
SimpleTensor<int8_t> dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qasymm8_signed(src[i], qinfo);
SimpleTensor<uint16_t> dst{ src.shape(), DataType::QASYMM16, 1, quantization_info };
const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qasymm16(src[i], qinfo);
SimpleTensor<int16_t> dst{ src.shape(), DataType::QSYMM16, 1, quantization_info };
const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qsymm16(src[i], qinfo);
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = dequantize_qsymm16(src[i], quantization_info);
const int N = b.shape()[0]; // Cols
const int K = b.shape()[1];
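+// collapse(2) fuses the row/column loops into a single iteration space so both dimensions are parallelized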
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int y = 0; y < M; ++y)
{
for(int x = 0; x < N; ++x)
const int width = in.shape()[0];
const int height = in.shape()[1];
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int y = 0; y < height; ++y)
{
for(int x = 0; x < width; ++x)
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> result(src1.shape(), dst_data_type);
using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src1.num_elements(); ++i)
{
intermediate_type val = std::abs(static_cast<intermediate_type>(src1[i]) - static_cast<intermediate_type>(src2[i]));
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
library->fill_tensor_uniform(dst, 1, static_cast<T2>(0), static_cast<T2>(std::numeric_limits<T1>::max()));
using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
intermediate_type val = static_cast<intermediate_type>(src[i]) + static_cast<intermediate_type>(dst[i]);
library->fill_tensor_uniform(dst, 1, static_cast<T2>(0), static_cast<T2>(std::numeric_limits<T1>::max()));
using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
double val = (1. - static_cast<double>(alpha)) * static_cast<intermediate_type>(dst[i]) + static_cast<double>(alpha) * static_cast<intermediate_type>(src[i]);
using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
intermediate_type denom = 1 << shift;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
intermediate_type val = static_cast<intermediate_type>(dst[i]) + (static_cast<intermediate_type>(src[i]) * static_cast<intermediate_type>(src[i]) / denom);
// Compute reference
const T a(info.a());
const T b(info.b());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = activate_float<T>(src[i], a, b, info.activation());
SimpleTensor<int16_t> dst = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo);
return dst;
}
-
template SimpleTensor<int32_t> activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
id_src1.set(dim - 1, 0);
id_src2.set(dim - 1, 0);
id_dst.set(dim - 1, 0);
-
- for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
+ for(size_t i = 0; i < dst.shape()[dim - 1]; ++i)
{
- BroadcastUnroll < dim - 1 >::unroll(src1, src2, dst, id_src1, id_src2, id_dst);
- id_src1[dim - 1] += !src1_is_broadcast;
- id_src2[dim - 1] += !src2_is_broadcast;
+ // Use per-iteration copies of the coordinates so the parallel loop carries no shared state
+ Coordinates id_src1_i = id_src1;
+ Coordinates id_src2_i = id_src2;
+ Coordinates id_dst_i = id_dst;
+ id_src1_i[dim - 1] += static_cast<int>(i) * !src1_is_broadcast;
+ id_src2_i[dim - 1] += static_cast<int>(i) * !src2_is_broadcast;
+ id_dst_i[dim - 1] += static_cast<int>(i);
+ BroadcastUnroll < dim - 1 >::unroll(src1, src2, dst, id_src1_i, id_src2_i, id_dst_i);
}
}
};
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
id_src1.set(dim - 1, 0);
id_src2.set(dim - 1, 0);
id_dst.set(dim - 1, 0);
-
- for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
+ for(size_t i = 0; i < dst.shape()[dim - 1]; ++i)
{
- BroadcastUnroll < dim - 1 >::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
- id_src1[dim - 1] += !src1_is_broadcast;
- id_src2[dim - 1] += !src2_is_broadcast;
+ // Use per-iteration copies of the coordinates so the parallel loop carries no shared state
+ Coordinates id_src1_i = id_src1;
+ Coordinates id_src2_i = id_src2;
+ Coordinates id_dst_i = id_dst;
+ id_src1_i[dim - 1] += static_cast<int>(i) * !src1_is_broadcast;
+ id_src2_i[dim - 1] += static_cast<int>(i) * !src2_is_broadcast;
+ id_dst_i[dim - 1] += static_cast<int>(i);
+ BroadcastUnroll < dim - 1 >::unroll(op, src1, src2, dst, convert_policy, id_src1_i, id_src2_i, id_dst_i);
}
}
};
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const auto rows = static_cast<int>(src.shape()[1]);
const auto depth = static_cast<int>(src.shape()[2]);
const int upper_dims = src.shape().total_size() / (cols * rows * depth);
-
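+// schedule(dynamic, 1) hands the collapsed iterations out one at a time, balancing uneven per-iteration work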
+#if defined(_OPENMP)
+ #pragma omp parallel for schedule(dynamic, 1) collapse(4)
+#endif /* _OPENMP */
for(int r = 0; r < upper_dims; ++r)
{
for(int i = 0; i < depth; ++i)
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> bitwise_and(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2)
{
SimpleTensor<T> dst(src1.shape(), src1.data_type());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src1.num_elements(); ++i)
{
dst[i] = src1[i] & src2[i];
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> bitwise_not(const SimpleTensor<T> &src)
{
SimpleTensor<T> dst(src.shape(), src.data_type());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = ~src[i];
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> bitwise_or(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2)
{
SimpleTensor<T> dst(src1.shape(), src1.data_type());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src1.num_elements(); ++i)
{
dst[i] = src1[i] | src2[i];
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> bitwise_xor(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2)
{
SimpleTensor<T> dst(src1.shape(), src1.data_type());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src1.num_elements(); ++i)
{
dst[i] = src1[i] ^ src2[i];
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const size_t box_fields = 4;
const size_t class_fields = 4;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(size_t i = 0; i < num_boxes; ++i)
{
// Extract ROI information
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const std::array<T, 9> filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } };
const float scale = 1.f / static_cast<float>(filter.size());
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx)
{
const Coordinates id = index2coord(src.shape(), element_idx);
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
{
std::vector<SimpleTensor<T>> dst = create_image_planes<T>(shape, format);
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int plane_idx = 0; plane_idx < dst.size(); ++plane_idx)
{
SimpleTensor<T> &dst_tensor = dst[plane_idx];
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const int height = dst.shape().y();
// Loop over each pixel and extract channel
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int y = 0; y < height; ++y)
{
for(int x = 0; x < width; ++x)
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const T *src_ref = src.data();
T *dst_ref = dst.data();
-
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int n = 0; n < batches; ++n)
{
for(int g = 0; g < num_groups; ++g)
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
if(num_groups == 1)
{
// Batches are on the 3rd dimension of the input tensor
- int dst_idx = 0;
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(3)
+#endif /* _OPENMP */
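+// dst_idx is computed from the loop counters so the collapsed iterations stay independent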
for(size_t b = 0; b < batches; ++b)
{
for(size_t x = 0; x < src_width; ++x)
{
for(size_t y = 0; y < src_height; ++y)
{
- dst[dst_idx++] = src[coord2index(src.shape(), Coordinates(x, y, b))];
+ const int dst_idx = y + x * src_height + b * src_height * src_width;
+ dst[dst_idx] = src[coord2index(src.shape(), Coordinates(x, y, b))];
}
}
}
}
else
{
- int dst_idx = 0;
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(4)
+#endif /* _OPENMP */
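+// As above, derive dst_idx from the counters instead of advancing a shared running index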
for(size_t b = 0; b < batches; ++b)
{
for(size_t g = 0; g < num_groups; ++g)
{
for(size_t y = 0; y < src_height; ++y)
{
- dst[dst_idx++] = src[coord2index(src.shape(), Coordinates(x, y, g, b))];
+ const int dst_idx = y + x * src_height + g * src_height * src_width + b * src_height * src_width * num_groups;
+ dst[dst_idx] = src[coord2index(src.shape(), Coordinates(x, y, g, b))];
}
}
}
id_src1.set(dim - 1, 0);
id_src2.set(dim - 1, 0);
id_dst.set(dim - 1, 0);
-
- for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
+ for(size_t i = 0; i < dst.shape()[dim - 1]; ++i)
{
- BroadcastUnroll < dim - 1 >::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
- id_src1[dim - 1] += !src1_is_broadcast;
- id_src2[dim - 1] += !src2_is_broadcast;
+ // Use per-iteration copies of the coordinates so the parallel loop carries no shared state
+ Coordinates id_src1_i = id_src1;
+ Coordinates id_src2_i = id_src2;
+ Coordinates id_dst_i = id_dst;
+ id_src1_i[dim - 1] += static_cast<int>(i) * !src1_is_broadcast;
+ id_src2_i[dim - 1] += static_cast<int>(i) * !src2_is_broadcast;
+ id_dst_i[dim - 1] += static_cast<int>(i);
+ BroadcastUnroll < dim - 1 >::unroll(op, src1, src2, dst, id_src1_i, id_src2_i, id_dst_i);
}
}
};
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
T *all_anchors_ptr = all_anchors.data();
// Iterate over the input grid and anchors
+#if defined(_OPENMP)
+ #pragma omp parallel for schedule(dynamic, 1) collapse(3)
+#endif /* _OPENMP */
for(int y = 0; y < height; y++)
{
for(int x = 0; x < width; x++)
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const unsigned int factor_2 = is_nchw_to_nhwc ? num_channels : num_elems_per_input_plane;
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
const Coordinates coords_in = index2coords(src.shape(), i);
SimpleTensor<T> dst(src.shape(), output_data_type);
SimpleTensor<int32_t> sum(src.shape(), output_data_type);
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx)
{
const Coordinates id = index2coord(src.shape(), element_idx);
const int end_xi = output_wh.first * stride_xi;
const int end_yi = output_wh.second * stride_yi;
const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in);
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(5)
+#endif /* _OPENMP */
for(int r = 0; r < num_batches; ++r)
{
for(int yi = start_yi; yi < start_yi + end_yi; yi += stride_yi)
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T>
void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int k = 0; k < K; ++k)
{
float Xr = 0;
template <typename T>
void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int k = 0; k < N; ++k)
{
float Xr = 0;
const bool is_odd = N % 2;
const unsigned int Nleft = N - K;
const int tail_start = is_odd ? K - 1 : K - 2;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int n = 0; n < N; ++n)
{
float xr = 0;
template <typename T>
void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int n = 0; n < N; ++n)
{
float xr = 0;
SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
const unsigned int upper_dims = src.shape().total_size_upper(1);
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int du = 0; du < upper_dims; ++du)
{
const T *src_row_ptr = src.data() + du * N * src.num_channels();
SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
const unsigned int upper_dims = src.shape().total_size_upper(1);
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int du = 0; du < upper_dims; ++du)
{
const T *src_row_ptr = src.data() + du * N * src.num_channels();
{
const int total_elements = tensor.num_elements() * tensor.num_channels();
T *data_ptr = tensor.data();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < total_elements; ++i)
{
data_ptr[i] /= scaling_factor;
// MemSet dst memory to zero
std::memset(dst.data(), 0, dst.size());
-
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(5)
+#endif /* _OPENMP */
for(uint32_t b = 0; b < N; ++b)
{
for(uint32_t co = 0; co < Co; ++co)
// Flip weights by 180 degrees
SimpleTensor<T> weights_flipped{ weights.shape(), weights.data_type(), 1, weights.quantization_info() };
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int ud = 0; ud < weights_upper_dims; ++ud)
{
const int offset = ud * weights_width * weights_height;
}
}
}
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int slice = 0; slice < num_2d_slices; ++slice)
{
const int offset_slice_in = slice * width_in * height_in;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
if(srcs[0].data_type() == DataType::QASYMM8 && std::any_of(srcs.cbegin(), srcs.cend(), have_different_quantization_info))
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int b = 0; b < batches; ++b)
{
// input tensors can have smaller width and height than the output, so for each output's slice we need to requantize 0 (as this is the value
// Up-casting
if(element_size_from_data_type(src.data_type()) < element_size_from_data_type(dt_out))
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
result[i] = src[i] << shift;
// Down-casting
else
{
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
T1 val = src[i] >> shift;
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
ARM_COMPUTE_ERROR_ON(block_shape <= 0);
SimpleTensor<T> result(dst_shape, src.data_type());
- int in_pos = 0;
const auto width_in = static_cast<int>(src.shape()[0]);
const auto height_in = static_cast<int>(src.shape()[1]);
const auto channel_in = static_cast<int>(src.shape()[2]);
const auto batch_in = static_cast<int>(src.shape()[3]);
const int r = channel_in / (block_shape * block_shape);
-
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(4)
+#endif /* _OPENMP */
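+// in_pos is likewise derived per element, replacing the sequential counter the parallel loops could not share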
for(int b = 0; b < batch_in; ++b)
{
for(int z = 0; z < channel_in; ++z)
const int out_x = (block_shape * x + (z / r) % block_shape);
const int out_y = (block_shape * y + (z / r) / block_shape);
const int out_pos = out_x + dst_shape[0] * out_y + (z % r) * dst_shape[0] * dst_shape[1] + b * dst_shape[0] * dst_shape[1] * dst_shape[2];
+ const int in_pos = x + width_in * y + z * width_in * height_in + b * width_in * height_in * channel_in;
result[out_pos] = src[in_pos];
- ++in_pos;
}
}
}
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const int N = src.shape().total_size() / (WH * C);
const std::vector<float> qscales = src.quantization_info().scale();
-
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int n = 0; n < N; ++n)
{
for(int c = 0; c < C; ++c)
{
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
ARM_COMPUTE_ERROR_ON(quantization_info.offset != 0 && src_data_type == DataType::QSYMM8);
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info, src_data_type));
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2));
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
Coordinates coord = index2coord(src.shape(), i);
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> dst(src.shape(), src.data_type());
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
Coordinates coord = index2coord(src.shape(), i);
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
// Fill output tensor with equalized values
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = lut[src[i]];
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> dst(src.shape(), src.data_type());
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
Coordinates coord = index2coord(src.shape(), i);
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
SimpleTensor<T> dst{ src.shape(), src.data_type() };
// Compute reference
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = std::floor(src[i]);
const T *weights_ptr = weights.data();
const TB *bias_ptr = bias.data();
T *dst_ptr = dst.data() + offset_dst;
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
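+// Index the weights by row so the iterations no longer share a moving pointer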
for(int y = 0; y < rows_weights; ++y)
{
- dst_ptr[y] = std::inner_product(src_ptr, src_ptr + cols_weights, weights_ptr, static_cast<T>(0)) + bias_ptr[y];
- weights_ptr += cols_weights;
+ dst_ptr[y] = std::inner_product(src_ptr, src_ptr + cols_weights, &weights_ptr[cols_weights * y], static_cast<T>(0)) + bias_ptr[y];
}
}
const int min = std::numeric_limits<T>::lowest();
const int max = std::numeric_limits<T>::max();
-
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
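+// As in the float path, use an explicit per-row offset into the weights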
for(int y = 0; y < rows_weights; ++y)
{
// Reset accumulator
for(int x = 0; x < cols_weights; ++x)
{
- acc += (src_ptr[x] + input_offset) * (weights_ptr[x] + weights_offset);
+ acc += (src_ptr[x] + input_offset) * (weights_ptr[x + y * cols_weights] + weights_offset);
}
// Accumulate the bias
// Store the result
dst_ptr[y] = static_cast<T>(acc);
-
- weights_ptr += cols_weights;
}
}
} // namespace
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* SOFTWARE.
*/
#include "FuseBatchNormalization.h"
+#include "tests/validation/Helpers.h"
namespace arm_compute
{
const unsigned int height = w.shape()[1];
const unsigned int dim2 = w.shape()[2];
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(unsigned int b = 0; b < dim2; ++b)
{
const auto mean_val = mean.data()[b];
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
#include "GEMM.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int w = 0; w < W; ++w)
{
for(int depth = 0; depth < D; ++depth)
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(int w = 0; w < W; ++w)
{
for(int depth = 0; depth < D; ++depth)
const int cols_in = in->shape().x();
const bool is_per_channel = result_mult_int.size() > 1;
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < in->num_elements(); ++i)
{
int32_t result = ((*in)[i] + result_offset);
const int cols_in = in->shape().x();
const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < in->num_elements(); ++i)
{
TIn result = (*in)[i];
const int cols_in = in->shape().x();
const bool is_per_channel = result_real_multiplier.size() > 1;
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < in->num_elements(); ++i)
{
TIn result = (*in)[i];
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const unsigned int offset_output_x = rhs_info.interleave ? tile_to_use->shape()[0] : tile_to_use->shape()[0] * tile_to_use->shape()[1];
const unsigned int step_output_x = rhs_info.interleave ? tile_to_use->shape()[0] * rhs_info.h0 : tile_to_use->shape()[0];
-
+#if defined(_OPENMP)
+ #pragma omp parallel for schedule(dynamic, 1) collapse(3)
+#endif /* _OPENMP */
for(unsigned int z = 0; z < B; ++z)
{
for(unsigned int y = 0; y < num_tiles_y; ++y)
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const float scale = 1.f / 16.f;
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx)
{
const Coordinates id = index2coord(src.shape(), element_idx);
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const float scale = 1.f / 256.f;
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx)
{
const Coordinates id = index2coord(src.shape(), element_idx);
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
// Compute width and height of the convolved tensors
std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(src_width, src_height, kernel_dims.width, kernel_dims.height, conv_info);
-
+#if defined(_OPENMP)
+ #pragma omp parallel for schedule(dynamic, 1) collapse(2)
+#endif /* _OPENMP */
for(int b = 0; b < batches; ++b)
{
for(int yo = 0; yo < dst_height; ++yo)
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const size_t h_size = src.shape()[1];
const size_t c_size = src.shape()[2];
const size_t n_size = src.shape()[3];
-
+#if defined(_OPENMP)
+ #pragma omp parallel for collapse(2)
+#endif /* _OPENMP */
for(size_t n_i = 0; n_i < n_size; ++n_i)
{
for(size_t c_i = 0; c_i < c_size; ++c_i)
{
- float sum_h_w = 0;
+ float sum_h_w = 0;
float sum_sq_h_w = 0;
for(size_t h_i = 0; h_i < h_size; ++h_i)
{
float val = src[coord2index(src.shape(), Coordinates(w_i, h_i, c_i, n_i))];
sum_h_w += val;
- sum_sq_h_w += val*val;
+ sum_sq_h_w += val * val;
}
}
//Compute mean
const float mean_h_w = sum_h_w / (h_size * w_size);
//Compute variance
- const float var_h_w = sum_sq_h_w / (h_size * w_size) - mean_h_w * mean_h_w;;
+ const float var_h_w = sum_sq_h_w / (h_size * w_size) - mean_h_w * mean_h_w;
//Apply mean
for(size_t h_i = 0; h_i < h_size; ++h_i)
switch(output_data_type)
{
case DataType::QASYMM8:
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qasymm8((src[i]), qinfo, rounding_policy);
}
break;
case DataType::QASYMM8_SIGNED:
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
#ifdef __aarch64__
}
break;
case DataType::QASYMM16:
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int i = 0; i < src.num_elements(); ++i)
{
dst[i] = quantize_qasymm16((src[i]), qinfo, rounding_policy);
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
// Calculate layer reorg in NCHW
Coordinates map_coords;
+
+#if defined(_OPENMP)
+ #pragma omp parallel for private(map_coords)
+#endif /* _OPENMP */
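+// private(map_coords) gives each thread its own Coordinates scratch object instead of sharing the one above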
for(unsigned int b = 0; b < outer_dims; ++b)
{
map_coords.set(3, b);
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
const uint32_t num_elements = src.num_elements();
+
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
const Coordinates src_coord = index2coord(src.shape(), i);
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
upper_dims *= src.shape()[i];
}
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(int r = 0; r < upper_dims; ++r)
{
const T *src_row_ptr = src.data() + r * lower_dims;
return softmax_layer_generic<T>(src, beta, axis, false);
}
-template <typename T, typename std::enable_if<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int>::type>
+template < typename T, typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type >
SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, size_t axis)
{
const QuantizationInfo output_quantization_info = arm_compute::get_softmax_output_quantization_info(src.data_type(), false);
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
const T b(info.b());
const uint32_t num_elements = src.num_elements();
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
for(uint32_t i = 0; i < num_elements; ++i)
{
const size_t z = index2coord(dst.shape(), i).z() % (num_classes + 5);