24 #ifndef __ARM_COMPUTE_UTILS_H__ 25 #define __ARM_COMPUTE_UTILS_H__ 37 #include <type_traits> 50 template <
typename S,
typename T>
// DIV_CEIL(val, m): quotient of val / m rounded up. The return type is whatever
// the expression (val + m - 1) / m deduces to for the given S and T.
51 constexpr
auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
// Adding m - 1 before dividing carries any non-zero remainder into the next
// quotient. NOTE(review): assumes truncating division on positive operands -
// confirm that callers never pass negative values or m == 0.
53 return (val + m - 1) / m;
// ceil_to_multiple(value, divisor): rounds value up to a multiple of divisor by
// taking the rounded-up quotient from DIV_CEIL and scaling it back by divisor.
// NOTE(review): the statements between the signature and the return are not
// shown in this listing, so any argument checks are not visible here.
63 template <
typename S,
typename T>
64 inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
67 return DIV_CEIL(value, divisor) * divisor;
// Rounds value down to a multiple of divisor: the division drops the remainder
// (for integral S and T) and the multiplication scales the quotient back up.
// NOTE(review): the signature is not shown in this listing; this looks like the
// body of floor_to_multiple - confirm against the full header.
77 template <
typename S,
typename T>
81 return (value / divisor) * divisor;
// Declaration only: read_file takes the file name plus a flag called `binary`
// and hands the result back as a std::string. The definition is not part of
// this listing.
// NOTE(review): `binary` presumably selects binary-mode reading - confirm in
// the definition.
99 std::string
read_file(
const std::string &filename,
bool binary);
131 return sizeof(size_t);
// Fragment of a routine that works on a size x size coefficient array `conv`
// and fills `conv_col`. NOTE(review): this appears to be the body of
// separate_matrix; several statements are not shown in this listing.

// Index and value of the reference entry; -1 means "none selected yet".
579 int32_t min_col = -1;
580 int16_t min_col_val = -1;
// Scan the first `size` entries and prefer the non-zero entry with the
// smallest magnitude.
582 for(int32_t i = 0; i < size; ++i)
584 if(conv[i] != 0 && (min_col < 0 ||
abs(min_col_val) >
abs(conv[i])))
// NOTE(review): the matching update of min_col is presumably on an elided line.
587 min_col_val = conv[i];
// Gather every size-th element starting at min_col into conv_col (column
// min_col when conv is read row by row).
596 for(uint32_t j = 0; j < size; ++j)
598 conv_col[j] = conv[min_col + j * size];
// Visit each index of the first `size` entries again.
601 for(uint32_t i = 0; i < size; i++)
// Index min_col is special-cased; what happens for it is not shown here.
603 if(static_cast<int>(i) == min_col)
// Integer ratio between entry i and the reference entry.
609 int16_t coeff = conv[i] / conv[min_col];
// Compare the remaining stride-`size` elements starting at i against conv_col
// scaled by coeff; the handling of a mismatch is not shown here.
611 for(uint32_t j = 1; j < size; ++j)
613 if(conv[i + j * size] != (conv_col[j] * coeff))
639 const size_t size = matrix_size * matrix_size;
// Fragment that builds an output TensorShape from a collection of inputs.
650 template <
typename T>
// Start from the tensor shape reported by the first input.
653 TensorShape out_shape = inputs_vector[0]->info()->tensor_shape();
// Visit every input. NOTE(review): the loop body is not shown in this listing;
// it presumably produces max_x, max_y and depth - confirm.
659 for(
const auto &tensor : inputs_vector)
// Overwrite dimensions 0, 1 and 2 of the output shape with max_x, max_y and
// depth respectively.
668 out_shape.
set(0, max_x);
669 out_shape.
set(1, max_y);
670 out_shape.
set(2, depth);
// Clears the lowest bit of the x extent, i.e. rounds dimension 0 down to an
// even value. NOTE(review): the condition guarding this statement is not shown
// in this listing; the lines look like part of adjust_odd_shape - confirm.
695 output.
set(0, output.x() & ~1
U);
// Same rounding for the y extent (dimension 1).
701 output.
set(1, output.y() & ~1
U);
// Halves the x extent (dimension 0) using unsigned integer division.
// NOTE(review): the condition guarding this statement is not shown in this
// listing; the lines look like part of calculate_subsampled_shape - confirm.
725 output.
set(0, output.x() / 2
U);
// Halves the y extent (dimension 1) the same way.
731 output.
set(1, output.y() / 2
U);
// Fragment that bounds the value range of a separable convolution (row pass
// followed by column pass) and returns a pair of stage types. The statements
// that choose first_stage / second_stage are not shown in this listing.

// Predicate applied to every coefficient below; its body is not shown.
// NOTE(review): the name suggests "greater than or equal to zero" - confirm.
752 auto gez = [](
const int16_t &v)
// Fold step that adds `second` to the running sum only when it is negative.
757 auto accu_neg = [](
const int &first,
const int &second)
759 return first + (second < 0 ? second : 0);
// Fold step that adds `second` to the running sum only when it is positive.
762 auto accu_pos = [](
const int &first,
const int &second)
764 return first + (second > 0 ? second : 0);
// True only when gez holds for every row coefficient and every column
// coefficient.
767 const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez);
769 if(only_positive_coefficients)
// Upper bound of the row pass: sum of the row coefficients times UINT8_MAX
// (presumably the largest input sample - confirm). The column pass then scales
// that bound by the sum of the column coefficients.
771 const int max_row_value =
std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX;
772 const int max_value =
std::accumulate(conv_col, conv_col + size, 0) * max_row_value;
// Mixed-sign case: bound the row pass from below with only the negative
// coefficients and from above with only the positive ones.
780 const int min_row_value =
std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX;
781 const int max_row_value =
std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX;
// Split the column coefficients into their negative and positive sums.
782 const int neg_coeffs_sum =
std::accumulate(conv_col, conv_col + size, 0, accu_neg);
783 const int pos_coeffs_sum =
std::accumulate(conv_col, conv_col + size, 0, accu_pos);
// Minimum: negative column weights meet the largest row value and positive
// weights meet the smallest. Maximum: the opposite pairing.
784 const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value;
785 const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value;
// Hand both stage types back together.
792 return std::make_pair(first_stage, second_stage);
// Fragment that bounds the value range of a convolution over `size`
// coefficients in `conv` and compares it with 16-bit limits. The statements
// that pick the resulting type are not shown in this listing.

// Predicate applied to every coefficient below; its body is not shown.
// NOTE(review): the name suggests "greater than or equal to zero" - confirm.
805 auto gez = [](
const int16_t v)
// True only when gez holds for every coefficient.
810 const bool only_positive_coefficients = std::all_of(conv, conv + size, gez);
812 if(only_positive_coefficients)
// Upper bound: sum of all coefficients times UINT8_MAX (presumably the largest
// input sample - confirm).
814 const int max_conv_value =
std::accumulate(conv, conv + size, 0) * UINT8_MAX;
// Does that bound fit into an unsigned 16-bit value?
815 if(max_conv_value <= UINT16_MAX)
// Body of a fold step that accumulates only negative values; the enclosing
// std::accumulate call is not shown (presumably it produces min_value).
828 return b < 0 ? a + b :
a;
// Sum of the positive coefficients only.
// NOTE(review): no scaling by an input maximum is visible on these lines; it
// may sit on an elided line - confirm.
832 const int max_value =
std::accumulate(conv, conv + size, 0, [](
int a,
int b)
834 return b > 0 ? a + b :
a;
// Does the [min_value, max_value] range fit into a signed 16-bit value?
838 if((INT16_MIN <= min_value) && (INT16_MAX >= max_value))
885 unsigned int kernel_width,
unsigned int kernel_height,
886 unsigned int padx,
unsigned int pady,
unsigned int inner_border_right,
unsigned int inner_border_top,
887 unsigned int stride_x,
unsigned int stride_y);
900 const std::pair<unsigned int, unsigned int>
scaled_dimensions(
unsigned int width,
unsigned int height,
901 unsigned int kernel_width,
unsigned int kernel_height,
// Formatting stream whose precision is set to one digit more than float's
// digits10. NOTE(review): the statements that write the value and build the
// result are not shown in this listing.
1075 std::stringstream ss;
1076 ss.precision(std::numeric_limits<float>::digits10 + 1);
// Fragment that streams n consecutive elements from ptr to s, each followed by
// element_delim. The signature and braces are not shown in this listing.
1089 template <
typename T>
// Floating-point element types are printed as themselves; every other type is
// converted to int first (presumably so that char-sized values print as
// numbers rather than characters).
1092 using print_type =
typename std::conditional<std::is_floating_point<T>::value, T,
int>::type;
1094 for(
unsigned int i = 0; i < n; ++i)
// A non-zero stream_width is re-applied for each element.
1097 if(stream_width != 0)
1099 s.width(stream_width);
// `half` elements are streamed as T itself instead of going through
// print_type. NOTE(review): the second statement presumably sits in the
// else branch, which is not shown here.
1102 if(std::is_same<
typename std::decay<T>::type,
half>::value)
1105 s << std::right << static_cast<T>(ptr[i]) << element_delim;
1109 s << std::right << static_cast<print_type>(ptr[i]) << element_delim;
// Fragment that measures how many characters each of n consecutive elements
// needs when formatted, keeping the largest count in max_width. The signature,
// the declaration of max_width and the return are not shown in this listing.
1122 template <
typename T>
// Same printing rule as the element printer: floating-point types as
// themselves, everything else converted to int.
1125 using print_type =
typename std::conditional<std::is_floating_point<T>::value, T,
int>::type;
1128 for(
unsigned int i = 0; i < n; ++i)
// A fresh stream per element, so the string length reflects this element only.
1130 std::stringstream ss;
// `half` elements are formatted as T itself. NOTE(review): the second
// statement presumably sits in the else branch, which is not shown here.
1133 if(std::is_same<
typename std::decay<T>::type,
half>::value)
1136 ss << static_cast<T>(ptr[i]);
1140 ss << static_cast<print_type>(ptr[i]);
// Keep the widest formatted length seen so far.
1143 max_width = std::max<int>(max_width, ss.str().size());
BorderMode
Methods available to handle borders.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor-1)/divisor)*divisor)
Computes the smallest number larger than or equal to value that is a multiple of divisor.
#define ARM_COMPUTE_ERROR(...)
Print the given message, then throw a std::runtime_error.
const std::string & string_from_channel(Channel channel)
Convert a channel identity into a string.
A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes.
quantized, symmetric fixed-point 16-bit number
InterpolationPolicy
Interpolation method.
bool has_format_vertical_subsampling(Format format)
Return true if the given format has vertical subsampling.
const std::pair< unsigned int, unsigned int > deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int inner_border_right, unsigned int inner_border_top, unsigned int stride_x, unsigned int stride_y)
Returns expected width and height of the deconvolution's output tensor.
const std::string & string_from_interpolation_policy(InterpolationPolicy policy)
Translates a given interpolation policy to a string.
T z() const
Alias to access the size of the third dimension.
TensorShape calculate_depth_concatenate_shape(const std::vector< T * > &inputs_vector)
Calculate the output shapes of the depth concatenate function.
TensorShape deconvolution_output_shape(const std::pair< unsigned int, unsigned int > &out_dims, TensorShape input, TensorShape weights)
Returns expected shape for the deconvolution output tensor.
quantized, symmetric fixed-point 8-bit number
int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n)
Identify the maximum width of n consecutive elements.
int plane_idx_from_channel(Format format, Channel channel)
Return the plane index of a given channel given an input format.
TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel=Channel::UNKNOWN)
Calculate subsampled shape for a given format and channel.
size_t num_planes_from_format(Format format)
Return the number of planes for a given format.
1 channel, 1 U8 per channel
size_t element_size_from_data_type(DataType dt)
The size in bytes of the data type.
half_float::half half
16-bit floating point type
1 channel, 1 F32 per channel
std::pair< DataType, DataType > data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size)
Calculate the accuracy required by the horizontal and vertical convolution computations.
PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info)
Calculate padding requirements in case of SAME padding.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const std::string & string_from_border_mode(BorderMode border_mode)
Translates a given border mode policy to a string.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
size_t num_channels_from_format(Format format)
Return the number of channels for a given single-planar pixel format.
A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling.
1 channel, 1 U16 per channel
const std::string & string_from_matrix_pattern(MatrixPattern pattern)
Convert a matrix pattern into a string.
constexpr auto DIV_CEIL(S val, T m) -> decltype((val+m-1)/m)
Calculate the rounded up quotient of val / m.
uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size)
Calculate the scale of the given square matrix.
A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling.
const std::string & string_from_non_linear_filter_function(NonLinearFilterFunction function)
Translates a given non linear function to a string.
This file contains all available output stages for GEMMLowp on OpenCL.
ActivationFunction
Available activation functions.
1 channel, 1 F16 per channel
std::string lower_string(const std::string &val)
Lower a given string.
T x() const
Alias to access the size of the first dimension.
1 channel, 1 S32 per channel
3 channels, 1 U8 per channel
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
DataType get_promoted_data_type(DataType dt)
Return the promoted data type of a given data type.
1 channel, 1 U32 per channel
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Channel
Available channels.
Format
Image colour formats.
quantized, asymmetric fixed-point 8-bit number
bool is_data_type_fixed_point(DataType dt)
Check if a given data type is of fixed point type.
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
auto floor_to_multiple(S value, T divisor) -> decltype((value/divisor)*divisor)
Computes the largest number smaller than or equal to value that is a multiple of divisor.
int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, unsigned int n)
Identify the maximum width of n consecutive elements.
A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Padding and stride information class.
1 channel, 1 S16 per channel
TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
Adjust tensor shape size if width or height is odd for a given multi-planar format.
DataType data_type_from_format(Format format)
Return the data type used by a given single-planar pixel format.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes.
4 channels, 1 U8 per channel
const std::string & string_from_norm_type(NormType type)
Translates a given normalization type to a string.
PoolingType
Available pooling types.
size_t pixel_size_from_format(Format format)
The size in bytes of the pixel format.
T y() const
Alias to access the size of the second dimension.
Class for specifying the size of an image or rectangle.
const std::string & string_from_pooling_type(PoolingType type)
Translates a given pooling type to a string.
const std::pair< unsigned int, unsigned int > scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width=0, const std::string &element_delim=" ")
Print consecutive elements to an output stream.
2 channels, 1 U8 per channel
std::string build_information()
Returns the arm_compute library build information.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true)
Accessor to set the value of one of the dimensions.
bool has_format_horizontal_subsampling(Format format)
Return true if the given format has horizontal subsampling.
__kernel void accumulate(__global uchar *input_ptr, uint input_stride_x, uint input_step_x, uint input_stride_y, uint input_step_y, uint input_offset_first_element_in_bytes, __global uchar *accu_ptr, uint accu_stride_x, uint accu_step_x, uint accu_stride_y, uint accu_step_y, uint accu_offset_first_element_in_bytes)
This function accumulates an input image into output image.
bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
Separate a 2D convolution into two 1D convolutions.
fixed_point< T > max(fixed_point< T > x, fixed_point< T > y)
quantized, symmetric fixed-point 32-bit number
64-bit floating-point number
fixed_point< T > abs(fixed_point< T > x)
A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes.
std::string read_file(const std::string &filename, bool binary)
Load an entire file in memory.
DataType
Available data types.
const std::string & string_from_format(Format format)
Convert a tensor format into a string.
void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim=" ")
Print consecutive elements to an output stream.
DataLayout
Supported tensor data layouts.
NormType
The normalization type used for the normalization layer.
MatrixPattern
Available matrix patterns.
NonLinearFilterFunction
Available non linear functions.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
int channel_idx_from_format(Format format, Channel channel)
Return the channel index of a given channel given an input format.
DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size)
Calculate the accuracy required by the squared convolution calculation.