24 #ifndef __ARM_COMPUTE_UTILS_H__ 25 #define __ARM_COMPUTE_UTILS_H__ 37 #include <type_traits> 44 template <
typename S,
typename T>
45 constexpr
auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
47 return (val + m - 1) / m;
51 template <
typename S,
typename T>
59 template <
typename S,
typename T>
63 return (
value / divisor) * divisor;
81 std::string
read_file(
const std::string &filename,
bool binary);
113 return sizeof(size_t);
561 int32_t min_col = -1;
562 int16_t min_col_val = -1;
564 for(int32_t i = 0; i <
size; ++i)
566 if(conv[i] != 0 && (min_col < 0 ||
abs(min_col_val) >
abs(conv[i])))
569 min_col_val = conv[i];
578 for(uint32_t j = 0; j <
size; ++j)
580 conv_col[j] = conv[min_col + j *
size];
583 for(uint32_t i = 0; i <
size; i++)
585 if(static_cast<int>(i) == min_col)
591 int16_t coeff = conv[i] / conv[min_col];
593 for(uint32_t j = 1; j <
size; ++j)
595 if(conv[i + j * size] != (conv_col[j] * coeff))
621 const size_t size = matrix_size * matrix_size;
632 template <
typename T>
635 TensorShape out_shape = inputs_vector[0]->info()->tensor_shape();
641 for(
const auto &tensor : inputs_vector)
650 out_shape.
set(0, max_x);
651 out_shape.
set(1, max_y);
652 out_shape.
set(2, depth);
676 output.
set(0, output.x() & ~1
U);
682 output.
set(1, output.y() & ~1
U);
706 output.
set(0, output.x() / 2
U);
712 output.
set(1, output.y() / 2
U);
733 auto gez = [](
const int16_t &v)
738 auto accu_neg = [](
const int &first,
const int &second)
740 return first + (second < 0 ? second : 0);
743 auto accu_pos = [](
const int &first,
const int &second)
745 return first + (second > 0 ? second : 0);
748 const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez);
750 if(only_positive_coefficients)
752 const int max_row_value =
std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX;
753 const int max_value =
std::accumulate(conv_col, conv_col + size, 0) * max_row_value;
761 const int min_row_value =
std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX;
762 const int max_row_value =
std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX;
763 const int neg_coeffs_sum =
std::accumulate(conv_col, conv_col + size, 0, accu_neg);
764 const int pos_coeffs_sum =
std::accumulate(conv_col, conv_col + size, 0, accu_pos);
765 const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value;
766 const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value;
773 return std::make_pair(first_stage, second_stage);
786 auto gez = [](
const int16_t v)
791 const bool only_positive_coefficients = std::all_of(conv, conv + size, gez);
793 if(only_positive_coefficients)
795 const int max_conv_value =
std::accumulate(conv, conv + size, 0) * UINT8_MAX;
796 if(max_conv_value <= UINT16_MAX)
809 return b < 0 ? a + b :
a;
813 const int max_value =
std::accumulate(conv, conv + size, 0, [](
int a,
int b)
815 return b > 0 ? a + b :
a;
819 if((INT16_MIN <= min_value) && (INT16_MAX >= max_value))
866 unsigned int kernel_width,
unsigned int kernel_height,
867 unsigned int padx,
unsigned int pady,
unsigned int inner_border_right,
unsigned int inner_border_top,
868 unsigned int stride_x,
unsigned int stride_y);
880 const std::pair<unsigned int, unsigned int>
scaled_dimensions(
unsigned int width,
unsigned int height,
881 unsigned int kernel_width,
unsigned int kernel_height,
1048 std::stringstream ss;
1049 ss.precision(std::numeric_limits<float>::digits10 + 1);
1062 template <
typename T>
1067 for(
unsigned int i = 0; i < n; ++i)
1070 if(stream_width != 0)
1072 s.width(stream_width);
1078 s << std::right << static_cast<T>(ptr[i]) << element_delim;
1082 s << std::right << static_cast<print_type>(ptr[i]) << element_delim;
1095 template <
typename T>
1101 for(
unsigned int i = 0; i < n; ++i)
1103 std::stringstream ss;
1109 ss << static_cast<T>(ptr[i]);
1113 ss << static_cast<print_type>(ptr[i]);
1116 max_width = std::max<int>(max_width, ss.str().size());
BorderMode
Methods available to handle borders.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor-1)/divisor)*divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
const std::string & string_from_channel(Channel channel)
Convert a channel identity into a string.
A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes.
InterpolationPolicy
Interpolation method.
bool has_format_vertical_subsampling(Format format)
Return true if the given format has vertical subsampling.
const std::pair< unsigned int, unsigned int > deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int inner_border_right, unsigned int inner_border_top, unsigned int stride_x, unsigned int stride_y)
Returns expected width and height of the deconvolution's output tensor.
const std::string & string_from_interpolation_policy(InterpolationPolicy policy)
Translates a given interpolation policy to a string.
T z() const
Alias to access the size of the third dimension.
TensorShape calculate_depth_concatenate_shape(const std::vector< T * > &inputs_vector)
Calculate the output shapes of the depth concatenate function.
TensorShape deconvolution_output_shape(const std::pair< unsigned int, unsigned int > &out_dims, TensorShape input, TensorShape weights)
Returns expected shape for the deconvolution output tensor.
int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n)
Identify the maximum width of n consecutive elements.
int plane_idx_from_channel(Format format, Channel channel)
Return the plane index of a given channel given an input format.
TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel=Channel::UNKNOWN)
Calculate subsampled shape for a given format and channel.
size_t num_planes_from_format(Format format)
Return the number of planes for a given format.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
1 channel, 1 U8 per channel
size_t element_size_from_data_type(DataType dt)
The size in bytes of the data type.
#define ARM_COMPUTE_ERROR(...)
Print the given message then throw an std::runtime_error.
half_float::half half
16-bit floating point type
1 channel, 1 F32 per channel
std::pair< DataType, DataType > data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size)
Calculate the accuracy required by the horizontal and vertical convolution computations.
PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info)
Calculate padding requirements in case of SAME padding.
const std::string & string_from_border_mode(BorderMode border_mode)
Translates a given border mode policy to a string.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
size_t num_channels_from_format(Format format)
Return the number of channels for a given single-planar pixel format.
A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling.
1 channel, 1 U16 per channel
const std::string & string_from_matrix_pattern(MatrixPattern pattern)
Convert a matrix pattern into a string.
constexpr auto DIV_CEIL(S val, T m) -> decltype((val+m-1)/m)
Calculate the rounded up quotient of val / m.
uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size)
Calculate the scale of the given square matrix.
A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling.
const std::string & string_from_non_linear_filter_function(NonLinearFilterFunction function)
Translates a given non linear function to a string.
This file contains all available output stages for GEMMLowp on OpenCL.
ActivationFunction
Available activation functions.
1 channel, 1 F16 per channel
std::string lower_string(const std::string &val)
Lower a given string.
T x() const
Alias to access the size of the first dimension.
1 channel, 1 S32 per channel
3 channels, 1 U8 per channel
DataType get_promoted_data_type(DataType dt)
Return the promoted data type of a given data type.
1 channel, 1 U32 per channel
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Channel
Available channels.
Format
Image colour formats.
bool is_data_type_fixed_point(DataType dt)
Check if a given data type is of fixed point type.
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
auto floor_to_multiple(S value, T divisor) -> decltype((value/divisor)*divisor)
Computes the largest number smaller or equal to value that is a multiple of divisor.
int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, unsigned int n)
Identify the maximum width of n consecutive elements.
A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Padding and stride information class.
1 channel, 1 S16 per channel
TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
Adjust tensor shape size if width or height are odd for a given multi-planar format.
DataType data_type_from_format(Format format)
Return the data type used by a given single-planar pixel format.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes.
4 channels, 1 U8 per channel
const std::string & string_from_norm_type(NormType type)
Translates a given normalization type to a string.
PoolingType
Available pooling types.
TensorShape & set(size_t dimension, size_t value)
Accessor to set the value of one of the dimensions.
size_t pixel_size_from_format(Format format)
The size in bytes of the pixel format.
T y() const
Alias to access the size of the second dimension.
const std::pair< unsigned int, unsigned int > scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info)
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
const std::string & string_from_pooling_type(PoolingType type)
Translates a given pooling type to a string.
void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width=0, const std::string &element_delim=" ")
Print consecutive elements to an output stream.
2 channels, 1 U8 per channel
std::string build_information()
Returns the arm_compute library build information.
bool has_format_horizontal_subsampling(Format format)
Return true if the given format has horizontal subsampling.
__kernel void accumulate(__global uchar *input_ptr, uint input_stride_x, uint input_step_x, uint input_stride_y, uint input_step_y, uint input_offset_first_element_in_bytes, __global uchar *accu_ptr, uint accu_stride_x, uint accu_step_x, uint accu_stride_y, uint accu_step_y, uint accu_offset_first_element_in_bytes)
This function accumulates an input image into output image.
bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
Separate a 2D convolution into two 1D convolutions.
fixed_point< T > max(fixed_point< T > x, fixed_point< T > y)
fixed_point< T > abs(fixed_point< T > x)
A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes.
std::string read_file(const std::string &filename, bool binary)
Load an entire file in memory.
DataType
Available data types.
const std::string & string_from_format(Format format)
Convert a tensor format into a string.
void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim=" ")
Print consecutive elements to an output stream.
NormType
The normalization type used for the normalization layer.
MatrixPattern
Available matrix patterns.
NonLinearFilterFunction
Available non linear functions.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
int channel_idx_from_format(Format format, Channel channel)
Return the channel index of a given channel given an input format.
DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size)
Calculate the accuracy required by the squared convolution calculation.