24 #ifndef __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__ 25 #define __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__ 36 namespace convolution_3d
// Half-open range test: the result is true only when min <= i < max.
42 return (i >= min && i < max);
// NOTE(review): this listing omits several original lines of this overload (the function
// header, all braces, the declaration of acc and original lines 74-77). The comments below
// describe only the statements that are visible.
// i_offset / w_offset / b_offset / o_offset are element offsets added to the data() pointers
// of in, weights, bias and out respectively.
48 int i_offset,
int w_offset,
int b_offset,
int o_offset,
// (xi, yi) is the position in an input slice that the kernel window is anchored at;
// width_in / height_in give the slice extents and depth_in the number of slices.
// dilation_x / dilation_y scale the tap offsets when the input is indexed (default 1).
49 int xi,
int yi,
int width_in,
int height_in,
int depth_in,
int width_weights,
int height_weights,
int dilation_x = 1,
int dilation_y = 1)
// Resolve the base pointers by applying the caller-supplied element offsets.
51 const T *in_ptr = in.
data() + i_offset;
52 const T *w_ptr = weights.
data() + w_offset;
53 const TB *b_ptr = bias.
data() + b_offset;
54 T *out_ptr = out.
data() + o_offset;
// Window extent relative to the anchor: taps cover [-size/2, size/2] for an odd kernel size
// and [-size/2, size/2 - 1] for an even one.
56 const int half_width_weights_start = width_weights / 2;
57 const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
58 const int half_height_weights_start = height_weights / 2;
59 const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
// Visit every one of the depth_in input slices.
65 for(
int ifm = 0; ifm < depth_in; ++ifm)
// Linear index of (xi, yi) inside slice ifm; each slice holds width_in * height_in elements
// stored row by row.
68 const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
// Walk the kernel window: rows (yk) in the outer loop, columns (xk) in the inner loop.
71 for(
int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
73 for(
int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
// NOTE(review): no bounds check is visible before the reads below (original lines 74-77 are
// absent) - presumably is_valid_pixel guards them in the full file; confirm.
// Shift the signed tap offsets to zero-based indices into the weights plane.
78 const int idx = xk + half_width_weights_start;
79 const int idy = yk + half_height_weights_start;
// Input sample displaced from the anchor by the dilated tap offset, and the weight at
// (idx, idy) of weights slice ifm.
81 const T i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
82 const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
// Accumulate the product; acc is declared on a line that is not part of this listing.
84 acc += i_value * w_value;
// Write the accumulated sum plus the bias element to the output element at o_offset.
91 *out_ptr = acc + (*b_ptr);
// Overload selected only when both T and TB are integral types (std::enable_if constraint).
// It performs the arithmetic through fixed_point<> wrappers.
// NOTE(review): this listing omits several original lines (the function header, all braces,
// the definitions of promoted_type and fixed_point_position, and the statements that add iw
// and b to acc). The comments below describe only the statements that are visible.
95 template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value,
int >::type = 0 >
// i_offset / w_offset / b_offset / o_offset are element offsets added to the data() pointers
// of in, weights, bias and out respectively.
97 int i_offset,
int w_offset,
int b_offset,
int o_offset,
// (xi, yi) is the position in an input slice that the kernel window is anchored at;
// width_in / height_in give the slice extents and depth_in the number of slices.
// dilation_x / dilation_y scale the tap offsets when the input is indexed (default 1).
98 int xi,
int yi,
int width_in,
int height_in,
int depth_in,
int width_weights,
int height_weights,
int dilation_x = 1,
int dilation_y = 1)
// Resolve the base pointers by applying the caller-supplied element offsets.
100 const T *in_ptr = in.
data() + i_offset;
101 const T *w_ptr = weights.
data() + w_offset;
// NOTE(review): bias is read through a const T * here although the template declares a
// separate TB (the overload above uses const TB *). Presumably this only compiles when
// TB == T - confirm against the full signature.
102 const T *b_ptr = bias.
data() + b_offset;
103 T *out_ptr = out.
data() + o_offset;
// Window extent relative to the anchor: taps cover [-size/2, size/2] for an odd kernel size
// and [-size/2, size/2 - 1] for an even one.
106 const int half_width_weights_start = width_weights / 2;
107 const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
108 const int half_height_weights_start = height_weights / 2;
109 const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
111 using namespace fixed_point_arithmetic;
// Accumulator, constructed from 0 with the shared fixed_point_position.
// NOTE(review): promoted_type is presumably a wider type than T, used to avoid overflow
// while summing - confirm where it is defined.
115 fixed_point<promoted_type> acc(0, fixed_point_position);
// Visit every one of the depth_in input slices.
118 for(
int ifm = 0; ifm < depth_in; ++ifm)
// Linear index of (xi, yi) inside slice ifm; each slice holds width_in * height_in elements
// stored row by row.
121 const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
// Walk the kernel window: rows (yk) in the outer loop, columns (xk) in the inner loop.
124 for(
int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
126 for(
int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
// NOTE(review): no bounds check is visible before the reads below (original lines 127-130
// are absent) - presumably is_valid_pixel guards them in the full file; confirm.
// Shift the signed tap offsets to zero-based indices into the weights plane.
131 const int idx = xk + half_width_weights_start;
132 const int idy = yk + half_height_weights_start;
// Wrap the stored input and weight elements as fixed_point values.
// NOTE(review): the trailing `true` presumably marks the argument as an already-encoded raw
// value rather than a real number to convert - confirm against the fixed_point constructor.
134 const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in], fixed_point_position,
true);
135 const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position,
true);
// Product of the two samples; the statement that folds iw into acc is not part of this listing.
136 const fixed_point<promoted_type> iw = i_value * w_value;
// Bias element wrapped the same way; the statement that adds b to acc is not part of this listing.
144 const fixed_point<promoted_type>
b(*b_ptr, fixed_point_position,
true);
// Convert the accumulator to fixed_point<T> and store its raw representation in the output.
148 fixed_point<T> res(acc);
149 *out_ptr = res.raw();
// Overload operating on uint8_t input, weights and output with int32_t bias.
// NOTE(review): this listing omits several original lines (the function header, all braces,
// the definitions of input_scale / weights_scale / output_scale and of input_offset /
// weights_offset / output_offset, the declaration of acc, the requantization step and the
// final store). The comments below describe only the statements that are visible.
// i_offset / w_offset / b_offset / o_offset are element offsets added to the data() pointers
// of in, weights, bias and out respectively.
155 int i_offset,
int w_offset,
int b_offset,
int o_offset,
// (xi, yi) is the position in an input slice that the kernel window is anchored at;
// width_in / height_in give the slice extents and depth_in the number of slices.
// Unlike the other overloads visible in this file, dilation_x / dilation_y have no defaults here.
156 int xi,
int yi,
int width_in,
int height_in,
int depth_in,
int width_weights,
int height_weights,
int dilation_x,
int dilation_y)
// Resolve the base pointers by applying the caller-supplied element offsets.
158 const uint8_t *in_ptr = in.
data() + i_offset;
159 const uint8_t *w_ptr = weights.
data() + w_offset;
160 const int32_t *b_ptr = bias.
data() + b_offset;
161 uint8_t *out_ptr = out.
data() + o_offset;
// Requantization parameters, zero-initialised here.
// NOTE(review): they are presumably filled in from `multiplier` by
// calculate_quantized_multiplier_less_than_one on a line absent from this listing - confirm.
170 int output_multiplier = 0;
171 int output_shift = 0;
// Combined real-valued rescale factor: input scale times weights scale over output scale.
172 const float multiplier = input_scale * weights_scale / output_scale;
// Window extent relative to the anchor: taps cover [-size/2, size/2] for an odd kernel size
// and [-size/2, size/2 - 1] for an even one.
175 const int half_width_weights_start = width_weights / 2;
176 const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
177 const int half_height_weights_start = height_weights / 2;
178 const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
// Visit every one of the depth_in input slices.
184 for(
int ifm = 0; ifm < depth_in; ++ifm)
// Linear index of (xi, yi) inside slice ifm; each slice holds width_in * height_in elements
// stored row by row.
187 const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
// Walk the kernel window: rows (yk) in the outer loop, columns (xk) in the inner loop.
190 for(
int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
192 for(
int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
// NOTE(review): no bounds check is visible before the reads below (original lines 193-196
// are absent) - presumably is_valid_pixel guards them in the full file; confirm.
// Shift the signed tap offsets to zero-based indices into the weights plane.
197 const int idx = xk + half_width_weights_start;
198 const int idy = yk + half_height_weights_start;
// Quantized input sample displaced from the anchor by the dilated tap offset, and the
// quantized weight at (idx, idy) of weights slice ifm.
200 const uint8_t i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
201 const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
// Apply the offsets to both operands before multiplying and accumulating.
// NOTE(review): input_offset / weights_offset are presumably the negated zero points of the
// two tensors - confirm where they are defined.
203 acc += (i_value + input_offset) * (w_value + weights_offset);
// Add the output offset, then saturate the result to the uint8_t range [0, 255].
213 acc += output_offset;
214 acc = utility::clamp<int32_t>(acc, 0, 255);
int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
Round-to-nearest division by a power of two.
fixed_point< T > min(fixed_point< T > x, fixed_point< T > y)
typename promote< T >::type promote_t
Get promoted type.
arm_compute::Status calculate_quantized_multiplier_less_than_one(double multiplier, int *quant_multiplier, int *right_shift)
Calculate quantized representation of multiplier with value less than one.
bool is_valid_pixel(int i, int min, int max)
This file contains all available output stages for GEMMLowp on OpenCL.
int32_t asymm_int_mult(int32_t a, int32_t b)
Multiplication of two integers.
const T * data() const
Constant pointer to the underlying buffer.
Simple tensor object that stores elements in a consecutive chunk of memory.
fixed_point< T > max(fixed_point< T > x, fixed_point< T > y)
QuantizationInfo quantization_info() const override
Quantization info in case of asymmetric quantized type.
void convolution3d(const SimpleTensor< T > &in, const SimpleTensor< T > &weights, const SimpleTensor< TB > &bias, SimpleTensor< T > &out, int i_offset, int w_offset, int b_offset, int o_offset, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x=1, int dilation_y=1)
int fixed_point_position() const override
Number of bits for the fractional part.