26 #if defined(FIXED_POINT_POSITION) 29 #define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE_PROMOTED, 8) 30 #define MUL_OP(a, b) MUL_SAT_OP_EXPAND(CONVERT((a), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), CONVERT((b), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), DATA_TYPE_PROMOTED, 8, FIXED_POINT_POSITION) 38 #define ADD_OP(a, b) ((a) + (b)) 39 #define MUL_OP(a, b) ((a) * (b)) 40 #define CONVERT_SAT(a, b) ((a)) 44 #if defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) 47 #define INPUT_PIXEL_STR(data_size) extract_input_stride3_##data_size 48 #define INPUT_PIXEL(data_size) INPUT_PIXEL_STR(data_size) 50 #define INPUT_PIXEL(data_size) extract_input_stride2 52 #define INPUT_PIXEL(data_size) extract_input_stride1 54 #error "Only support strides 1, 2 and 3" 65 return vload8(0, input_pixel);
77 temp = vload16(0, input_pixel);
78 return temp.s02468ace;
90 temp1 = vload4(0, input_pixel);
92 temp2 = vload4(0, input_pixel + 6);
94 temp3 = vload4(0, input_pixel + 12);
96 temp4 = vload4(0, input_pixel + 18);
109 temp1 = vload8(0, input_pixel);
111 temp2 = vload8(0, input_pixel + 8);
113 temp3 = vload8(0, input_pixel + 16);
126 temp1 = vload16(0, input_pixel);
128 temp2 = vload16(0, input_pixel + 12);
170 __kernel
void direct_convolution1x1(
177 unsigned int weights_stride_w)
190 const uint z_index = get_global_id(2);
192 weights.
ptr += z_index * weights_stride_w;
194 for(
volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
198 input_pixel = INPUT_PIXEL(DATA_SIZE)((__global
DATA_TYPE *)src.
ptr);
200 src.
ptr += src_stride_z;
201 weights.
ptr += weights_stride_z;
210 #endif // defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) 212 #if defined(WEIGHTS_DEPTH) 214 #define CONVOLUTION1x1_BIFROST(acc, src, weight_value) \ 216 acc.s0 = mad(src.s0, weight_value, acc.s0); \ 217 acc.s1 = mad(src.s1, weight_value, acc.s1); \ 218 acc.s2 = mad(src.s2, weight_value, acc.s2); \ 219 acc.s3 = mad(src.s3, weight_value, acc.s3); \ 258 __kernel
void direct_convolution1x1_f32_bifrost(
265 unsigned int weights_stride_w)
268 const int kernel_index = get_global_id(2);
278 __global uchar *weights_addr = (__global uchar *)(weights_ptr + weights_offset_first_element_in_bytes + kernel_index * weights_stride_w);
279 __global uchar *src_addr = (__global uchar *)
offset(&src, 0, 0);
281 for(ushort d = 0; d < (ushort)WEIGHTS_DEPTH; ++d)
284 float weight = *((__global
float *)weights_addr);
287 float4 src0 = vload4(0, (__global
float *)(src_addr + 0 * src_stride_y));
288 float4 src1 = vload4(0, (__global
float *)(src_addr + 1 * src_stride_y));
289 float4 src2 = vload4(0, (__global
float *)(src_addr + 2 * src_stride_y));
290 float4 src3 = vload4(0, (__global
float *)(src_addr + 3 * src_stride_y));
292 CONVOLUTION1x1_BIFROST(acc0, src0, weight);
293 CONVOLUTION1x1_BIFROST(acc1, src1, weight);
294 CONVOLUTION1x1_BIFROST(acc2, src2, weight);
295 CONVOLUTION1x1_BIFROST(acc3, src3, weight);
297 src_addr += src_stride_z;
298 weights_addr += weights_stride_z;
304 float bias = (float) * ((__global
float *)(
vector_offset(&biases, kernel_index)));
324 vstore4(acc0, 0, (__global
float *)(dst.
ptr + 0 * dst_stride_y));
325 vstore4(acc1, 0, (__global
float *)(dst.
ptr + 1 * dst_stride_y));
326 vstore4(acc2, 0, (__global
float *)(dst.
ptr + 2 * dst_stride_y));
327 vstore4(acc3, 0, (__global
float *)(dst.
ptr + 3 * dst_stride_y));
329 #endif // defined(WEIGHTS_DEPTH) Structure to hold Vector information.
#define CONVERT_TO_TENSOR3D_STRUCT(name)
#define CONVERT_SAT(a, b)
Structure to hold 3D tensor information.
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
#define CONVERT_TO_IMAGE_STRUCT(name)
#define VECTOR_DECLARATION(name)
Structure to hold Image information.
#define TENSOR3D_DECLARATION(name)
#define MULQ_SAT_IMPL(type, itype)
__global uchar * ptr
Pointer to the starting postion of the buffer.
#define VEC_DATA_TYPE(type, size)
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name)
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name)
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
__global uchar * ptr
Pointer to the starting postion of the buffer.
convolution configure & src