27 #define MINVAL HALF_MIN 28 #define SELECT_DATA_TYPE short 29 #define DATA_TYPE half 31 #define MINVAL FLT_MIN 32 #define SELECT_DATA_TYPE int 33 #define DATA_TYPE float 37 __constant uint16
idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
72 const uint width4 = width >> 4;
73 for(uint i = 0; i < width4; i++)
76 data = vload16(0, (__global DATA_TYPE *)
offset(&src, i << 4, 0));
77 max_val = max(data, max_val);
80 #if defined NON_MULTIPLE_OF_16 83 data = vload16(0, (__global DATA_TYPE *)
offset(&src, width4 << 4, 0));
86 max_val = max(max_val, select(
type_min, data, widx));
90 max_val.s01234567 = max(max_val.s01234567, max_val.s89ABCDEF);
91 max_val.s0123 = max(max_val.s0123, max_val.s4567);
92 max_val.s01 = max(max_val.s01, max_val.s23);
93 max_val.s0 = max(max_val.s0, max_val.s1);
96 *((__global DATA_TYPE *)dst.
ptr) = max_val.s0;
152 const uint width4 = width >> 4;
153 for(uint i = 0; i < width4; i++)
157 data = exp(data - max_val);
162 #if defined NON_MULTIPLE_OF_16 166 data = exp(data - max_val);
169 data = select(0, data, widx);
175 sum1D.s01234567 = sum1D.s01234567 + sum1D.s89ABCDEF;
176 sum1D.s0123 = sum1D.s0123 + sum1D.s4567;
177 sum1D.s01 = sum1D.s01 + sum1D.s23;
178 sum1D.s0 = sum1D.s0 + sum1D.s1;
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name)
#define IMAGE_DECLARATION(name)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
#define CONVERT_TO_IMAGE_STRUCT(name)
#define VEC_DATA_TYPE(type, size)
__kernel void softmax_layer_shift_exp_sum(__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *max_ptr, uint max_stride_x, uint max_step_x, uint max_stride_y, uint max_step_y, uint max_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes, __global uchar *sum_ptr, uint sum_stride_x, uint sum_step_x, uint sum_stride_y, uint sum_step_y, uint sum_offset_first_element_in_bytes, uint width)
Shifts the values of the input tensor by the max calculated in the softmax_layer_max kernel, then computes the exponential of each element and sums all elements across each row.
Structure to hold Image information.
__kernel void softmax_layer_max(__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes, uint width)
Identifies the maximum value across the 1st dimension.
__global uchar * ptr
Pointer to the starting position of the buffer.
__constant float16 type_min
__kernel void softmax_layer_norm(__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *sum_ptr, uint sum_stride_x, uint sum_step_x, uint sum_stride_y, uint sum_step_y, uint sum_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes)
Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum ker...