ARM Compute Library
17.04
|
#include "helpers.h"
Go to the source code of this file.
Functions | |
__kernel void | gemm_transpose1x4_f32 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel computes the "vector" 1x4 transposition of input matrix. More... | |
__kernel void | gemm_transpose1x8_f16 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel computes the "vector" 1x8 transposition of input matrix. More... | |
__kernel void | gemm_transpose1x16_u8 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel computes the "vector" 1x16 transposition of input matrix. More... | |
__kernel void | gemm_interleave4x4_f32 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values. More... | |
__kernel void | gemm_interleave4x4_f16 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values. More... | |
__kernel void | gemm_interleave4x4_u8 (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values. More... | |
__kernel void | gemm_accumulate_biases_f32 (__global uchar *accum_ptr, uint accum_stride_x, uint accum_step_x, uint accum_stride_y, uint accum_step_y, uint accum_offset_first_element_in_bytes, __global uchar *biases_ptr, uint biases_stride_x, uint biases_step_x, uint biases_offset_first_element_in_bytes) |
This kernel accumulates each row with the biases vector. More... | |
__kernel void | gemm_accumulate_biases_f16 (__global uchar *accum_ptr, uint accum_stride_x, uint accum_step_x, uint accum_stride_y, uint accum_step_y, uint accum_offset_first_element_in_bytes, __global uchar *biases_ptr, uint biases_stride_x, uint biases_step_x, uint biases_offset_first_element_in_bytes) |
This kernel accumulates each row with the biases vector. More... | |
__kernel void gemm_accumulate_biases_f16 | ( | __global uchar * | accum_ptr, |
uint | accum_stride_x, | ||
uint | accum_step_x, | ||
uint | accum_stride_y, | ||
uint | accum_step_y, | ||
uint | accum_offset_first_element_in_bytes, | ||
__global uchar * | biases_ptr, | ||
uint | biases_stride_x, | ||
uint | biases_step_x, | ||
uint | biases_offset_first_element_in_bytes | ||
) |
This kernel accumulates each row with the biases vector.
[in,out] | accum_ptr | Pointer to the accumulate tensor. Supported data type: F16 |
[in] | accum_stride_x | Stride of the accumulate tensor in X dimension (in bytes) |
[in] | accum_step_x | accum_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | accum_stride_y | Stride of the accumulate tensor in Y dimension (in bytes) |
[in] | accum_step_y | accum_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | accum_offset_first_element_in_bytes | The offset of the first element in the accumulate tensor |
[in] | biases_ptr | Pointer to the biases vector. Supported data type: same as accum_ptr |
[in] | biases_stride_x | Stride of the biases vector in X dimension (in bytes) |
[in] | biases_step_x | biases_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | biases_offset_first_element_in_bytes | The offset of the first element in the biases vector |
Definition at line 290 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, CONVERT_TO_VECTOR_STRUCT, IMAGE_DECLARATION, offset(), Vector::ptr, and Image::ptr.
__kernel void gemm_accumulate_biases_f32 | ( | __global uchar * | accum_ptr, |
uint | accum_stride_x, | ||
uint | accum_step_x, | ||
uint | accum_stride_y, | ||
uint | accum_step_y, | ||
uint | accum_offset_first_element_in_bytes, | ||
__global uchar * | biases_ptr, | ||
uint | biases_stride_x, | ||
uint | biases_step_x, | ||
uint | biases_offset_first_element_in_bytes | ||
) |
This kernel accumulates each row with the biases vector.
[in,out] | accum_ptr | Pointer to the accumulate tensor. Supported data type: F32 |
[in] | accum_stride_x | Stride of the accumulate tensor in X dimension (in bytes) |
[in] | accum_step_x | accum_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | accum_stride_y | Stride of the accumulate tensor in Y dimension (in bytes) |
[in] | accum_step_y | accum_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | accum_offset_first_element_in_bytes | The offset of the first element in the accumulate tensor |
[in] | biases_ptr | Pointer to the biases vector. Supported data type: same as accum_ptr |
[in] | biases_stride_x | Stride of the biases vector in X dimension (in bytes) |
[in] | biases_step_x | biases_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | biases_offset_first_element_in_bytes | The offset of the first element in the biases vector |
Definition at line 262 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, CONVERT_TO_VECTOR_STRUCT, Vector::ptr, and Image::ptr.
__kernel void gemm_interleave4x4_f16 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values.
[in] | src_ptr | Pointer to the source matrix. Supported data types: F16 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: F16 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 178 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, offset(), and Image::ptr.
__kernel void gemm_interleave4x4_f32 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values.
[in] | src_ptr | Pointer to the source matrix. Supported data types: F32 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: F32 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 137 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, offset(), and Image::ptr.
__kernel void gemm_interleave4x4_u8 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel reshapes the input matrix transposing each 4x4 block and interleaving the values.
[in] | src_ptr | Pointer to the source matrix. Supported data types: U8 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: U8 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 219 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, offset(), and Image::ptr.
__kernel void gemm_transpose1x16_u8 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel computes the "vector" 1x16 transposition of input matrix.
[in] | src_ptr | Pointer to the source matrix. Supported data types: U8 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: U8 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 105 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, and Image::ptr.
__kernel void gemm_transpose1x4_f32 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel computes the "vector" 1x4 transposition of input matrix.
[in] | src_ptr | Pointer to the source matrix. Supported data types: F32 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: F32 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 41 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, and Image::ptr.
__kernel void gemm_transpose1x8_f16 | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
This OpenCL kernel computes the "vector" 1x8 transposition of input matrix.
[in] | src_ptr | Pointer to the source matrix. Supported data types: F16 |
[in] | src_stride_x | Stride of the source matrix in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source matrix in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source matrix |
[out] | dst_ptr | Pointer to the destination matrix. Supported data types: F16 |
[in] | dst_stride_x | Stride of the destination matrix in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination matrix in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination matrix |
Definition at line 73 of file gemm.cl.
References CONVERT_TO_IMAGE_STRUCT, and Image::ptr.