ARM Compute Library
17.04
|
#include "helpers.h"
Go to the source code of this file.
Functions | |
__kernel void | reshape_to_columns (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes, __global uchar *bias_ptr, uint bias_stride_x, uint bias_step_x, uint bias_stride_y, uint bias_step_y, uint bias_offset_first_element_in_bytes, uint width, uint height) |
This kernel reshapes the tensor's low three dimensions to a single column. More... | |
__kernel void | im2col_generic (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes, int kernel_size, int kernel_depth, int width, int2 input_dims, int2 strides, int2 paddings) |
This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. More... | |
__kernel void | col2im (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_stride_z, uint dst_step_z, uint dst_offset_first_element_in_bytes, uint width) |
This kernel performs a reshaping of the output of the convolution layer. More... | |
__kernel void | im2col_reduced (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_offset_first_element_in_bytes, uint width, uint height) |
This kernel reshapes the tensor's low three dimensions to a single row for GEMM operation. More... | |
__kernel void col2im | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_stride_z, | ||
uint | dst_step_z, | ||
uint | dst_offset_first_element_in_bytes, | ||
uint | width | ||
) |
This kernel performs a reshaping of the output of the convolution layer.
[in] | src_ptr | Pointer to the source tensor. Supported data types: F16, F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[out] | dst_ptr | Pointer to the destination tensor. Supported data types: F16, F32 |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_stride_z | Stride of the destination tensor in Z dimension (in bytes) |
[in] | dst_step_z | dst_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[in] | width | The output tensor width |
Definition at line 175 of file convolution_layer.cl.
References CONVERT_TO_IMAGE_STRUCT, CONVERT_TO_TENSOR3D_STRUCT_NO_STEP, DATA_TYPE, Image::ptr, Tensor3D::ptr, Tensor3D::stride_x, Tensor3D::stride_y, and Tensor3D::stride_z.
__kernel void im2col_generic | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes, | ||
int | kernel_size, | ||
int | kernel_depth, | ||
int | width, | ||
int2 | input_dims, | ||
int2 | strides, | ||
int2 | paddings | ||
) |
This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM.
[in] | src_ptr | Pointer to the source tensor. Supported data types: F16, F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[out] | dst_ptr | Pointer to the destination tensor. Supported data types: F16, F32 |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[in] | kernel_size | The convolution kernel size |
[in] | kernel_depth | The kernel depth |
[in] | width | The output tensor width |
[in] | input_dims | The input tensor dimensions |
[in] | strides | The strides of the im2col operation |
[in] | paddings | The input tensor paddings |
Definition at line 105 of file convolution_layer.cl.
References CONVERT_TO_IMAGE_STRUCT_NO_STEP, CONVERT_TO_TENSOR3D_STRUCT, DATA_TYPE, Image::ptr, Image::stride_x, Image::stride_y, and tensor3D_offset().
__kernel void im2col_reduced | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_offset_first_element_in_bytes, | ||
uint | width, | ||
uint | height | ||
) |
This kernel reshapes the tensor's low three dimensions to a single row for GEMM operation.
[in] | src_ptr | Pointer to the source tensor. Supported data types: F16, F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[out] | dst_ptr | Pointer to the destination tensor. Supported data types: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[in] | width | The width of the input tensor |
[in] | height | The height of the input tensor |
Definition at line 208 of file convolution_layer.cl.
References CONVERT_TO_TENSOR3D_STRUCT, DATA_TYPE, and Tensor3D::ptr.
__kernel void reshape_to_columns | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_offset_first_element_in_bytes, | ||
__global uchar * | bias_ptr, | ||
uint | bias_stride_x, | ||
uint | bias_step_x, | ||
uint | bias_stride_y, | ||
uint | bias_step_y, | ||
uint | bias_offset_first_element_in_bytes, | ||
uint | width, | ||
uint | height | ||
) |
This kernel reshapes the tensor's low three dimensions to a single column.
[in] | src_ptr | Pointer to the source tensor. Supported data types: F16, F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[out] | dst_ptr | Pointer to the destination tensor. Supported data types: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[in] | bias_ptr | Pointer to the bias tensor. Supported data types: same as src_ptr |
[in] | bias_stride_x | Stride of the bias tensor in X dimension (in bytes) |
[in] | bias_step_x | bias_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | bias_stride_y | Stride of the bias tensor in Y dimension (in bytes) |
[in] | bias_step_y | bias_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | bias_offset_first_element_in_bytes | The offset of the first element in the bias tensor |
[in] | width | The width of the input tensor |
[in] | height | The height of the input tensor |
Definition at line 53 of file convolution_layer.cl.
References CONVERT_TO_TENSOR3D_STRUCT, DATA_TYPE, and Tensor3D::ptr.