26 #define SWAP_ROW(u0, l0) \ 33 #define SWAP_4x4(u0, u1, u2, u3, l0, l1, l2, l3) \ 35 VEC_DATA_TYPE(DATA_TYPE, 4) \ 43 #define SWAP_8x8(u0, u1, u2, u3, u4, u5, u6, u7, l0, l1, l2, l3, l4, l5, l6, l7) \ 45 VEC_DATA_TYPE(DATA_TYPE, 8) \ 57 #define TRANSPOSE_4x4(u0, u1, u2, u3) \ 59 VEC_DATA_TYPE(DATA_TYPE, 4) \ 80 #define TRANSPOSE_8x8(u0, u1, u2, u3, u4, u5, u6, u7) \ 82 TRANSPOSE_4x4(u0.s0123, u1.s0123, u2.s0123, u3.s0123); \ 83 TRANSPOSE_4x4(u0.s4567, u1.s4567, u2.s4567, u3.s4567); \ 84 TRANSPOSE_4x4(u4.s0123, u5.s0123, u6.s0123, u7.s0123); \ 85 TRANSPOSE_4x4(u4.s4567, u5.s4567, u6.s4567, u7.s4567); \ 86 SWAP_4x4(u0.s4567, u1.s4567, u2.s4567, u3.s4567, u4.s0123, u5.s0123, u6.s0123, u7.s0123); \ 89 #define TRANSPOSE_16x16(u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15) \ 91 TRANSPOSE_8x8(u0.s01234567, u1.s01234567, u2.s01234567, u3.s01234567, u4.s01234567, u5.s01234567, u6.s01234567, u7.s01234567); \ 92 TRANSPOSE_8x8(u0.s89ABCDEF, u1.s89ABCDEF, u2.s89ABCDEF, u3.s89ABCDEF, u4.s89ABCDEF, u5.s89ABCDEF, u6.s89ABCDEF, u7.s89ABCDEF); \ 93 TRANSPOSE_8x8(u8.s01234567, u9.s01234567, u10.s01234567, u11.s01234567, u12.s01234567, u13.s01234567, u14.s01234567, u15.s01234567); \ 94 TRANSPOSE_8x8(u8.s89ABCDEF, u9.s89ABCDEF, u10.s89ABCDEF, u11.s89ABCDEF, u12.s89ABCDEF, u13.s89ABCDEF, u14.s89ABCDEF, u15.s89ABCDEF); \ 95 SWAP_8x8(u0.s89ABCDEF, u1.s89ABCDEF, u2.s89ABCDEF, u3.s89ABCDEF, u4.s89ABCDEF, u5.s89ABCDEF, u6.s89ABCDEF, u7.s89ABCDEF, \ 96 u8.s01234567, u9.s01234567, u10.s01234567, u11.s01234567, u12.s01234567, u13.s01234567, u14.s01234567, u15.s01234567); \ 99 #ifndef DATA_TYPE_IN_BYTES 100 #error DATA_TYPE_IN_BYTES not set for the transpose OpenCL kernel 103 #if DATA_TYPE_IN_BYTES == 4 104 #define DATA_TYPE uint 105 #define TRANSPOSE() TRANSPOSE_4x4(u0, u1, u2, u3) 106 #define VLOAD(x, y) vload4(x, y) 107 #define VSTORE(x, y, z) vstore4(x, y, z) 109 #elif DATA_TYPE_IN_BYTES == 2 110 #define DATA_TYPE ushort 111 #define TRANSPOSE() TRANSPOSE_8x8(u0, u1, 
u2, u3, u4, u5, u6, u7) 112 #define VLOAD(x, y) vload8(x, y) 113 #define VSTORE(x, y, z) vstore8(x, y, z) 115 #elif DATA_TYPE_IN_BYTES == 1 116 #define DATA_TYPE uchar 117 #define TRANSPOSE() TRANSPOSE_16x16(u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15) 118 #define VLOAD(x, y) vload16(x, y) 119 #define VSTORE(x, y, z) vstore16(x, y, z) 120 #define BLOCK_SIZE 16 122 #error DATA_TYPE_IN_BYTES not supported for transpose 148 uint x = get_global_id(0) * BLOCK_SIZE;
149 uint y = get_global_id(1) * BLOCK_SIZE;
196 uint dst_offset_in_bytes = y * DATA_TYPE_IN_BYTES + x * dst_stride_y + dst_offset_first_element_in_bytes;
197 VSTORE(u0, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 0 * dst_stride_y));
198 VSTORE(u1, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 1 * dst_stride_y));
199 VSTORE(u2, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 2 * dst_stride_y));
200 VSTORE(u3, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 3 * dst_stride_y));
202 VSTORE(u4, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 4 * dst_stride_y));
203 VSTORE(u5, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 5 * dst_stride_y));
204 VSTORE(u6, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 6 * dst_stride_y));
205 VSTORE(u7, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 7 * dst_stride_y));
207 VSTORE(u8, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 8 * dst_stride_y));
208 VSTORE(u9, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 9 * dst_stride_y));
209 VSTORE(u10, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 10 * dst_stride_y));
210 VSTORE(u11, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 11 * dst_stride_y));
211 VSTORE(u12, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 12 * dst_stride_y));
212 VSTORE(u13, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 13 * dst_stride_y));
213 VSTORE(u14, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 14 * dst_stride_y));
214 VSTORE(u15, 0, (__global
DATA_TYPE *)(dst_ptr + dst_offset_in_bytes + 15 * dst_stride_y));
#define IMAGE_DECLARATION(name)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
#define CONVERT_TO_IMAGE_STRUCT(name)
#define VEC_DATA_TYPE(type, size)
__kernel void transpose(__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes)
This OpenCL kernel computes the matrix transposition of the input matrix.
Structure to hold Image information.