1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
// Per-lane column offsets (0, 1, 2, 3). The kernels in this file add it to a
// work-item's base column x so that one work-item addresses four consecutive
// destination columns.
5 __constant short4 vec_offset = (short4)(0, 1, 2, 3);
// GET_VAL(x, y): bounds-checked read of one source pixel.
// Yields `scalar` when (x, y) lies outside [0, src_cols) x [0, src_rows),
// otherwise src[src_offset + y * src_step + x].
// The macro reads src, src_offset, src_step, src_cols, src_rows and scalar
// from the scope it is expanded in.
// Both arguments and the whole expansion are parenthesized so that expression
// arguments (e.g. ty + 1) and surrounding operators bind as expected.
// Each argument is still evaluated more than once: pass side-effect-free
// expressions only.
#define GET_VAL(x, y) (((x) < 0 || (x) >= src_cols || (y) < 0 || (y) >= src_rows) ? scalar : src[src_offset + (y) * src_step + (x)])
// Affine warp with nearest-neighbour sampling, one uchar per pixel.
// Each work-item produces four horizontally adjacent destination pixels
// (columns x..x+3 of row y) and writes them with a single vstore4.
//   src, src_step, src_offset, src_rows, src_cols:
//       source buffer, its row stride and start offset (both used directly as
//       indices into src), and the bounds used by GET_VAL.
//   dst, dst_step, dst_offset, dst_rows, dst_cols:
//       destination buffer, indexed as x + y * dst_step + dst_offset.
//   M:  six coefficients mapping a destination (x, y) to source coordinates:
//       src_x = M[0]*x + M[1]*y + M[2],  src_y = M[3]*x + M[4]*y + M[5].
//   scalar_: value written for pixels whose source coordinate is outside the
//       source image. ST is defined outside the lines shown here.
9 __kernel void warpAffine_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
10 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
11 __constant float * M, ST scalar_)
// Base column of this work-item's 4-pixel group, and its row.
13 int x = get_global_id(0) * 4;
14 int y = get_global_id(1);
// Border value narrowed to uchar (saturating, round-to-nearest-even).
15 uchar scalar = convert_uchar_sat_rte(scalar_);
// Only the first column of the group is tested; the vstore4 at the end always
// writes four bytes. NOTE(review): presumably the host guarantees dst_cols is
// a multiple of 4 (or rows are padded) - confirm against the caller.
17 if (x >= dst_cols || y >= dst_rows) return;
// Source coordinates for the four lanes, rounded to nearest and saturated to
// short. new_x / new_y are declared on lines not shown here.
// NOTE(review): x and y are cast to short4 first, so this presumably relies on
// the destination size fitting in 16 bits - confirm.
24 new_x = convert_short4_sat_rte(M[0] * convert_float4(vec_offset + (short4)(x)) +
25 M[1] * convert_float4((short4)y) + M[2]);
27 new_y = convert_short4_sat_rte(M[3] * convert_float4(vec_offset + (short4)(x)) +
28 M[4] * convert_float4((short4)y) + M[5]);
// Start from the border value, then fetch each lane; GET_VAL itself yields
// `scalar` when the coordinate falls outside the source image.
30 uchar4 pix = (uchar4)scalar;
32 pix.s0 = GET_VAL(new_x.s0, new_y.s0);
33 pix.s1 = GET_VAL(new_x.s1, new_y.s1);
34 pix.s2 = GET_VAL(new_x.s2, new_y.s2);
35 pix.s3 = GET_VAL(new_x.s3, new_y.s3);
// Index of the group's first pixel in dst.
37 int dst_index = x + y * dst_step + dst_offset;
39 vstore4(pix, 0, dst + dst_index);
// Fetch four source pixels: columns tx and bx of row ty, then columns tx and
// bx of row by. Pixels outside the source image are replaced by `scalar`.
// Returns them packed as (pt.s0, pt.s1, pb.s0, pb.s1); the callers in this file
// read those lanes as top-left, top-right, bottom-left, bottom-right.
// bx, by, pt and pb are declared on lines not shown here.
// NOTE(review): bx / by are presumably tx + 1 / ty + 1 - confirm.
42 uchar4 read_pixels(__global const uchar * src, short tx, short ty,
43 int src_offset, int src_step, int src_cols, int
44 src_rows, uchar scalar)
// Row ty: when columns tx and tx + 1 are both inside the image and the row is
// valid, the pair is read with one vload2.
52 if (tx >= 0 && (tx + 1) < src_cols && ty >= 0 && ty < src_rows)
54 pt = vload2(0, src + src_offset + ty * src_step + tx);
// Per-pixel fetch with bounds checking (GET_VAL substitutes `scalar`).
// NOTE(review): presumably the else-branch of the test above; the branch
// structure is not in the lines shown.
58 pt.s0 = GET_VAL(tx, ty);
59 pt.s1 = GET_VAL(bx, ty);
// Row by: same two-way fetch as for row ty.
62 if (tx >= 0 && (tx + 1) < src_cols && by >= 0 && by < src_rows)
64 pb = vload2(0, src + src_offset + by * src_step + tx);
68 pb.s0 = GET_VAL(tx, by);
69 pb.s1 = GET_VAL(bx, by);
// Row-ty pair first, row-by pair second.
72 return (uchar4)(pt, pb);
// Affine warp with bilinear interpolation, one uchar per pixel.
// Each work-item produces four horizontally adjacent destination pixels
// (columns x..x+3 of row y) and writes them with a single vstore4.
//   src, src_step, src_offset, src_rows, src_cols:
//       source buffer and geometry, passed through to read_pixels.
//   dst, dst_step, dst_offset, dst_rows, dst_cols:
//       destination buffer, indexed as x + y * dst_step + dst_offset.
//   M:  six coefficients mapping a destination (x, y) to source coordinates:
//       src_x = M[0]*x + M[1]*y + M[2],  src_y = M[3]*x + M[4]*y + M[5].
//   scalar_: value used for source samples outside the image. ST is defined
//       outside the lines shown here.
// nx, ny, s, t, tx, ty, pix and pixel are declared on lines not shown here.
75 __kernel void warpAffine_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
76 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
77 __constant float * M, ST scalar_)
// Base column of this work-item's 4-pixel group, and its row.
79 int x = get_global_id(0) * 4;
80 int y = get_global_id(1);
// Border value narrowed to uchar (saturating, round-to-nearest-even).
81 uchar scalar = convert_uchar_sat_rte(scalar_);
// Only the first column of the group is tested; the vstore4 at the end always
// writes four bytes. NOTE(review): presumably the host guarantees dst_cols is
// a multiple of 4 (or rows are padded) - confirm against the caller.
83 if (x >= dst_cols || y >= dst_rows) return;
// Floating-point source coordinates for the four lanes.
90 nx = M[0] * convert_float4((vec_offset + (short4)x)) + M[1] * convert_float4((short4)y) + M[2];
91 ny = M[3] * convert_float4((vec_offset + (short4)x)) + M[4] * convert_float4((short4)y) + M[5];
// Fractional parts of the coordinates, quantised to multiples of 1/32; used
// below as the horizontal (s) and vertical (t) blend weights.
94 s = round((nx - floor(nx)) * 32.0f) / 32.0f;
95 t = round((ny - floor(ny)) * 32.0f) / 32.0f;
// Integer parts: rounded toward negative infinity and saturated to short.
98 tx = convert_short4_sat_rtn(nx);
99 ty = convert_short4_sat_rtn(ny);
// One 2x2 source neighbourhood per lane.
102 pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
103 pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
104 pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
105 pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
// Regroup from per-lane neighbourhoods to one float4 per corner
// (tl / tr / bl / br = lanes s0 / s1 / s2 / s3 of each read_pixels result).
107 float4 tl, tr, bl, br;
108 tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
109 tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
110 bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
111 br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
// Bilinear blend of the four corners with weights s and t.
114 pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
// Index of the group's first pixel in dst; the result is rounded to nearest
// and saturated to uchar before the store.
116 int dst_index = x + y * dst_step + dst_offset;
117 vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
// Perspective warp with nearest-neighbour sampling, one uchar per pixel.
// Each work-item produces four horizontally adjacent destination pixels
// (columns x..x+3 of row y) and writes them with a single vstore4.
//   src, src_step, src_offset, src_rows, src_cols:
//       source buffer, its row stride and start offset (both used directly as
//       indices into src), and the bounds used by GET_VAL.
//   dst, dst_step, dst_offset, dst_rows, dst_cols:
//       destination buffer, indexed as x + y * dst_step + dst_offset.
//   M:  nine coefficients; for a destination (x, y):
//       nx = M[0]*x + M[1]*y + M[2], ny = M[3]*x + M[4]*y + M[5],
//       nz = M[6]*x + M[7]*y + M[8], and the source coordinate is (nx, ny) / nz.
//   scalar_: value written for pixels whose source coordinate is outside the
//       source image. ST is defined outside the lines shown here.
// nx, ny, nz, new_x and new_y are declared on lines not shown here.
120 __kernel void warpPerspective_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
121 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
122 __constant float * M, ST scalar_)
// Base column of this work-item's 4-pixel group, and its row.
124 int x = get_global_id(0) * 4;
125 int y = get_global_id(1);
// Border value narrowed to uchar (saturating, round-to-nearest-even).
126 uchar scalar = convert_uchar_sat_rte(scalar_);
// Only the first column of the group is tested; the vstore4 at the end always
// writes four bytes. NOTE(review): presumably the host guarantees dst_cols is
// a multiple of 4 (or rows are padded) - confirm against the caller.
128 if (x >= dst_cols || y >= dst_rows) return;
// Homogeneous source coordinates for the four lanes.
136 nx = M[0] * convert_float4(vec_offset + (short4)(x)) +
137 M[1] * convert_float4((short4)y) + M[2];
139 ny = M[3] * convert_float4(vec_offset + (short4)(x)) +
140 M[4] * convert_float4((short4)y) + M[5];
142 nz = M[6] * convert_float4(vec_offset + (short4)(x)) +
143 M[7] * convert_float4((short4)y) + M[8];
// Per-lane reciprocal of nz; lanes where nz == 0 get 0 instead, so their
// projected coordinate becomes 0 for finite nx / ny.
146 float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
// Perspective divide, then round to nearest and saturate to short.
147 new_x = convert_short4_sat_rte(nx * fz);
148 new_y = convert_short4_sat_rte(ny * fz);
// Start from the border value, then fetch each lane; GET_VAL itself yields
// `scalar` when the coordinate falls outside the source image.
150 uchar4 pix = (uchar4)scalar;
152 pix.s0 = GET_VAL(new_x.s0, new_y.s0);
153 pix.s1 = GET_VAL(new_x.s1, new_y.s1);
154 pix.s2 = GET_VAL(new_x.s2, new_y.s2);
155 pix.s3 = GET_VAL(new_x.s3, new_y.s3);
// Index of the group's first pixel in dst.
157 int dst_index = x + y * dst_step + dst_offset;
159 vstore4(pix, 0, dst + dst_index);
// Perspective warp with bilinear interpolation, one uchar per pixel.
// Each work-item produces four horizontally adjacent destination pixels
// (columns x..x+3 of row y) and writes them with a single vstore4.
//   src, src_step, src_offset, src_rows, src_cols:
//       source buffer and geometry, passed through to read_pixels.
//   dst, dst_step, dst_offset, dst_rows, dst_cols:
//       destination buffer, indexed as x + y * dst_step + dst_offset.
//   M:  nine coefficients; for a destination (x, y):
//       nx = M[0]*x + M[1]*y + M[2], ny = M[3]*x + M[4]*y + M[5],
//       nz = M[6]*x + M[7]*y + M[8].
//   scalar_: value used for source samples outside the image. ST is defined
//       outside the lines shown here.
// nx, ny, nz, s, t, tx, ty, pix and pixel are declared on lines not shown here.
162 __kernel void warpPerspective_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
163 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
164 __constant float * M, ST scalar_)
// Base column of this work-item's 4-pixel group, and its row.
166 int x = get_global_id(0) * 4;
167 int y = get_global_id(1);
// Border value narrowed to uchar (saturating, round-to-nearest-even).
168 uchar scalar = convert_uchar_sat_rte(scalar_);
// Only the first column of the group is tested; the vstore4 at the end always
// writes four bytes. NOTE(review): presumably the host guarantees dst_cols is
// a multiple of 4 (or rows are padded) - confirm against the caller.
170 if (x >= dst_cols || y >= dst_rows) return;
// Homogeneous source coordinates for the four lanes.
178 nx = M[0] * convert_float4(vec_offset + (short4)(x)) + M[1] * convert_float4((short4)y) + M[2];
180 ny = M[3] * convert_float4(vec_offset + (short4)(x)) + M[4] * convert_float4((short4)y) + M[5];
182 nz = M[6] * convert_float4(vec_offset + (short4)(x)) + M[7] * convert_float4((short4)y) + M[8];
// Per-lane reciprocal of nz; lanes where nz == 0 get 0 instead.
// NOTE(review): the lines shown here never use fz; presumably nx and ny are
// multiplied by it on lines not shown, before s / t / tx / ty are derived -
// confirm.
184 float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
// Fractional parts of the coordinates, quantised to multiples of 1/32; used
// below as the horizontal (s) and vertical (t) blend weights.
190 s = round((nx - floor(nx)) * 32.0f) / (float4)32.0f;
191 t = round((ny - floor(ny)) * 32.0f) / (float4)32.0f;
// Integer parts: rounded toward negative infinity and saturated to short.
194 tx = convert_short4_sat_rtn(nx);
195 ty = convert_short4_sat_rtn(ny);
// One 2x2 source neighbourhood per lane.
198 pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
199 pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
200 pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
201 pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
// Regroup from per-lane neighbourhoods to one float4 per corner
// (tl / tr / bl / br = lanes s0 / s1 / s2 / s3 of each read_pixels result).
203 float4 tl, tr, bl, br;
204 tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
205 tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
206 bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
207 br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
// Bilinear blend of the four corners with weights s and t.
210 pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
// Index of the group's first pixel in dst; the result is rounded to nearest
// and saturated to uchar before the store.
212 int dst_index = x + y * dst_step + dst_offset;
213 vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);