modules/imgproc/src/opencl/warp_transform.cl

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 __constant short4 vec_offset = (short4)(0, 1, 2, 3);
   6
   7 #define GET_VAL(x, y) ((x) < 0 || (x) >= src_cols || (y) < 0 || (y) >= src_rows) ? scalar : src[src_offset + y * src_step + x]
   8
   9 __kernel void warpAffine_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
  10                                     __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
  11                                     __constant float * M, ST scalar_)
  12 {
  13     int x = get_global_id(0) * 4;
  14     int y = get_global_id(1);
  15     uchar scalar = convert_uchar_sat_rte(scalar_);
  16
  17     if (x >= dst_cols || y >= dst_rows) return;
  18
  19     /* { M0, M1, M2 }
  20      * { M3, M4, M5 }
  21      */
  22
  23     short4 new_x, new_y;
  24     new_x = convert_short4_sat_rte(M[0] * convert_float4(vec_offset + (short4)(x)) +
  25                                    M[1] * convert_float4((short4)y) + M[2]);
  26
  27     new_y = convert_short4_sat_rte(M[3] * convert_float4(vec_offset + (short4)(x)) +
  28                                    M[4] * convert_float4((short4)y) + M[5]);
  29
  30     uchar4 pix = (uchar4)scalar;
  31
  32     pix.s0 = GET_VAL(new_x.s0, new_y.s0);
  33     pix.s1 = GET_VAL(new_x.s1, new_y.s1);
  34     pix.s2 = GET_VAL(new_x.s2, new_y.s2);
  35     pix.s3 = GET_VAL(new_x.s3, new_y.s3);
  36
  37     int dst_index = x + y * dst_step + dst_offset;
  38
  39     vstore4(pix, 0,  dst + dst_index);
  40 }
  41
  42 uchar4 read_pixels(__global const uchar * src, short tx, short ty,
  43                    int src_offset, int src_step, int src_cols, int
  44                    src_rows, uchar scalar)
  45 {
  46     uchar2 pt, pb;
  47     short bx, by;
  48
  49     bx = tx + 1;
  50     by = ty + 1;
  51
  52     if (tx >= 0 && (tx + 1) < src_cols && ty >= 0 && ty < src_rows)
  53     {
  54         pt = vload2(0, src + src_offset + ty * src_step + tx);
  55     }
  56     else
  57     {
  58         pt.s0 = GET_VAL(tx, ty);
  59         pt.s1 = GET_VAL(bx, ty);
  60     }
  61
  62     if (tx >= 0 && (tx + 1) < src_cols && by >= 0 && by < src_rows)
  63     {
  64         pb = vload2(0, src + src_offset + by * src_step + tx);
  65     }
  66     else
  67     {
  68         pb.s0 = GET_VAL(tx, by);
  69         pb.s1 = GET_VAL(bx, by);
  70     }
  71
  72     return (uchar4)(pt, pb);
  73 }
  74
  75 __kernel void warpAffine_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
  76                                    __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
  77                                    __constant float * M, ST scalar_)
  78 {
  79     int x = get_global_id(0) * 4;
  80     int y = get_global_id(1);
  81     uchar scalar = convert_uchar_sat_rte(scalar_);
  82
  83     if (x >= dst_cols || y >= dst_rows) return;
  84
  85     /* { M0, M1, M2 }
  86      * { M3, M4, M5 }
  87      */
  88
  89     float4 nx, ny;
  90     nx = M[0] * convert_float4((vec_offset + (short4)x)) + M[1] * convert_float4((short4)y) + M[2];
  91     ny = M[3] * convert_float4((vec_offset + (short4)x)) + M[4] * convert_float4((short4)y) + M[5];
  92
  93     float4 s, t;
  94     s = round((nx - floor(nx)) * 32.0f) / 32.0f;
  95     t = round((ny - floor(ny)) * 32.0f) / 32.0f;
  96
  97     short4 tx, ty;
  98     tx = convert_short4_sat_rtn(nx);
  99     ty = convert_short4_sat_rtn(ny);
 100
 101     uchar4 pix[4];
 102     pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
 103     pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
 104     pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
 105     pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
 106
 107     float4 tl, tr, bl, br;
 108     tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
 109     tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
 110     bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
 111     br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
 112
 113     float4 pixel;
 114     pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
 115
 116     int dst_index = x + y * dst_step + dst_offset;
 117     vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
 118 }
 119
 120 __kernel void warpPerspective_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
 121                                          __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
 122                                          __constant float * M, ST scalar_)
 123 {
 124     int x = get_global_id(0) * 4;
 125     int y = get_global_id(1);
 126     uchar scalar = convert_uchar_sat_rte(scalar_);
 127
 128     if (x >= dst_cols || y >= dst_rows) return;
 129
 130     /* { M0, M1, M2 }
 131      * { M3, M4, M5 }
 132      * { M6, M7, M8 }
 133      */
 134
 135     float4 nx, ny, nz;
 136     nx = M[0] * convert_float4(vec_offset + (short4)(x)) +
 137          M[1] * convert_float4((short4)y) + M[2];
 138
 139     ny = M[3] * convert_float4(vec_offset + (short4)(x)) +
 140          M[4] * convert_float4((short4)y) + M[5];
 141
 142     nz = M[6] * convert_float4(vec_offset + (short4)(x)) +
 143          M[7] * convert_float4((short4)y) + M[8];
 144
 145     short4 new_x, new_y;
 146     float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
 147     new_x = convert_short4_sat_rte(nx * fz);
 148     new_y = convert_short4_sat_rte(ny * fz);
 149
 150     uchar4 pix = (uchar4)scalar;
 151
 152     pix.s0 = GET_VAL(new_x.s0, new_y.s0);
 153     pix.s1 = GET_VAL(new_x.s1, new_y.s1);
 154     pix.s2 = GET_VAL(new_x.s2, new_y.s2);
 155     pix.s3 = GET_VAL(new_x.s3, new_y.s3);
 156
 157     int dst_index = x + y * dst_step + dst_offset;
 158
 159     vstore4(pix, 0,  dst + dst_index);
 160 }
 161
 162 __kernel void warpPerspective_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
 163                                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
 164                                         __constant float * M, ST scalar_)
 165 {
 166     int x = get_global_id(0) * 4;
 167     int y = get_global_id(1);
 168     uchar scalar = convert_uchar_sat_rte(scalar_);
 169
 170     if (x >= dst_cols || y >= dst_rows) return;
 171
 172     /* { M0, M1, M2 }
 173      * { M3, M4, M5 }
 174      * { M6, M7, M8 }
 175      */
 176
 177     float4 nx, ny, nz;
 178     nx = M[0] * convert_float4(vec_offset + (short4)(x)) + M[1] * convert_float4((short4)y) + M[2];
 179
 180     ny = M[3] * convert_float4(vec_offset + (short4)(x)) + M[4] * convert_float4((short4)y) + M[5];
 181
 182     nz = M[6] * convert_float4(vec_offset + (short4)(x)) + M[7] * convert_float4((short4)y) + M[8];
 183
 184     float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
 185
 186     nx = nx * fz;
 187     ny = ny * fz;
 188
 189     float4 s, t;
 190     s = round((nx - floor(nx)) * 32.0f) / (float4)32.0f;
 191     t = round((ny - floor(ny)) * 32.0f) / (float4)32.0f;
 192
 193     short4 tx, ty;
 194     tx = convert_short4_sat_rtn(nx);
 195     ty = convert_short4_sat_rtn(ny);
 196
 197     uchar4 pix[4];
 198     pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
 199     pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
 200     pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
 201     pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
 202
 203     float4 tl, tr, bl, br;
 204     tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
 205     tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
 206     bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
 207     br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
 208
 209     float4 pixel;
 210     pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
 211
 212     int dst_index = x + y * dst_step + dst_offset;
 213     vstore4(convert_uchar4_sat_rte(pixel), 0,  dst + dst_index);
 214 }