int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = tl_u + du;
+ float x_ = u * scale - ct[0];
+ float ct1 = 1 - ct[2];
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
float v = tl_v + dv;
+ float y_ = v * scale - ct[1];
- float x_ = u / scale - ct[0];
- float y_ = v / scale - ct[1];
-
- float ct1 = 1 - ct[2];
float x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * ct1));
float y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * ct1));
float z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * ct1));
int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = (tl_u + du) * scale;
+ float x_, z_;
+ x_ = sincos(u, &z_);
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
- float v = tl_v + dv;
- float x, y;
-
- u /= scale;
- float x_, y_, z_;
- x_ = sincos(u, &z_);
- y_ = v / scale;
+ float y_ = (tl_v + dv) * scale;
- float z;
+ float x, y, z;
x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = (tl_u + du) * scale;
+ float cosu, sinu = sincos(u, &cosu);
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
- float v = tl_v + dv;
- float x, y;
-
- v /= scale;
- u /= scale;
+ float v = (tl_v + dv) * scale;
- float cosv, sinv = sincos(v, &cosv), cosu, sinu = sincos(u, &cosu);
+ float cosv, sinv = sincos(v, &cosv);
float x_ = sinv * sinu;
float y_ = -cosv;
float z_ = sinv * cosu;
- float z;
+ float x, y, z;
x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
ocl::KernelArg::PtrReadOnly(uk_rinv), ocl::KernelArg::PtrReadOnly(ut),
- dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+ dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
if (k.run(2, globalsize, NULL, true))
UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
- ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+ ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
if (k.run(2, globalsize, NULL, true))
UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
- ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale,
+ ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale,
rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };