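Small optimization for buildMaps kernels when rowsPerWI > 1: hoist row-invariant computations out of the per-row loop and pass the reciprocal of scale so the kernels multiply instead of divide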

author Alexander Karsakov <alexander.karsakov@itseez.com>

Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)

committer Alexander Karsakov <alexander.karsakov@itseez.com>

Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)
author Alexander Karsakov <alexander.karsakov@itseez.com>
Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)
committer Alexander Karsakov <alexander.karsakov@itseez.com>
Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)
diff --git a/modules/stitching/src/opencl/warpers.cl b/modules/stitching/src/opencl/warpers.cl

index 9b5619f..7ec87ae 100644 (file)
--- a/modules/stitching/src/opencl/warpers.cl
+++ b/modules/stitching/src/opencl/warpers.cl
@@ -56,19 +56,19 @@ __kernel void buildWarpPlaneMaps(__global uchar * xmapptr, int xmap_step, int xm
          int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
          int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
  
+        float u = tl_u + du;
+        float x_ = u * scale - ct[0];
+        float ct1 = 1 - ct[2];
+
          for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
              ymap_index += ymap_step)
          {
              __global float * xmap = (__global float *)(xmapptr + xmap_index);
              __global float * ymap = (__global float *)(ymapptr + ymap_index);
  
-            float u = tl_u + du;
              float v = tl_v + dv;
+            float y_ = v * scale - ct[1];
  
-            float x_ = u / scale - ct[0];
-            float y_ = v / scale - ct[1];
-
-            float ct1 = 1 - ct[2];
              float x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * ct1));
              float y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * ct1));
              float z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * ct1));
@@ -94,22 +94,19 @@ __kernel void buildWarpCylindricalMaps(__global uchar * xmapptr, int xmap_step,
          int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
          int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
  
+        float u = (tl_u + du) * scale;
+        float x_, z_;
+        x_ = sincos(u, &z_);
+
          for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
              ymap_index += ymap_step)
          {
              __global float * xmap = (__global float *)(xmapptr + xmap_index);
              __global float * ymap = (__global float *)(ymapptr + ymap_index);
  
-            float u = tl_u + du;
-            float v = tl_v + dv;
-            float x, y;
-
-            u /= scale;
-            float x_, y_, z_;
-            x_ = sincos(u, &z_);
-            y_ = v / scale;
+            float y_ = (tl_v + dv) * scale;
  
-            float z;
+            float x, y, z;
              x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
              y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
              z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
@@ -137,25 +134,23 @@ __kernel void buildWarpSphericalMaps(__global uchar * xmapptr, int xmap_step, in
          int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
          int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
  
+        float u = (tl_u + du) * scale;
+        float cosu, sinu = sincos(u, &cosu);
+
          for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
              ymap_index += ymap_step)
          {
              __global float * xmap = (__global float *)(xmapptr + xmap_index);
              __global float * ymap = (__global float *)(ymapptr + ymap_index);
  
-            float u = tl_u + du;
-            float v = tl_v + dv;
-            float x, y;
-
-            v /= scale;
-            u /= scale;
+            float v = (tl_v + dv) * scale;
  
-            float cosv, sinv = sincos(v, &cosv), cosu, sinu = sincos(u, &cosu);
+            float cosv, sinv = sincos(v, &cosv);
              float x_ = sinv * sinu;
              float y_ = -cosv;
              float z_ = sinv * cosu;
  
-            float z;
+            float x, y, z;
              x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
              y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
              z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
diff --git a/modules/stitching/src/warpers.cpp b/modules/stitching/src/warpers.cpp

index f474010..2711a01 100644 (file)
--- a/modules/stitching/src/warpers.cpp
+++ b/modules/stitching/src/warpers.cpp
@@ -110,7 +110,7 @@ Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, InputArra
  
              k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
                     ocl::KernelArg::PtrReadOnly(uk_rinv), ocl::KernelArg::PtrReadOnly(ut),
-                   dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+                   dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
  
              size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
              if (k.run(2, globalsize, NULL, true))
@@ -388,7 +388,7 @@ Rect SphericalWarper::buildMaps(Size src_size, InputArray K, InputArray R, Outpu
              UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
  
              k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
-                   ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+                   ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
  
              size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
              if (k.run(2, globalsize, NULL, true))
@@ -436,7 +436,7 @@ Rect CylindricalWarper::buildMaps(Size src_size, InputArray K, InputArray R, Out
              UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
  
              k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
-                   ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale,
+                   ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale,
                     rowsPerWI);
  
              size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
author	Alexander Karsakov <alexander.karsakov@itseez.com>
	Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)
committer	Alexander Karsakov <alexander.karsakov@itseez.com>
	Wed, 22 Oct 2014 14:43:29 +0000 (17:43 +0300)
modules/stitching/src/opencl/warpers.cl		patch | blob | history
modules/stitching/src/warpers.cpp		patch | blob | history