//
//M*/
#include "precomp.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+
namespace cv
{
if( npoints == 0 )
return Rect();
- const Point* pts = points.ptr<Point>();
- Point pt = pts[0];
+#if CV_SIMD
+ const int64_t* pts = points.ptr<int64_t>();
-#if CV_SSE4_2
- if(cv::checkHardwareSupport(CV_CPU_SSE4_2))
+ if( !is_float )
{
- if( !is_float )
+ v_int32 minval, maxval;
+ minval = maxval = v_reinterpret_as_s32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
+ for( i = 1; i <= npoints - v_int32::nlanes/2; i+= v_int32::nlanes/2 )
{
- __m128i minval, maxval;
- minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y
-
- for( i = 1; i < npoints; i++ )
- {
- __m128i ptXY = _mm_loadl_epi64((const __m128i*)&pts[i]);
- minval = _mm_min_epi32(ptXY, minval);
- maxval = _mm_max_epi32(ptXY, maxval);
- }
- xmin = _mm_cvtsi128_si32(minval);
- ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4));
- xmax = _mm_cvtsi128_si32(maxval);
- ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4));
+ v_int32 ptXY2 = v_reinterpret_as_s32(vx_load(pts + i));
+ minval = v_min(ptXY2, minval);
+ maxval = v_max(ptXY2, maxval);
}
- else
+ minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
+ maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
+ if( i <= npoints - v_int32::nlanes/4 )
{
- __m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps();
- minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt));
-
- for( i = 1; i < npoints; i++ )
+ v_int32 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
+ minval = v_min(ptXY, minval);
+ maxval = v_max(ptXY, maxval);
+ i += v_int64::nlanes/2;
+ }
+ for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
+ {
+ minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
+ maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
+ }
+ xmin = minval.get0();
+ xmax = maxval.get0();
+ ymin = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))).get0();
+ ymax = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))).get0();
+#if CV_SIMD_WIDTH > 16
+ if( i < npoints )
+ {
+ v_int32x4 minval2, maxval2;
+ minval2 = maxval2 = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
+ for( i++; i < npoints; i++ )
{
- ptXY = _mm_loadl_pi(ptXY, (const __m64*)&pts[i]);
-
- minvalf = _mm_min_ps(minvalf, ptXY);
- maxvalf = _mm_max_ps(maxvalf, ptXY);
+ v_int32x4 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
+ minval2 = v_min(ptXY, minval2);
+ maxval2 = v_max(ptXY, maxval2);
}
-
- float xyminf[2], xymaxf[2];
- _mm_storel_pi((__m64*)xyminf, minvalf);
- _mm_storel_pi((__m64*)xymaxf, maxvalf);
- xmin = cvFloor(xyminf[0]);
- ymin = cvFloor(xyminf[1]);
- xmax = cvFloor(xymaxf[0]);
- ymax = cvFloor(xymaxf[1]);
+ xmin = min(xmin, minval2.get0());
+ xmax = max(xmax, maxval2.get0());
+ ymin = min(ymin, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval2))).get0());
+ ymax = max(ymax, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0());
}
+#endif
}
else
-#endif
{
- if( !is_float )
+ v_float32 minval, maxval;
+ minval = maxval = v_reinterpret_as_f32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
+ for( i = 1; i <= npoints - v_float32::nlanes/2; i+= v_float32::nlanes/2 )
{
- xmin = xmax = pt.x;
- ymin = ymax = pt.y;
-
- for( i = 1; i < npoints; i++ )
+ v_float32 ptXY2 = v_reinterpret_as_f32(vx_load(pts + i));
+ minval = v_min(ptXY2, minval);
+ maxval = v_max(ptXY2, maxval);
+ }
+ minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
+ maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
+ if( i <= npoints - v_float32::nlanes/4 )
+ {
+ v_float32 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
+ minval = v_min(ptXY, minval);
+ maxval = v_max(ptXY, maxval);
+ i += v_float32::nlanes/4;
+ }
+ for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
+ {
+ minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
+ maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
+ }
+ xmin = cvFloor(minval.get0());
+ xmax = cvFloor(maxval.get0());
+ ymin = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))).get0());
+ ymax = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))).get0());
+#if CV_SIMD_WIDTH > 16
+ if( i < npoints )
+ {
+ v_float32x4 minval2, maxval2;
+ minval2 = maxval2 = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
+ for( i++; i < npoints; i++ )
{
- pt = pts[i];
+ v_float32x4 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
+ minval2 = v_min(ptXY, minval2);
+ maxval2 = v_max(ptXY, maxval2);
+ }
+ xmin = min(xmin, cvFloor(minval2.get0()));
+ xmax = max(xmax, cvFloor(maxval2.get0()));
+ ymin = min(ymin, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval2))).get0()));
+ ymax = max(ymax, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0()));
+ }
+#endif
+ }
+#else
+ const Point* pts = points.ptr<Point>();
+ Point pt = pts[0];
- if( xmin > pt.x )
- xmin = pt.x;
+ if( !is_float )
+ {
+ xmin = xmax = pt.x;
+ ymin = ymax = pt.y;
- if( xmax < pt.x )
- xmax = pt.x;
+ for( i = 1; i < npoints; i++ )
+ {
+ pt = pts[i];
- if( ymin > pt.y )
- ymin = pt.y;
+ if( xmin > pt.x )
+ xmin = pt.x;
- if( ymax < pt.y )
- ymax = pt.y;
- }
- }
- else
- {
- Cv32suf v;
- // init values
- xmin = xmax = CV_TOGGLE_FLT(pt.x);
- ymin = ymax = CV_TOGGLE_FLT(pt.y);
+ if( xmax < pt.x )
+ xmax = pt.x;
- for( i = 1; i < npoints; i++ )
- {
- pt = pts[i];
- pt.x = CV_TOGGLE_FLT(pt.x);
- pt.y = CV_TOGGLE_FLT(pt.y);
+ if( ymin > pt.y )
+ ymin = pt.y;
- if( xmin > pt.x )
- xmin = pt.x;
+ if( ymax < pt.y )
+ ymax = pt.y;
+ }
+ }
+ else
+ {
+ Cv32suf v;
+ // init values
+ xmin = xmax = CV_TOGGLE_FLT(pt.x);
+ ymin = ymax = CV_TOGGLE_FLT(pt.y);
- if( xmax < pt.x )
- xmax = pt.x;
+ for( i = 1; i < npoints; i++ )
+ {
+ pt = pts[i];
+ pt.x = CV_TOGGLE_FLT(pt.x);
+ pt.y = CV_TOGGLE_FLT(pt.y);
- if( ymin > pt.y )
- ymin = pt.y;
+ if( xmin > pt.x )
+ xmin = pt.x;
- if( ymax < pt.y )
- ymax = pt.y;
- }
+ if( xmax < pt.x )
+ xmax = pt.x;
+
+ if( ymin > pt.y )
+ ymin = pt.y;
- v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
- v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
- // because right and bottom sides of the bounding rectangle are not inclusive
- // (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
- v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
- v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
+ if( ymax < pt.y )
+ ymax = pt.y;
}
+
+ v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
+ v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
+ // because right and bottom sides of the bounding rectangle are not inclusive
+ // (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
+ v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
+ v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
}
+#endif
return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
}