modules/core/src/count_non_zero.dispatch.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html
   4
   5
   6 #include "precomp.hpp"
   7 #include "opencl_kernels_core.hpp"
   8 #include "stat.hpp"
   9
  10 #include "count_non_zero.simd.hpp"
  11 #include "count_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
  12
  13 namespace cv {
  14
  15 static CountNonZeroFunc getCountNonZeroTab(int depth)
  16 {
  17     CV_INSTRUMENT_REGION();
  18     CV_CPU_DISPATCH(getCountNonZeroTab, (depth),
  19         CV_CPU_DISPATCH_MODES_ALL);
  20 }
  21
  22 #ifdef HAVE_OPENCL
  23 static bool ocl_countNonZero( InputArray _src, int & res )
  24 {
  25     int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
  26     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
  27
  28     if (depth == CV_64F && !doubleSupport)
  29         return false;
  30
  31     int dbsize = ocl::Device::getDefault().maxComputeUnits();
  32     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
  33
  34     int wgs2_aligned = 1;
  35     while (wgs2_aligned < (int)wgs)
  36         wgs2_aligned <<= 1;
  37     wgs2_aligned >>= 1;
  38
  39     ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
  40                   format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
  41                          " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
  42                          ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
  43                          ocl::typeToStr(depth), (int)wgs, kercn,
  44                          wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
  45                          _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
  46     if (k.empty())
  47         return false;
  48
  49     UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
  50     k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
  51            dbsize, ocl::KernelArg::PtrWriteOnly(db));
  52
  53     size_t globalsize = dbsize * wgs;
  54     if (k.run(1, &globalsize, &wgs, true))
  55         return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
  56     return false;
  57 }
  58 #endif
  59
  60 #if defined HAVE_IPP
  61 static bool ipp_countNonZero( Mat &src, int &res )
  62 {
  63     CV_INSTRUMENT_REGION_IPP();
  64
  65     // see https://github.com/opencv/opencv/issues/17453
  66     if (src.dims <= 2 && src.step > 520000 && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
  67         return false;
  68
  69 #if IPP_VERSION_X100 < 201801
  70     // Poor performance of SSE42
  71     if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
  72         return false;
  73 #endif
  74
  75     Ipp32s  count = 0;
  76     int     depth = src.depth();
  77
  78     if(src.dims <= 2)
  79     {
  80         IppStatus status;
  81         IppiSize  size = {src.cols*src.channels(), src.rows};
  82
  83         if(depth == CV_8U)
  84             status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
  85         else if(depth == CV_32F)
  86             status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
  87         else
  88             return false;
  89
  90         if(status < 0)
  91             return false;
  92
  93         res = size.width*size.height - count;
  94     }
  95     else
  96     {
  97         IppStatus       status;
  98         const Mat      *arrays[] = {&src, NULL};
  99         Mat            planes[1];
 100         NAryMatIterator it(arrays, planes, 1);
 101         IppiSize        size  = {(int)it.size*src.channels(), 1};
 102         res = 0;
 103         for (size_t i = 0; i < it.nplanes; i++, ++it)
 104         {
 105             if(depth == CV_8U)
 106                 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, 0, 0);
 107             else if(depth == CV_32F)
 108                 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, 0, 0);
 109             else
 110                 return false;
 111
 112             if(status < 0 || (int)it.planes->total()*src.channels() < count)
 113                 return false;
 114
 115             res += (int)it.planes->total()*src.channels() - count;
 116         }
 117     }
 118
 119     return true;
 120 }
 121 #endif
 122
 123 int countNonZero(InputArray _src)
 124 {
 125     CV_INSTRUMENT_REGION();
 126
 127     int type = _src.type(), cn = CV_MAT_CN(type);
 128     CV_Assert( cn == 1 );
 129
 130 #if defined HAVE_OPENCL || defined HAVE_IPP
 131     int res = -1;
 132 #endif
 133
 134 #ifdef HAVE_OPENCL
 135     CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
 136                 ocl_countNonZero(_src, res),
 137                 res)
 138 #endif
 139
 140     Mat src = _src.getMat();
 141     CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);
 142
 143     CountNonZeroFunc func = getCountNonZeroTab(src.depth());
 144     CV_Assert( func != 0 );
 145
 146     const Mat* arrays[] = {&src, 0};
 147     uchar* ptrs[1] = {};
 148     NAryMatIterator it(arrays, ptrs);
 149     int total = (int)it.size, nz = 0;
 150
 151     for( size_t i = 0; i < it.nplanes; i++, ++it )
 152         nz += func( ptrs[0], total );
 153
 154     return nz;
 155 }
 156
 157 void findNonZero(InputArray _src, OutputArray _idx)
 158 {
 159     Mat src = _src.getMat();
 160     CV_Assert( src.channels() == 1 && src.dims == 2 );
 161
 162     int depth = src.depth();
 163     std::vector<Point> idxvec;
 164     int rows = src.rows, cols = src.cols;
 165     AutoBuffer<int> buf_(cols + 1);
 166     int* buf = buf_.data();
 167
 168     for( int i = 0; i < rows; i++ )
 169     {
 170         int j, k = 0;
 171         const uchar* ptr8 = src.ptr(i);
 172         if( depth == CV_8U || depth == CV_8S )
 173         {
 174             for( j = 0; j < cols; j++ )
 175                 if( ptr8[j] != 0 ) buf[k++] = j;
 176         }
 177         else if( depth == CV_16U || depth == CV_16S )
 178         {
 179             const ushort* ptr16 = (const ushort*)ptr8;
 180             for( j = 0; j < cols; j++ )
 181                 if( ptr16[j] != 0 ) buf[k++] = j;
 182         }
 183         else if( depth == CV_32S )
 184         {
 185             const int* ptr32s = (const int*)ptr8;
 186             for( j = 0; j < cols; j++ )
 187                 if( ptr32s[j] != 0 ) buf[k++] = j;
 188         }
 189         else if( depth == CV_32F )
 190         {
 191             const float* ptr32f = (const float*)ptr8;
 192             for( j = 0; j < cols; j++ )
 193                 if( ptr32f[j] != 0 ) buf[k++] = j;
 194         }
 195         else
 196         {
 197             const double* ptr64f = (const double*)ptr8;
 198             for( j = 0; j < cols; j++ )
 199                 if( ptr64f[j] != 0 ) buf[k++] = j;
 200         }
 201
 202         if( k > 0 )
 203         {
 204             size_t sz = idxvec.size();
 205             idxvec.resize(sz + k);
 206             for( j = 0; j < k; j++ )
 207                 idxvec[sz + j] = Point(buf[j], i);
 208         }
 209     }
 210
 211     if( idxvec.empty() || (_idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous()) )
 212         _idx.release();
 213
 214     if( !idxvec.empty() )
 215         Mat(idxvec).copyTo(_idx);
 216 }
 217
 218 } // namespace