Merge branch '2.4'
authorAndrey Kamaev <andrey.kamaev@itseez.com>
Fri, 22 Feb 2013 13:33:30 +0000 (17:33 +0400)
committerAndrey Kamaev <andrey.kamaev@itseez.com>
Fri, 22 Feb 2013 13:33:30 +0000 (17:33 +0400)
1  2 
doc/tutorials/definitions/tocDefinitions.rst
modules/core/include/opencv2/core/gpumat.hpp
modules/core/src/gpumat.cpp
modules/gpu/doc/feature_detection_and_description.rst
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/highgui/include/opencv2/highgui/highgui.hpp
modules/highgui/src/cap_ffmpeg_impl.hpp
modules/imgproc/perf/perf_cvt_color.cpp
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/src/initialization.cpp
modules/ocl/src/mcwutil.cpp

Simple merge
@@@ -631,13 -631,12 +631,12 @@@ If ``compactResult`` is ``true`` , the 
  
  
  
 -gpu::BruteForceMatcher_GPU_base::radiusMatchConvert
 +gpu::BFMatcher_GPU::radiusMatchConvert
  ---------------------------------------------------
 -Converts matrices obtained via :ocv:func:`gpu::BruteForceMatcher_GPU_base::radiusMatchSingle` or :ocv:func:`gpu::BruteForceMatcher_GPU_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
 +Converts matrices obtained via :ocv:func:`gpu::BFMatcher_GPU::radiusMatchSingle` or :ocv:func:`gpu::BFMatcher_GPU::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
  
 -.. ocv:function:: void gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
 +.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
  
 -.. ocv:function:: void gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
 +.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
  
  If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
@@@ -1519,102 -1535,8 +1520,99 @@@ private
      struct HaarCascade;
      struct LbpCascade;
      friend class CascadeClassifier_GPU_LBP;
- public:
-     int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
  };
  
 +// ======================== GPU version for soft cascade ===================== //
 +
 +class CV_EXPORTS ChannelsProcessor // Abstract channel-preprocessing interface: apply() is pure virtual and instances are obtained through create().
 +{
 +public:
 +    enum
 +    {
 +        GENERIC   = 1 << 4, // 0x10
 +        SEPARABLE = 2 << 4  // 0x20; both values lie above the low 4 bits covered by SCascade::NMS_MASK (0xF)
 +    };
 +
 +    // Appends specified number of HOG first-order features integrals into given vector.
 +    // Param frame is an input 3-channel bgr image.
 +    // Param channels is a GPU matrix of optionally shrunk channels.
 +    // Param stream is a high-level CUDA stream abstraction used for asynchronous execution.
 +    virtual void apply(InputArray frame, OutputArray channels, Stream& stream = Stream::Null()) = 0;
 +
 +    // Creates a specific preprocessor implementation.
 +    // Param shrinkage is a resizing factor. The resize is applied before computing the integral sum.
 +    // Param bins is a number of HOG-like channels.
 +    // Param flags is a set of extra channel-computation flags (GENERIC by default).
 +    static cv::Ptr<ChannelsProcessor> create(const int shrinkage, const int bins, const int flags = GENERIC);
 +
 +    virtual ~ChannelsProcessor();
 +
 +protected:
 +    ChannelsProcessor(); // protected: only subclasses can construct; callers go through create()
 +};
 +
 +// Implementation of soft (stage-less) cascaded detector.
 +class CV_EXPORTS SCascade : public cv::Algorithm
 +{
 +public:
 +
 +    // Representation of detectors result.
 +    struct CV_EXPORTS Detection
 +    {
 +        ushort x;
 +        ushort y;
 +        ushort w;
 +        ushort h;
 +        float confidence;
 +        int kind;
 +
 +        enum {PEDESTRIAN = 0};
 +    };
 +
 +    enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT, NMS_MASK = 0xF};
 +
 +    // An empty cascade will be created.
 +    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied.
 +    // Param minScale is a maximum scale relative to the original size of the image on which cascade will be applied.
 +    // Param scales is a number of scales from minScale to maxScale.
 +    // Param flags is an extra tuning flags.
 +    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55,
 +        const int flags = NO_REJECT || ChannelsProcessor::GENERIC);
 +
 +    virtual ~SCascade();
 +
 +    cv::AlgorithmInfo* info() const;
 +
 +    // Load cascade from FileNode.
 +    // Param fn is a root node for cascade. Should be <cascade>.
 +    virtual bool load(const FileNode& fn);
 +
 +    // Load cascade config.
 +    virtual void read(const FileNode& fn);
 +
 +    // Return the matrix of of detected objects.
 +    // Param image is a frame on which detector will be applied.
 +    // Param rois is a regions of interests mask generated by genRoi.
 +    //    Only the objects that fall into one of the regions will be returned.
 +    // Param objects is an output array of Detections represented as GpuMat of detections (SCascade::Detection)
 +    //    The first element of the matrix is  actually a count of detections.
 +    // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution
 +    virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const;
 +
 +private:
 +
 +    struct Fields;
 +    Fields* fields;
 +
 +    double minScale;
 +    double maxScale;
 +    int scales;
 +
 +    int flags;
 +};
 +
 +CV_EXPORTS bool initModule_gpu(void);
 +
  ////////////////////////////////// SURF //////////////////////////////////////////
  
  class CV_EXPORTS SURF_GPU
@@@ -729,8 -730,139 +730,139 @@@ namespace c
              cout << "average kernel total time:  " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
  #endif
          }
 - 
 +
-         cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+        double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
+                                  size_t globalThreads[3], size_t localThreads[3],
 -                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, 
++                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+                                  bool finish, bool measureKernelTime, bool cleanUp)
+         { // Fetches the kernel kernelName[_C<channels>][_D<depth>] built from source, sets args, enqueues it and returns the measured time (0.0 unless measureKernelTime is set)
+             //construct kernel name
+             //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
+             //for example split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
+             stringstream idxStr;
+             if(channels != -1) // -1 means "no channel suffix"
+                 idxStr << "_C" << channels;
+             if(depth != -1) // -1 means "no depth suffix"
+                 idxStr << "_D" << depth;
+             kernelName += idxStr.str();
+             cl_kernel kernel;
+             kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
+             double kernelTime = 0.0; // stays 0.0 unless the profiling branch below runs
+             if( globalThreads != NULL) // nothing is enqueued when no global size is supplied
+             {
+                 if ( localThreads != NULL)
+                 {
+                     globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; // writes back into the caller's array; divUp presumably rounds up so the global size becomes a multiple of the local size
+                     globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
+                     globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
+                     //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
+                     cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
+                 }
+                 for(size_t i = 0; i < args.size(); i ++) // each pair is (argument size, pointer to argument value)
+                     openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
+                 if(measureKernelTime == false)
+                 {
+                     openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                                     localThreads, 0, NULL, NULL));
+                 }
+                 else
+                 {
+                     cl_event event = NULL;
+                     openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                                     localThreads, 0, NULL, &event));
+                     cl_ulong end_time, queue_time;
+                     openCLSafeCall(clWaitForEvents(1, &event)); // blocks until the kernel has completed
+                     openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
+                                     sizeof(cl_ulong), &end_time, 0));
+                     openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
+                                     sizeof(cl_ulong), &queue_time, 0));
+                     kernelTime = (double)(end_time - queue_time) / (1000 * 1000); // OpenCL profiling counters are in nanoseconds, so this is milliseconds; measured from QUEUED, so it includes time spent waiting in the queue
+                     clReleaseEvent(event);
+                 }
+             }
+             if(finish)
+             {
+                 clFinish(clCxt->impl->clCmdQueue);
+             }
+             if(cleanUp) // when false the kernel object is not released here
+             {
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             return kernelTime;
+         }
+         // Converts the contents of a file into a string
+         static int convertToString(const char *filename, std::string& s)
+         {
+             size_t size;
+             char*  str;
+             std::fstream f(filename, (std::fstream::in | std::fstream::binary));
+             if(f.is_open())
+             {
+                 size_t fileSize;
+                 f.seekg(0, std::fstream::end);
+                 size = fileSize = (size_t)f.tellg();
+                 f.seekg(0, std::fstream::beg);
+                 str = new char[size+1];
+                 if(!str)
+                 {
+                     f.close();
+                     return -1;
+                 }
+                 f.read(str, fileSize);
+                 f.close();
+                 str[size] = '\0';
 -            
++
+                 s = str;
+                 delete[] str;
+                 return 0;
+             }
+             printf("Error: Failed to open file %s\n", filename);
+             return -1;
+         }
+         double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
+                                  size_t globalThreads[3], size_t localThreads[3],
 -                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, 
++                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+                                  bool finish, bool measureKernelTime, bool cleanUp)
+         {
+             std::vector<std::string> fsource;
+             for (int i = 0 ; i < numFiles ; i++)
+             {
+                 std::string str;
+                 if (convertToString(fileName[i], str) >= 0)
+                     fsource.push_back(str);
+             }
+             const char **source = new const char *[numFiles];
+             for (int i = 0 ; i < numFiles ; i++)
+                 source[i] = fsource[i].c_str();
+             double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads,
+                                  args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
+             fsource.clear();
+             delete []source;
+             return kernelTime;
+         }
 - 
++
+        cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                               const size_t size)
          {
              int status;
@@@ -123,6 -126,101 +126,101 @@@ namespace c
              openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
                                    build_options, finish_mode);
          }
 - 
++
+        cl_mem bindTexture(const oclMat &mat)
+         {
+             cl_mem texture;
+             cl_image_format format;
+             int err;
+             int depth    = mat.depth();
+             int channels = mat.channels();
+             switch(depth)
+             {
+             case CV_8U:
+                 format.image_channel_data_type = CL_UNSIGNED_INT8;
+                 break;
+             case CV_32S:
+                 format.image_channel_data_type = CL_UNSIGNED_INT32;
+                 break;
+             case CV_32F:
+                 format.image_channel_data_type = CL_FLOAT;
+                 break;
+             default:
+                 throw std::exception();
+                 break;
+             }
+             switch(channels)
+             {
+             case 1:
+                 format.image_channel_order     = CL_R;
+                 break;
+             case 3:
+                 format.image_channel_order     = CL_RGB;
+                 break;
+             case 4:
+                 format.image_channel_order     = CL_RGBA;
+                 break;
+             default:
+                 throw std::exception();
+                 break;
+             }
+ #if CL_VERSION_1_2
+             cl_image_desc desc;
+             desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
+             desc.image_width      = mat.cols;
+             desc.image_height     = mat.rows;
+             desc.image_depth      = 0;
+             desc.image_array_size = 1;
+             desc.image_row_pitch  = 0;
+             desc.image_slice_pitch = 0;
+             desc.buffer           = NULL;
+             desc.num_mip_levels   = 0;
+             desc.num_samples      = 0;
+             texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ #else
+             texture = clCreateImage2D(
+                 mat.clCxt->impl->clContext,
+                 CL_MEM_READ_WRITE,
+                 &format,
+                 mat.cols,
+                 mat.rows,
+                 0,
+                 NULL,
+                 &err);
+ #endif
+             size_t origin[] = { 0, 0, 0 };
+             size_t region[] = { mat.cols, mat.rows, 1 };
+             cl_mem devData;
+             if (mat.cols * mat.elemSize() != mat.step)
+             {
+                 devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
+                     * mat.elemSize(), NULL, NULL);
+                 const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
 -                clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, 
++                clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
+                     regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
+             }
+             else
+             {
+                 devData = (cl_mem)mat.data;
+             }
+             clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
+             if ((mat.cols * mat.elemSize() != mat.step))
+             {
+                 clFinish(mat.clCxt->impl->clCmdQueue);
+                 clReleaseMemObject(devData);
+             }
+             openCLSafeCall(err);
+             return texture;
+         }
+         void releaseTexture(cl_mem& texture) // Releases the given image object through openCLFree; presumably the counterpart of bindTexture - confirm against callers
+         {
+             openCLFree(texture);
+         }
      }//namespace ocl
  
  }//namespace cv