-gpu::BruteForceMatcher_GPU_base::radiusMatchConvert
+gpu::BFMatcher_GPU::radiusMatchConvert
---------------------------------------------------
-Converts matrices obtained via :ocv:func:`gpu::BruteForceMatcher_GPU_base::radiusMatchSingle` or :ocv:func:`gpu::BruteForceMatcher_GPU_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
+Converts matrices obtained via :ocv:func:`gpu::BFMatcher_GPU::radiusMatchSingle` or :ocv:func:`gpu::BFMatcher_GPU::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
-.. ocv:function:: void gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
+.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
-.. ocv:function:: void gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
+.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
struct HaarCascade;
struct LbpCascade;
friend class CascadeClassifier_GPU_LBP;
-
- public:
- int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
};
+// ======================== GPU version for soft cascade ===================== //
+
+class CV_EXPORTS ChannelsProcessor
+{
+public:
+    // Implementation-selector flags. They occupy the high nibble so they can be
+    // combined with the SCascade NMS flags (NMS_MASK = 0xF) in a single int.
+    enum
+    {
+        GENERIC = 1 << 4,
+        SEPARABLE = 2 << 4
+    };
+
+    // Appends the specified number of HOG first-order feature integrals into the given vector.
+    // Param frame is an input 3-channel bgr image.
+    // Param channels is a GPU matrix of optionally shrunken channels.
+    // Param stream is a high-level CUDA stream abstraction used for asynchronous execution.
+    virtual void apply(InputArray frame, OutputArray channels, Stream& stream = Stream::Null()) = 0;
+
+    // Creates a specific preprocessor implementation.
+    // Param shrinkage is a resizing factor. Resize is applied before computing the integral sum.
+    // Param bins is a number of HOG-like channels.
+    // Param flags is a channel computing extra flags (GENERIC or SEPARABLE).
+    static cv::Ptr<ChannelsProcessor> create(const int shrinkage, const int bins, const int flags = GENERIC);
+
+    virtual ~ChannelsProcessor();
+
+protected:
+    // Abstract base: construction is restricted to derived implementations.
+    ChannelsProcessor();
+};
+
+// Implementation of soft (stage-less) cascaded detector.
+class CV_EXPORTS SCascade : public cv::Algorithm
+{
+public:
+
+    // Representation of detectors result.
+    struct CV_EXPORTS Detection
+    {
+        ushort x;
+        ushort y;
+        ushort w;
+        ushort h;
+        float confidence;
+        int kind;
+
+        enum {PEDESTRIAN = 0};
+    };
+
+    // Rejection/NMS policy. The low nibble (NMS_MASK) holds the policy, leaving
+    // the high bits free for ChannelsProcessor implementation flags.
+    enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT, NMS_MASK = 0xF};
+
+    // An empty cascade will be created.
+    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied.
+    // Param maxScale is a maximum scale relative to the original size of the image on which cascade will be applied.
+    // Param scales is a number of scales from minScale to maxScale.
+    // Param flags is a set of extra tuning flags.
+    // NOTE(review): the previous default used logical OR (NO_REJECT || GENERIC == 1),
+    // which silently dropped the GENERIC bit; the flag layout requires bitwise OR.
+    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55,
+        const int flags = NO_REJECT | ChannelsProcessor::GENERIC);
+
+    virtual ~SCascade();
+
+    cv::AlgorithmInfo* info() const;
+
+    // Load cascade from FileNode.
+    // Param fn is a root node for cascade. Should be <cascade>.
+    virtual bool load(const FileNode& fn);
+
+    // Load cascade config.
+    virtual void read(const FileNode& fn);
+
+    // Return the matrix of detected objects.
+    // Param image is a frame on which detector will be applied.
+    // Param rois is a regions of interests mask generated by genRoi.
+    // Only the objects that fall into one of the regions will be returned.
+    // Param objects is an output array of Detections represented as GpuMat of detections (SCascade::Detection).
+    // The first element of the matrix is actually a count of detections.
+    // Param stream is a high-level CUDA stream abstraction used for asynchronous execution.
+    virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const;
+
+private:
+
+    struct Fields;
+    Fields* fields;   // pimpl: cascade data populated by load()/read()
+
+    double minScale;
+    double maxScale;
+    int scales;
+
+    int flags;
+};
+
+CV_EXPORTS bool initModule_gpu(void);
+
////////////////////////////////// SURF //////////////////////////////////////////
class CV_EXPORTS SURF_GPU
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
#endif
}
-
+
- cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+// Builds (or fetches from cache) the kernel named <kernelName>_C<channels>_D<depth>
+// from the given source, sets its arguments, enqueues it and optionally measures
+// execution time through OpenCL profiling events. Returns the measured kernel
+// time in milliseconds (0.0 unless measureKernelTime is true and globalThreads
+// is non-NULL).
+double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
+    size_t globalThreads[3], size_t localThreads[3],
-    vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
++    vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+    bool finish, bool measureKernelTime, bool cleanUp)
+
+{
+    // Construct the kernel name.
+    // The rule is functionName_Cn_Dn: C represents Channels, D represents DataType Depth,
+    // and n is an integer number. For example, split_C2_D2 is the split kernel with
+    // channels = 2 and data type depth = 2 (data type is char).
+    stringstream idxStr;
+    if(channels != -1)
+        idxStr << "_C" << channels;
+    if(depth != -1)
+        idxStr << "_D" << depth;
+    kernelName += idxStr.str();
+
+    cl_kernel kernel;
+    kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
+
+    double kernelTime = 0.0;
+
+    if( globalThreads != NULL)
+    {
+        if ( localThreads != NULL)
+        {
+            // Round each global work-size dimension up to a multiple of the local size.
+            globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
+            globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
+            globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
+
+            //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
+            cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
+        }
+        for(size_t i = 0; i < args.size(); i ++)
+            openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
+
+        if(measureKernelTime == false)
+        {
+            openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                localThreads, 0, NULL, NULL));
+        }
+        else
+        {
+            // Enqueue with an event and read the QUEUED/END profiling timestamps to
+            // obtain queued-to-completion time; CL timestamps are in nanoseconds.
+            cl_event event = NULL;
+            openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                localThreads, 0, NULL, &event));
+
+            cl_ulong end_time, queue_time;
+
+            openCLSafeCall(clWaitForEvents(1, &event));
+
+            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
+                sizeof(cl_ulong), &end_time, 0));
+
+            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
+                sizeof(cl_ulong), &queue_time, 0));
+
+            // Nanoseconds to milliseconds.
+            kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
+
+            clReleaseEvent(event);
+        }
+    }
+
+    if(finish)
+    {
+        clFinish(clCxt->impl->clCmdQueue);
+    }
+
+    if(cleanUp)
+    {
+        openCLSafeCall(clReleaseKernel(kernel));
+    }
+
+    return kernelTime;
+}
+
+// Converts the contents of a file into a string.
+// Returns 0 on success and -1 on failure (file could not be opened).
+static int convertToString(const char *filename, std::string& s)
+{
+    size_t size;
+    char* str;
+
+    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
+    if(f.is_open())
+    {
+        size_t fileSize;
+        // Measure the file by seeking to the end, then rewind for reading.
+        f.seekg(0, std::fstream::end);
+        size = fileSize = (size_t)f.tellg();
+        f.seekg(0, std::fstream::beg);
+
+        str = new char[size+1];
+        // NOTE(review): operator new throws std::bad_alloc rather than returning
+        // NULL, so this check is effectively dead code — kept as-is.
+        if(!str)
+        {
+            f.close();
+            return -1;
+        }
+
+        f.read(str, fileSize);
+        f.close();
+        str[size] = '\0';
-
++
+        s = str;
+        delete[] str;
+        return 0;
+    }
+    printf("Error: Failed to open file %s\n", filename);
+    return -1;
+}
+
+// File-based variant: reads numFiles OpenCL source files, then delegates to the
+// in-memory openCLExecuteKernelInterop overload. Returns the kernel time it
+// reports (milliseconds, 0.0 when timing is disabled).
+double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
+    size_t globalThreads[3], size_t localThreads[3],
+    vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+    bool finish, bool measureKernelTime, bool cleanUp)
+
+{
+    // Read each source file; files that fail to load are skipped.
+    std::vector<std::string> fsource;
+    for (int i = 0 ; i < numFiles ; i++)
+    {
+        std::string str;
+        if (convertToString(fileName[i], str) >= 0)
+            fsource.push_back(str);
+    }
+    // Index only the sources actually loaded. The previous code iterated over
+    // [0, numFiles) and read fsource[i] out of bounds whenever a file failed
+    // to load above.
+    const int numLoaded = (int)fsource.size();
+    const char **source = new const char *[numLoaded];
+    for (int i = 0 ; i < numLoaded ; i++)
+        source[i] = fsource[i].c_str();
+    double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads,
+        args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
+    delete []source;
+    return kernelTime;
+}
-
++
+ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size)
{
int status;
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
build_options, finish_mode);
}
-
++
+// Creates a 2D OpenCL image object ("texture") and fills it with the contents
+// of mat. The caller owns the returned cl_mem and must free it with
+// releaseTexture().
+cl_mem bindTexture(const oclMat &mat)
+{
+    cl_mem texture;
+    cl_image_format format;
+    int err;
+    int depth = mat.depth();
+    int channels = mat.channels();
+
+    // Map the OpenCV element depth onto a CL channel data type.
+    // NOTE(review): CV_32S is mapped to CL_UNSIGNED_INT32 — presumably kernels
+    // reinterpret the bits; confirm signedness is handled downstream.
+    switch(depth)
+    {
+    case CV_8U:
+        format.image_channel_data_type = CL_UNSIGNED_INT8;
+        break;
+    case CV_32S:
+        format.image_channel_data_type = CL_UNSIGNED_INT32;
+        break;
+    case CV_32F:
+        format.image_channel_data_type = CL_FLOAT;
+        break;
+    default:
+        // Unsupported depth.
+        throw std::exception();
+        break;
+    }
+    // Map the channel count onto a CL channel order (2-channel mats unsupported).
+    switch(channels)
+    {
+    case 1:
+        format.image_channel_order = CL_R;
+        break;
+    case 3:
+        format.image_channel_order = CL_RGB;
+        break;
+    case 4:
+        format.image_channel_order = CL_RGBA;
+        break;
+    default:
+        throw std::exception();
+        break;
+    }
+#if CL_VERSION_1_2
+    // OpenCL 1.2 deprecates clCreateImage2D in favour of clCreateImage + desc.
+    cl_image_desc desc;
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = mat.cols;
+    desc.image_height = mat.rows;
+    desc.image_depth = 0;
+    desc.image_array_size = 1;
+    desc.image_row_pitch = 0;
+    desc.image_slice_pitch = 0;
+    desc.buffer = NULL;
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+#else
+    texture = clCreateImage2D(
+        mat.clCxt->impl->clContext,
+        CL_MEM_READ_WRITE,
+        &format,
+        mat.cols,
+        mat.rows,
+        0,
+        NULL,
+        &err);
+#endif
+    size_t origin[] = { 0, 0, 0 };
+    size_t region[] = { mat.cols, mat.rows, 1 };
+
+    // If the matrix has row padding (step != width * elemSize), pack the rows
+    // into a temporary contiguous buffer first: the buffer-to-image copy below
+    // assumes tightly packed rows.
+    cl_mem devData;
+    if (mat.cols * mat.elemSize() != mat.step)
+    {
+        devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
+            * mat.elemSize(), NULL, NULL);
+        const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
-        clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
++        clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
+            regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
+    }
+    else
+    {
+        devData = (cl_mem)mat.data;
+    }
+
+    clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
+    // Release the temporary packing buffer once the copy has completed.
+    if ((mat.cols * mat.elemSize() != mat.step))
+    {
+        clFinish(mat.clCxt->impl->clCmdQueue);
+        clReleaseMemObject(devData);
+    }
+
+    // NOTE(review): only the image-creation status is checked, and only after
+    // the copies above; errors from clCreateBuffer/clEnqueue* are ignored.
+    openCLSafeCall(err);
+    return texture;
+}
+
+// Releases a texture previously created by bindTexture().
+void releaseTexture(cl_mem& texture)
+{
+    openCLFree(texture);
+}
}//namespace ocl
}//namespace cv