From d26f62569ddfd5d29a8602ce50b2252f4d7b27fd Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 9 Oct 2013 16:57:52 +0400 Subject: [PATCH] ocl: fix compilation warnings, update openCLExecuteKernelInterop --- cmake/cl2cpp.cmake | 3 + modules/ocl/include/opencv2/ocl/ocl.hpp | 27 ++++++++ modules/ocl/include/opencv2/ocl/private/util.hpp | 18 ------ modules/ocl/src/cl_operations.cpp | 79 +++++++---------------- modules/ocl/src/cl_programcache.cpp | 10 +-- modules/ocl/test/test_api.cpp | 80 ++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 84 deletions(-) create mode 100644 modules/ocl/test/test_api.cpp diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 825172b..1916c3e 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -6,6 +6,7 @@ get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME) set(STR_CPP "// This file is auto-generated. Do not edit! +#include \"precomp.hpp\" #include \"${OUTPUT_HPP_NAME}\" namespace cv @@ -16,6 +17,8 @@ namespace ocl set(STR_HPP "// This file is auto-generated. Do not edit! +#include \"opencv2/ocl/private/util.hpp\" + namespace cv { namespace ocl diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 58719f1..b90f454 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -221,6 +221,33 @@ namespace cv //! set where binary cache to be saved to CV_EXPORTS void setBinaryPath(const char *path); + struct ProgramSource + { + const char* name; + const char* programStr; + const char* programHash; + + // Cache in memory by name (should be unique). Caching on disk disabled. + inline ProgramSource(const char* _name, const char* _programStr) + : name(_name), programStr(_programStr), programHash(NULL) + { + } + + // Cache in memory by name (should be unique). Caching on disk uses programHash mark. 
+ inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash) + : name(_name), programStr(_programStr), programHash(_programHash) + { + } + }; + + //! Calls OpenCL kernel. globalThreads must not be NULL; localThreads may be NULL. Waits for completion and releases the kernel before returning. + //! Deprecated, will be replaced + CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramSource& source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair<size_t, const void *> > &args, + int channels, int depth, const char *build_options); + class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 30288a6..bb1a5ee 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -189,24 +189,6 @@ inline size_t roundUp(size_t sz, size_t n) return result; } -//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. -CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, - const cv::ocl::ProgramEntry* source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair<size_t, const void *> > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - -//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. 
-CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, - const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair<size_t, const void *> > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - }//namespace ocl }//namespace cv diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp index 25d7454..ed13be5 100644 --- a/modules/ocl/src/cl_operations.cpp +++ b/modules/ocl/src/cl_operations.cpp @@ -302,28 +302,27 @@ void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, stri total_kernel_time = 0; cout << "-------------------------------------" << endl; - cout << setiosflags(ios::left) << setw(15) << "excute time"; - cout << setiosflags(ios::left) << setw(15) << "lauch time"; + cout << setiosflags(ios::left) << setw(15) << "execute time"; + cout << setiosflags(ios::left) << setw(15) << "launch time"; cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; int i = 0; for(i = 0; i < RUN_TIMES; i++) openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, build_options); - cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; + cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; #endif } -double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, +void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, string kernelName, size_t globalThreads[3], size_t localThreads[3], - vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, - bool finish, bool measureKernelTime, bool cleanUp) + vector< pair<size_t, const void *> > &args, int channels, int depth, const 
char *build_options) { //construct kernel name //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + //for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char) stringstream idxStr; if(channels != -1) idxStr << "_C" << channels; @@ -331,63 +330,27 @@ double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* sou idxStr << "_D" << depth; kernelName += idxStr.str(); - cl_kernel kernel; - kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); - - double kernelTime = 0.0; + std::string name = std::string("custom_") + source.name; + ProgramEntry program = { name.c_str(), source.programStr, source.programHash }; + cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options); - if( globalThreads != NULL) + CV_Assert(globalThreads != NULL); + if ( localThreads != NULL) { - if ( localThreads != NULL) - { - globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; - globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; - globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; - - //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; - cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - - if(measureKernelTime == false) - { - openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); - } - else - { - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong end_time, queue_time; - - 
openCLSafeCall(clWaitForEvents(1, &event)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - kernelTime = (double)(end_time - queue_time) / (1000 * 1000); - - clReleaseEvent(event); - } - } + globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); + globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); + globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); - if(finish) - { - clFinish(getClCommandQueue(ctx)); + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - if(cleanUp) - { - openCLSafeCall(clReleaseKernel(kernel)); - } + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); - return kernelTime; + clFinish(getClCommandQueue(ctx)); + openCLSafeCall(clReleaseKernel(kernel)); } cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index a34f828..741df87 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -67,7 +67,6 @@ namespace cv { namespace ocl { -#define MAX_PROG_CACHE_SIZE 1024 /* * The binary caching system to eliminate redundant program source compilation. * Strictly, this is not a cache because we do not implement evictions right now. 
@@ -291,7 +290,7 @@ struct ProgramFileCache bool writeConfigurationToFile(const string& options, std::vector<char>& buf) { if (hash_ == NULL) - return true; // don't save dynamic kernels + return true; // don't save programs without hash if (!f.is_open()) { @@ -469,7 +468,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn { stringstream src_sign; - src_sign << (int64)(source->programStr); + src_sign << source->name; src_sign << getClContext(ctx); if (NULL != build_options) { @@ -514,15 +513,10 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options); //Cache the binary for future use if build_options is null - if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) { cv::AutoLock lockCache(mutexCache); this->addProgram(src_sign.str(), program); } - else - { - cout << "Warning: code cache has been full.\n"; - } return program; } diff --git a/modules/ocl/test/test_api.cpp b/modules/ocl/test/test_api.cpp new file mode 100644 index 0000000..0b59fc6 --- /dev/null +++ b/modules/ocl/test/test_api.cpp @@ -0,0 +1,80 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem + +TEST(TestAPI, openCLExecuteKernelInterop) +{ + cv::RNG rng; + Size sz(10000, 1); + cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false); + + cv::ocl::oclMat gpuMat(cpuMat); + cv::ocl::oclMat gpuMatDst(sz, CV_32FC4); + + const char* kernelStr = +"__kernel void test_kernel(__global float4* src, __global float4* dst) {\n" +" int x = get_global_id(0);\n" +" dst[x] = src[x];\n" +"}\n"; + + cv::ocl::ProgramSource program("test_interop", kernelStr); + + using namespace std; + vector<pair<size_t, const void *> > args; + args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data )); + + size_t globalThreads[3] = { sz.width, 1, 1 }; + cv::ocl::openCLExecuteKernelInterop( + gpuMat.clCxt, + program, + "test_kernel", + globalThreads, NULL, args, + -1, -1, + ""); + + cv::Mat dst; + gpuMatDst.download(dst); + + EXPECT_LE(checkNorm(cpuMat, dst), 1e-3); +} -- 2.7.4