Add dft and gemm to ocl module, using AMD's clAmdFft and clAmdBlas libraries
author yao <bitwangyaoyao@gmail.com>
Tue, 7 Aug 2012 08:33:46 +0000 (16:33 +0800)
committer yao <bitwangyaoyao@gmail.com>
Tue, 7 Aug 2012 08:33:46 +0000 (16:33 +0800)
CMakeLists.txt
cmake/OpenCVDetectOpenCL.cmake
cmake/templates/cvconfig.h.cmake
modules/ocl/CMakeLists.txt
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/src/fft.cpp [new file with mode: 0644]
modules/ocl/src/gemm.cpp [new file with mode: 0644]
modules/ocl/test/test_fft.cpp [new file with mode: 0644]
modules/ocl/test/test_gemm.cpp [new file with mode: 0644]

index 62709b8..e048156 100644 (file)
@@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA          "Include XIMEA cameras support"               OFF
 OCV_OPTION(WITH_XINE           "Include Xine support (GPL)"                  OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_CLP            "Include Clp support (EPL)"                   OFF)
 OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              OFF  IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_OPENCLAMDFFT   "Include AMD OpenCL FFT library support"      OFF  IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     OFF  IF (NOT ANDROID AND NOT IOS) )
+
 
 # OpenCV build components
 # ===================================================
@@ -396,6 +399,12 @@ if(WITH_OPENCL)
   if(OPENCL_FOUND)
     set(HAVE_OPENCL 1)
   endif()
+  # Enable the AMD add-ons only when both the header directory and the library
+  # were actually located (CLAMD*_INCLUDE_DIR / CLAMD*_LIBRARIES are filled in by
+  # cmake/OpenCVDetectOpenCL.cmake). Otherwise the ocl module would be compiled
+  # with HAVE_* defined but with nothing to include or link against.
+  if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR AND CLAMDFFT_LIBRARIES)
+    set(HAVE_CLAMDFFT 1)
+  endif()
+  if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR AND CLAMDBLAS_LIBRARIES)
+    set(HAVE_CLAMDBLAS 1)
+  endif()
 endif()
 
 # ----------------------------------------------------------------------------
index 903b55b..96473a5 100644 (file)
@@ -2,8 +2,19 @@ if(APPLE)
     set(OPENCL_FOUND YES)
     set(OPENCL_LIBRARIES "-framework OpenCL")
 else()
-    find_package(OpenCL QUIET)
-
+    #find_package(OpenCL QUIET)
+       if(WITH_OPENCLAMDFFT)
+            find_path(CLAMDFFT_INCLUDE_DIR
+                NAMES clAmdFft.h)
+            find_library(CLAMDFFT_LIBRARIES
+                NAMES clAmdFft.Runtime)
+       endif()
+       if(WITH_OPENCLAMDBLAS)
+            find_path(CLAMDBLAS_INCLUDE_DIR
+                NAMES clAmdBlas.h)
+            find_library(CLAMDBLAS_LIBRARIES
+                NAMES clAmdBlas)
+       endif()
     # Try AMD/ATI Stream SDK
     if (NOT OPENCL_FOUND)
         set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})
index 1012008..fb779c8 100644 (file)
 /* OpenCL Support */
 #cmakedefine HAVE_OPENCL
 
+/* AMD's OpenCL Fast Fourier Transform Library*/ 
+#cmakedefine HAVE_CLAMDFFT
+
+/* AMD's Basic Linear Algebra Subprograms Library*/
+#cmakedefine HAVE_CLAMDBLAS
+
 /* NVidia Cuda Fast Fourier Transform (FFT) API*/
 #cmakedefine HAVE_CUFFT
 
index b331e9f..2459577 100644 (file)
@@ -29,6 +29,14 @@ if (HAVE_OPENCL)
   if(OPENCL_INCLUDE_DIR)
       ocv_include_directories(${OPENCL_INCLUDE_DIR})
   endif()
+  if (HAVE_CLAMDFFT)
+       set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
+       ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
+  endif()
+  if (HAVE_CLAMDBLAS)
+       set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
+       ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
+  endif()
 endif()
 
 ocv_set_module_sources(
index 517e9ff..3bc6729 100644 (file)
@@ -894,7 +894,35 @@ namespace cv
                // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
                CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
 
-
+#ifdef HAVE_CLAMDFFT
+            ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+            // These two functions must be called before/after running any fft library functions.
+            CV_EXPORTS void fft_setup();    // invoked implicitly when the first fft plan is created
+            CV_EXPORTS void fft_teardown(); // must be called manually to tear down the fft library
+
+                   /////////////////////////////////////// DFT /////////////////////////////////////////////////////
+                   //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating point matrix.
+                   //! Param dft_size is the size of the DFT transform.
+                   //!
+                   //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+                   // supported src types: CV_32FC1, CV_32FC2
+                   // supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+                   // dft_size is the size of the original input, which is used for the transformation from complex to real.
+                   // dft_size must be a product of powers of 2, 3 and 5
+                   // real to complex dft requires clAmdFft v1.8 or later
+                   // real to complex dft output is not the same as that of the cpu version
+                   // real to complex and complex to real transforms do not support DFT_ROWS
+                   CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
+#endif // HAVE_CLAMDFFT
+
+#ifdef HAVE_CLAMDBLAS
+               //! implements the generalized matrix product algorithm GEMM from BLAS
+               // The functionality requires the clAmdBlas library
+               // only type CV_32FC1 is supported
+               // NOTE(review): gemm.cpp also dispatches CV_64FC1, CV_32FC2 and CV_64FC2 - confirm the intended support
+               // the GEMM_3_T flag is not supported
+               CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
+               const oclMat& src3, double beta, oclMat& dst, int flags = 0);
+#endif
 
     }
 }
diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp
new file mode 100644 (file)
index 0000000..b3eda35
--- /dev/null
@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include "precomp.hpp"
+
+#ifdef HAVE_CLAMDFFT
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace std;
+
+#if !defined (HAVE_OPENCL)
+// Stub used when the module is built without OpenCL; the signature matches the dft declaration in ocl.hpp.
+void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags) { throw_nogpu(); }
+#else
+
+#include <clAmdFft.h>
+
+namespace cv{ namespace ocl {
+    // The values encode (is_complex_input << 0 | is_complex_output << 1), as computed in cv::ocl::dft.
+    enum FftType
+    {
+        C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
+        R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
+        C2C = 3  // complex to complex
+    };
+    // A baked clAmdFft plan together with the parameters it was baked for.
+    // Instances are created only through getPlan() and are kept in planStore.
+    struct FftPlan
+    {
+        friend void fft_setup();
+        friend void fft_teardown();
+        ~FftPlan();
+    protected:
+        FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+        // parameters that identify a plan; getPlan() compares all of them
+        const Size dft_size;
+        const int src_step, dst_step;
+        const int flags;
+        const FftType type;
+        clAmdFftPlanHandle plHandle;
+        // every plan baked so far
+        static vector<FftPlan*> planStore;
+        // true between fft_setup() and fft_teardown()
+        static bool started;
+        static clAmdFftSetupData * setupData;
+    public:
+        // return a baked plan:
+        // if there is one matched plan, return it
+        // if not, bake a new one, put it into the planStore and return it.
+        static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+    };
+}}
+// Storage for the static members of FftPlan: nothing is set up and no plan is cached initially.
+clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
+vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore;
+bool cv::ocl::FftPlan::started = false;
+
+// Allocates and initializes the clAmdFft setup data.
+// Does nothing when the library has already been set up.
+void cv::ocl::fft_setup()
+{
+    if(!FftPlan::started)
+    {
+        FftPlan::setupData = new clAmdFftSetupData;
+        openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
+        FftPlan::started = true;
+    }
+}
+// Destroys every cached plan, releases the setup data and tears down clAmdFft.
+// Does nothing when the library has not been set up.
+void cv::ocl::fft_teardown()
+{
+    if(!FftPlan::started)
+    {
+        return;
+    }
+    delete FftPlan::setupData;
+    FftPlan::setupData = 0;
+
+    // ~FftPlan() erases the plan from planStore, so deleting while indexing the live
+    // store would skip every other plan. Detach the store first and destroy each plan
+    // from the detached copy.
+    vector<FftPlan*> plans;
+    plans.swap(FftPlan::planStore);
+    for(size_t i = 0; i < plans.size(); i ++)
+    {
+        delete plans[i];
+    }
+
+    openCLSafeCall( clAmdFftTeardown( ) );
+    FftPlan::started = false;
+}
+
+// Bakes a new clAmdFft plan for the given transform geometry.
+//   _dft_size - size of the transform (width x height)
+//   _src_step - row step of the source buffer, in bytes
+//   _dst_step - row step of the destination buffer, in bytes
+//   _flags    - DFT_* flags; only DFT_ROWS is inspected here
+//   _type     - C2C, R2C or C2R; any other value prints a message and throws std::exception
+cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
+    : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
+{
+    if(!FftPlan::started)
+    {
+        // implicitly do fft setup
+        fft_setup();
+    }
+
+    const bool is_1d_input = (_dft_size.height == 1);
+    const int  is_row_dft  = flags & DFT_ROWS;
+
+    clAmdFftLayout inLayout;
+    clAmdFftLayout outLayout;
+    // a single row, or a batch of independent rows, is handled as a 1D transform
+    clAmdFftDim    dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D;
+
+    // with DFT_ROWS every row is one item of the batch
+    size_t batchSize         = is_row_dft ? dft_size.height : 1;
+    size_t clLengthsIn[ 3 ]  = {1, 1, 1};
+    size_t clStridesIn[ 3 ]  = {1, 1, 1};
+    size_t clStridesOut[ 3 ] = {1, 1, 1};
+    clLengthsIn[0]           = dft_size.width;
+    clLengthsIn[1]           = is_row_dft ? 1 : dft_size.height;
+
+    // row strides are expressed in elements, hence the division by the element size
+    switch(_type)
+    {
+    case C2C:
+        inLayout        = CLFFT_COMPLEX_INTERLEAVED;
+        outLayout       = CLFFT_COMPLEX_INTERLEAVED;
+        clStridesIn[1]  = src_step / sizeof(std::complex<float>);
+        // the output stride comes from the destination step, which may differ from the source step
+        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
+        break;
+    case R2C:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        inLayout        = CLFFT_REAL;
+        outLayout       = CLFFT_HERMITIAN_INTERLEAVED;
+        clStridesIn[1]  = src_step / sizeof(float);
+        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
+        break;
+    case C2R:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        inLayout        = CLFFT_HERMITIAN_INTERLEAVED;
+        outLayout       = CLFFT_REAL;
+        clStridesIn[1]  = src_step / sizeof(std::complex<float>);
+        clStridesOut[1] = dst_step / sizeof(float);
+        break;
+    default:
+        //std::runtime_error("does not support this convertion!");
+        cout << "Does not support this convertion!" << endl;
+        throw exception();
+        break;
+    }
+
+    clStridesIn[2]  = is_row_dft ? clStridesIn[1]  : dft_size.width * clStridesIn[1];
+    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
+
+    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
+
+    openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
+    openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
+    openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
+
+    openCLSafeCall( clAmdFftSetPlanInStride  ( plHandle, dim, clStridesIn ) );
+    openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
+    // distance between consecutive batch items: input and output each use their own stride
+    openCLSafeCall( clAmdFftSetPlanDistance  ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
+    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
+}
+// Removes this plan from planStore (when it is still registered there) and destroys the clAmdFft plan.
+cv::ocl::FftPlan::~FftPlan()
+{
+    for(size_t i = 0; i < planStore.size(); i ++)
+    {
+        if(planStore[i]->plHandle == plHandle)
+        {
+            planStore.erase(planStore.begin()+ i);
+            // stop here: after erase() the following element has moved into slot i
+            break;
+        }
+    }
+    openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
+}
+
+// Returns the handle of a plan baked for exactly these parameters.
+// A cached plan is reused when one matches; otherwise a new plan is baked,
+// appended to planStore and returned.
+clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
+{
+    for(vector<FftPlan*>::const_iterator it = planStore.begin(); it != planStore.end(); ++it)
+    {
+        const FftPlan * cached = *it;
+        const bool same_size  = cached->dft_size.width == _dft_size.width &&
+                                cached->dft_size.height == _dft_size.height;
+        const bool same_steps = cached->src_step == _src_step && cached->dst_step == _dst_step;
+        const bool same_kind  = cached->flags == _flags && cached->type == _type;
+        if(same_size && same_steps && same_kind)
+        {
+            return cached->plHandle;
+        }
+    }
+
+    // nothing suitable has been baked yet
+    FftPlan *fresh = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
+    planStore.push_back(fresh);
+    return fresh->plHandle;
+}
+
+// Forward or inverse DFT of src into dst on the OpenCL device, using a cached clAmdFft plan.
+//   src      - CV_32F (real) or CV_32FC2 (complex) input
+//   dst      - output matrix, created here with the size and type required by the transform type
+//   dft_size - transform size; Size(0,0) selects src.size(). Its area must equal
+//              getOptimalDFTSize(area)
+//   flags    - DFT_INVERSE, DFT_REAL_OUTPUT and DFT_ROWS are honoured; DFT_SCALE is rejected
+void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
+{
+    if(dft_size == Size(0,0))
+    {
+        dft_size = src.size();
+    }
+    // check if the given dft size is of optimal dft size
+    CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
+
+    // similar assertions with cuda module
+    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
+
+    // we don't support DFT_SCALE flag
+    CV_Assert(!(DFT_SCALE & flags));
+
+    const int  is_row_dft        = flags & DFT_ROWS;
+    const int  is_inverse        = flags & DFT_INVERSE;
+    const bool is_complex_input  = src.channels() == 2;
+    const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
+
+    // We don't support real-to-real transform
+    CV_Assert(is_complex_input || is_complex_output);
+    // bit 0: complex input, bit 1: complex output - this matches the FftType enumerator values
+    FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
+
+    switch(type)
+    {
+    case C2C:
+        dst.create(src.rows, src.cols, CV_32FC2);
+        break;
+    case R2C:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
+        break;
+    case C2R:
+        CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
+        CV_Assert(!is_row_dft); // this is not supported yet
+        dst.create(src.rows, dft_size.width, CV_32FC1);
+        break;
+    default:
+        //std::runtime_error("does not support this convertion!");
+        cout << "Does not support this convertion!" << endl;
+        throw exception();
+        break;
+    }
+    clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
+
+    //get the buffersize
+    size_t buffersize=0;
+    openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
+
+    //allocate the intermediate buffer
+    cl_mem clMedBuffer=NULL;
+    if (buffersize)
+    {
+        cl_int medstatus;
+        clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+        openCLSafeCall( medstatus );
+    }
+
+    // Run the transform and wait for it, but check the status only after the intermediate
+    // buffer has been released. NOTE(review): openCLSafeCall is defined outside this file and
+    // presumably raises on a failed status; checking before the release would then leak the buffer.
+    cl_int status = clAmdFftEnqueueTransform( plHandle,
+        is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
+        1,
+        &src.clCxt->impl->clCmdQueue,
+        0, NULL, NULL,
+        (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer );
+    if(status == CL_SUCCESS)
+    {
+        status = clFinish(src.clCxt->impl->clCmdQueue);
+    }
+    if(clMedBuffer)
+    {
+        openCLFree(clMedBuffer);
+    }
+    openCLSafeCall( status );
+}
+
+#endif
+#endif //HAVE_CLAMDFFT
diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
new file mode 100644 (file)
index 0000000..c35e061
--- /dev/null
@@ -0,0 +1,161 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iomanip>
+#include "precomp.hpp"
+
+#ifdef HAVE_CLAMDBLAS
+
+#include "clAmdBlas.h"
+
+#if !defined (HAVE_OPENCL)
+// Stub used when the module is built without OpenCL; the signature matches the gemm declaration in ocl.hpp.
+void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
+                   const oclMat& src3, double beta, oclMat& dst, int flags) { throw_nogpu(); }
+#else
+
+using namespace cv;
+
+       // Generalized matrix product on the OpenCL device via clAmdBlas:
+       //     dst = alpha * op(src1) * op(src2) + beta * src3
+       // op(X) is X transposed when the matching GEMM_1_T / GEMM_2_T bit is set in flags,
+       // otherwise X itself. An empty src3 is treated as a zero matrix.
+       // GEMM_3_T is not supported. All inputs must share one element type out of
+       // CV_32FC1, CV_64FC1, CV_32FC2 and CV_64FC2.
+       void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
+               const oclMat& src3, double beta, oclMat& dst, int flags)
+       {
+           CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
+
+           // reject element types the switch below cannot dispatch, instead of silently
+           // returning dst without performing the multiplication
+           const int type = src1.type();
+           CV_Assert(type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2);
+           CV_Assert(src2.type() == type && (src3.empty() || src3.type() == type));
+
+           const bool is_trans1 = (cv::GEMM_1_T & flags) != 0;
+           const bool is_trans2 = (cv::GEMM_2_T & flags) != 0;
+
+           // logical sizes after the optional transposes: op(src1) is M x K, op(src2) is K x N
+           const int M = is_trans1 ? src1.cols : src1.rows;
+           const int K = is_trans1 ? src1.rows : src1.cols;
+           const int N = is_trans2 ? src2.rows : src2.cols;
+           CV_Assert(K == (is_trans2 ? src2.cols : src2.rows));
+           CV_Assert(src3.empty() || (src3.rows == M && src3.cols == N));
+
+           if(!src3.empty())
+           {
+               // the BLAS call accumulates into dst in place, so start from a copy of src3
+               src3.copyTo(dst);
+           }
+           else
+           {
+               dst.create(M, N, type);
+               dst.setTo(Scalar::all(0));
+           }
+           openCLSafeCall( clAmdBlasSetup() );
+
+           const clAmdBlasTranspose transA = is_trans1 ? clAmdBlasTrans : clAmdBlasNoTrans;
+           const clAmdBlasTranspose transB = is_trans2 ? clAmdBlasTrans : clAmdBlasNoTrans;
+           const clAmdBlasOrder     order  = clAmdBlasRowMajor;
+
+           // leading dimensions and offsets are read in bytes and converted to elements per type below
+           int lda     = src1.step;
+           int ldb     = src2.step;
+           int ldc     = dst.step;
+           int offa    = src1.offset;
+           int offb    = src2.offset;
+           int offc    = dst.offset;
+
+           switch(type)
+           {
+           case CV_32FC1:
+               lda  /= sizeof(float);
+               ldb  /= sizeof(float);
+               ldc  /= sizeof(float);
+               offa /= sizeof(float);
+               offb /= sizeof(float);
+               offc /= sizeof(float);
+               openCLSafeCall
+               (
+                   clAmdBlasSgemmEx(order, transA, transB, M, N, K,
+                       alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                       beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+               );
+               break;
+           case CV_64FC1:
+               lda  /= sizeof(double);
+               ldb  /= sizeof(double);
+               ldc  /= sizeof(double);
+               offa /= sizeof(double);
+               offb /= sizeof(double);
+               offc /= sizeof(double);
+               openCLSafeCall
+               (
+                   clAmdBlasDgemmEx(order, transA, transB, M, N, K,
+                       alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                       beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+               );
+               break;
+           case CV_32FC2:
+               {
+                   lda  /= sizeof(std::complex<float>);
+                   ldb  /= sizeof(std::complex<float>);
+                   ldc  /= sizeof(std::complex<float>);
+                   offa /= sizeof(std::complex<float>);
+                   offb /= sizeof(std::complex<float>);
+                   offc /= sizeof(std::complex<float>);
+                   // explicit conversion: double -> float inside a brace initializer is a narrowing conversion
+                   cl_float2 alpha_2 = {{static_cast<cl_float>(alpha), 0}};
+                   cl_float2 beta_2  = {{static_cast<cl_float>(beta), 0}};
+                   openCLSafeCall
+                   (
+                       clAmdBlasCgemmEx(order, transA, transB, M, N, K,
+                           alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                           beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                   );
+               }
+               break;
+           case CV_64FC2:
+               {
+                   lda  /= sizeof(std::complex<double>);
+                   ldb  /= sizeof(std::complex<double>);
+                   ldc  /= sizeof(std::complex<double>);
+                   offa /= sizeof(std::complex<double>);
+                   offb /= sizeof(std::complex<double>);
+                   offc /= sizeof(std::complex<double>);
+                   cl_double2 alpha_2 = {{alpha, 0}};
+                   cl_double2 beta_2  = {{beta, 0}};
+                   openCLSafeCall
+                   (
+                       clAmdBlasZgemmEx(order, transA, transB, M, N, K,
+                           alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                           beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                   );
+               }
+               break;
+           }
+           clAmdBlasTeardown();
+       }
+#endif
+#endif
diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp
new file mode 100644 (file)
index 0000000..4b51d4f
--- /dev/null
@@ -0,0 +1,97 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDFFT
+////////////////////////////////////////////////////////////////////////////
+// Dft
+// Fixture for the ocl::dft tests, parameterized by the transform size and by
+// whether the DFT_ROWS flag is requested.
+PARAM_TEST_CASE(Dft, cv::Size, bool) 
+{
+       cv::Size dft_size;
+       bool     dft_rows;
+       std::vector<cv::ocl::Info> oclinfo;
+    virtual void SetUp()
+    {
+           // the tests need at least one OpenCL device
+           int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+               dft_size = GET_PARAM(0);
+               dft_rows = GET_PARAM(1);
+    }
+};
+
+// Complex-to-complex transform: the OpenCL result must match cv::dft on the CPU.
+TEST_P(Dft, C2C)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
+    const int flags = dft_rows ? cv::DFT_ROWS : 0;
+
+    // reference result computed on the CPU
+    cv::Mat b_gold;
+    cv::dft(a, b_gold, flags);
+
+    // the same transform on the OpenCL device
+    cv::ocl::oclMat d_b;
+    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+
+    EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
+}
+
+
+// Round trip: a real-to-complex transform followed by the complex-to-real inverse
+// must reproduce the input.
+TEST_P(Dft, R2CthenC2R)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
+
+    // DFT_ROWS is not supported yet for these transforms, so dft_rows is not applied here
+    const int forward_flags = 0;
+    const int inverse_flags = forward_flags | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT;
+
+    cv::ocl::oclMat d_b, d_c;
+    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), forward_flags);
+    cv::ocl::dft(d_b, d_c, a.size(), inverse_flags);
+    EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
+}
+
+// Run every Dft test for two transform sizes, each with and without DFT_ROWS.
+INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
+    testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
+    testing::Values(false, true)));
+
+#endif // HAVE_CLAMDFFT
diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp
new file mode 100644 (file)
index 0000000..a836149
--- /dev/null
@@ -0,0 +1,85 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDBLAS
+////////////////////////////////////////////////////////////////////////////
+// GEMM
+// Fixture for the ocl::gemm tests, parameterized by the matrix element type,
+// the matrix size and the GEMM_* flags.
+PARAM_TEST_CASE(Gemm, int, cv::Size, int) 
+{
+       int      type;
+       cv::Size mat_size;
+       int              flags;
+       vector<cv::ocl::Info> info;
+    virtual void SetUp()
+    {
+               type     = GET_PARAM(0);
+               mat_size = GET_PARAM(1);
+               flags    = GET_PARAM(2);
+               // query the OpenCL devices before any oclMat is created
+               cv::ocl::getDevice(info);
+    }
+};
+
+// The OpenCL gemm result must match cv::gemm on the CPU for the same inputs and flags.
+TEST_P(Gemm, Accuracy)
+{
+    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
+
+    // reference result computed on the CPU
+    cv::Mat dst;
+    cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
+
+    // the same product on the OpenCL device
+    cv::ocl::oclMat d_a(a), d_b(b), d_c(c);
+    cv::ocl::oclMat ocl_dst;
+    cv::ocl::gemm(d_a, d_b, 1.0, d_c, 1.0, ocl_dst, flags);
+
+    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+}
+
+// Run the Gemm test for single-precision real and complex matrices, two square sizes and
+// every combination of the GEMM_1_T / GEMM_2_T flags; the double-precision types are disabled.
+INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
+       testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
+    testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
+    testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+#endif