From 0f168936a08c24f48cef02cab65520f0d1e06cf7 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 31 Jan 2014 13:19:16 +0400
Subject: [PATCH] OCL module 2 trash

---
 modules/nonfree/CMakeLists.txt                     |    2 +-
 modules/ocl/CMakeLists.txt                         |   11 -
 .../camera_calibration_and_3D_reconstruction.rst   |  328 ---
 modules/ocl/doc/data_structures.rst                |  189 --
 .../ocl/doc/feature_detection_and_description.rst  |  836 --------
 modules/ocl/doc/image_filtering.rst                |  719 -------
 modules/ocl/doc/image_processing.rst               |  347 ---
 modules/ocl/doc/images/adaptiveBilateralFilter.jpg |  Bin 65410 -> 0 bytes
 modules/ocl/doc/introduction.rst                   |   73 -
 modules/ocl/doc/matrix_reductions.rst              |  106 -
 modules/ocl/doc/ml_machine_learning.rst            |  106 -
 modules/ocl/doc/object_detection.rst               |   95 -
 modules/ocl/doc/ocl.rst                            |   21 -
 modules/ocl/doc/operations_on_matrices.rst         |  602 ------
 .../ocl/doc/structures_and_utility_functions.rst   |   56 -
 modules/ocl/doc/video_analysis.rst                 |  561 -----
 modules/ocl/include/opencv2/ocl.hpp                | 2230 --------------------
 .../ocl/include/opencv2/ocl/matrix_operations.hpp  |  490 -----
 modules/ocl/include/opencv2/ocl/ocl.hpp            |   48 -
 .../opencv2/ocl/private/opencl_dumpinfo.hpp        |  154 --
 .../include/opencv2/ocl/private/opencl_utils.hpp   |  115 -
 modules/ocl/include/opencv2/ocl/private/util.hpp   |  191 --
 modules/ocl/perf/main.cpp                          |   76 -
 modules/ocl/perf/perf_arithm.cpp                   | 1127 ----------
 modules/ocl/perf/perf_bgfg.cpp                     |  289 ---
 modules/ocl/perf/perf_blend.cpp                    |  130 --
 modules/ocl/perf/perf_brief.cpp                    |  114 -
 modules/ocl/perf/perf_brute_force_matcher.cpp      |  177 --
 modules/ocl/perf/perf_calib3d.cpp                  |   85 -
 modules/ocl/perf/perf_canny.cpp                    |   76 -
 modules/ocl/perf/perf_color.cpp                    |  110 -
 modules/ocl/perf/perf_fast.cpp                     |   93 -
 modules/ocl/perf/perf_fft.cpp                      |   88 -
 modules/ocl/perf/perf_filters.cpp                  |  416 ----
 modules/ocl/perf/perf_gemm.cpp                     |   88 -
 modules/ocl/perf/perf_gftt.cpp                     |   95 -
 modules/ocl/perf/perf_haar.cpp                     |  153 --
 modules/ocl/perf/perf_hog.cpp                      |  100 -
 modules/ocl/perf/perf_hough.cpp                    |  106 -
 modules/ocl/perf/perf_imgproc.cpp                  |  737 -------
 modules/ocl/perf/perf_imgwarp.cpp                  |  364 ----
 modules/ocl/perf/perf_kalman.cpp                   |  103 -
 modules/ocl/perf/perf_match_template.cpp           |  121 --
 modules/ocl/perf/perf_matrix_operation.cpp         |  238 ---
 modules/ocl/perf/perf_ml.cpp                       |  109 -
 modules/ocl/perf/perf_moments.cpp                  |   90 -
 modules/ocl/perf/perf_norm.cpp                     |   86 -
 modules/ocl/perf/perf_opticalflow.cpp              |  255 ---
 modules/ocl/perf/perf_orb.cpp                      |  103 -
 modules/ocl/perf/perf_precomp.hpp                  |  198 --
 modules/ocl/perf/perf_pyramid.cpp                  |  130 --
 modules/ocl/perf/perf_split_merge.cpp              |  146 --
 modules/ocl/src/arithm.cpp                         | 1804 ----------------
 modules/ocl/src/bgfg_mog.cpp                       |  639 ------
 modules/ocl/src/blend.cpp                          |   99 -
 modules/ocl/src/brief.cpp                          |   91 -
 modules/ocl/src/brute_force_matcher.cpp            | 1213 -----------
 modules/ocl/src/build_warps.cpp                    |  285 ---
 modules/ocl/src/canny.cpp                          |  387 ----
 modules/ocl/src/cl_context.cpp                     |  944 ---------
 modules/ocl/src/cl_operations.cpp                  |  549 -----
 modules/ocl/src/cl_programcache.cpp                |  514 -----
 modules/ocl/src/cl_programcache.hpp                |   85 -
 modules/ocl/src/color.cpp                          |  656 ------
 modules/ocl/src/columnsum.cpp                      |   75 -
 modules/ocl/src/error.cpp                          |  174 --
 modules/ocl/src/fast.cpp                           |  229 --
 modules/ocl/src/fft.cpp                            |  382 ----
 modules/ocl/src/filtering.cpp                      | 1710 ---------------
 modules/ocl/src/gemm.cpp                           |  205 --
 modules/ocl/src/gftt.cpp                           |  300 ---
 modules/ocl/src/haar.cpp                           | 1224 -----------
 modules/ocl/src/hog.cpp                            | 1962 -----------------
 modules/ocl/src/hough.cpp                          |  398 ----
 modules/ocl/src/imgproc.cpp                        | 2014 ------------------
 modules/ocl/src/interpolate_frames.cpp             |  235 ---
 modules/ocl/src/kalman.cpp                         |  134 --
 modules/ocl/src/kmeans.cpp                         |  451 ----
 modules/ocl/src/knearest.cpp                       |  151 --
 modules/ocl/src/match_template.cpp                 |  570 -----
 modules/ocl/src/matrix_operations.cpp              |  632 ------
 modules/ocl/src/mcwutil.cpp                        |  226 --
 modules/ocl/src/moments.cpp                        |  391 ----
 modules/ocl/src/mssegmentation.cpp                 |  402 ----
 modules/ocl/src/opencl/arithm_LUT.cl               |  107 -
 .../ocl/src/opencl/arithm_absdiff_nonsaturate.cl   |  107 -
 modules/ocl/src/opencl/arithm_add.cl               |  143 --
 modules/ocl/src/opencl/arithm_addWeighted.cl       |   75 -
 modules/ocl/src/opencl/arithm_add_mask.cl          |   97 -
 modules/ocl/src/opencl/arithm_add_scalar.cl        |  103 -
 modules/ocl/src/opencl/arithm_add_scalar_mask.cl   |   96 -
 modules/ocl/src/opencl/arithm_bitwise_binary.cl    |   82 -
 .../ocl/src/opencl/arithm_bitwise_binary_mask.cl   |   88 -
 .../ocl/src/opencl/arithm_bitwise_binary_scalar.cl |   82 -
 .../opencl/arithm_bitwise_binary_scalar_mask.cl    |   86 -
 modules/ocl/src/opencl/arithm_bitwise_not.cl       |  253 ---
 modules/ocl/src/opencl/arithm_cartToPolar.cl       |  141 --
 modules/ocl/src/opencl/arithm_compare.cl           |   74 -
 modules/ocl/src/opencl/arithm_exp.cl               |  111 -
 modules/ocl/src/opencl/arithm_flip.cl              |  125 --
 modules/ocl/src/opencl/arithm_log.cl               |  111 -
 modules/ocl/src/opencl/arithm_magnitude.cl         |   74 -
 modules/ocl/src/opencl/arithm_minMax.cl            |  176 --
 modules/ocl/src/opencl/arithm_minMaxLoc.cl         |  258 ---
 modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl    |  256 ---
 modules/ocl/src/opencl/arithm_nonzero.cl           |   93 -
 modules/ocl/src/opencl/arithm_phase.cl             |  171 --
 modules/ocl/src/opencl/arithm_polarToCart.cl       |  197 --
 modules/ocl/src/opencl/arithm_pow.cl               |   73 -
 modules/ocl/src/opencl/arithm_setidentity.cl       |   69 -
 modules/ocl/src/opencl/arithm_sqrt.cl              |  111 -
 modules/ocl/src/opencl/arithm_sum.cl               |  104 -
 modules/ocl/src/opencl/arithm_transpose.cl         |  139 --
 modules/ocl/src/opencl/bgfg_mog.cl                 |  540 -----
 modules/ocl/src/opencl/blend_linear.cl             |   78 -
 modules/ocl/src/opencl/brief.cl                    |  173 --
 modules/ocl/src/opencl/brute_force_match.cl        |  789 -------
 modules/ocl/src/opencl/build_warps.cl              |  207 --
 modules/ocl/src/opencl/convertC3C4.cl              |  153 --
 modules/ocl/src/opencl/cvt_color.cl                | 1599 --------------
 modules/ocl/src/opencl/featdetect_fast.cl          | 1331 ------------
 modules/ocl/src/opencl/filter_sep_col.cl           |  118 --
 modules/ocl/src/opencl/filter_sep_row.cl           |  457 ----
 .../ocl/src/opencl/filtering_adaptive_bilateral.cl |  429 ----
 modules/ocl/src/opencl/filtering_boxFilter.cl      |  376 ----
 modules/ocl/src/opencl/filtering_filter2D.cl       |  374 ----
 modules/ocl/src/opencl/filtering_morph.cl          |  228 --
 .../src/opencl/filtering_sep_filter_singlepass.cl  |  185 --
 modules/ocl/src/opencl/haarobjectdetect.cl         |  596 ------
 modules/ocl/src/opencl/haarobjectdetect_scaled2.cl |  323 ---
 modules/ocl/src/opencl/imgproc_bilateral.cl        |  145 --
 modules/ocl/src/opencl/imgproc_calcHarris.cl       |  214 --
 modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl  |  204 --
 modules/ocl/src/opencl/imgproc_canny.cl            |  721 -------
 modules/ocl/src/opencl/imgproc_clahe.cl            |  255 ---
 modules/ocl/src/opencl/imgproc_columnsum.cl        |   70 -
 modules/ocl/src/opencl/imgproc_convolve.cl         |  111 -
 modules/ocl/src/opencl/imgproc_copymakeboder.cl    |  134 --
 modules/ocl/src/opencl/imgproc_gftt.cl             |  129 --
 modules/ocl/src/opencl/imgproc_histogram.cl        |  279 ---
 modules/ocl/src/opencl/imgproc_hough.cl            |  280 ---
 modules/ocl/src/opencl/imgproc_integral.cl         |  503 -----
 modules/ocl/src/opencl/imgproc_integral_sum.cl     |  412 ----
 modules/ocl/src/opencl/imgproc_median.cl           |  486 -----
 .../ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl |   96 -
 modules/ocl/src/opencl/imgproc_remap.cl            |  408 ----
 modules/ocl/src/opencl/imgproc_resize.cl           |  405 ----
 modules/ocl/src/opencl/imgproc_sobel3.cl           |  347 ---
 modules/ocl/src/opencl/imgproc_threshold.cl        |  136 --
 modules/ocl/src/opencl/imgproc_warpAffine.cl       |  761 -------
 modules/ocl/src/opencl/imgproc_warpPerspective.cl  |  682 ------
 modules/ocl/src/opencl/interpolate_frames.cl       |  252 ---
 modules/ocl/src/opencl/kernel_radix_sort_by_key.cl |  176 --
 modules/ocl/src/opencl/kernel_sort_by_key.cl       |  244 ---
 modules/ocl/src/opencl/kernel_stablesort_by_key.cl |  264 ---
 modules/ocl/src/opencl/kmeans_kernel.cl            |  107 -
 modules/ocl/src/opencl/knearest.cl                 |  186 --
 modules/ocl/src/opencl/match_template.cl           |  853 --------
 modules/ocl/src/opencl/meanShift.cl                |  241 ---
 modules/ocl/src/opencl/merge_mat.cl                | 1378 ------------
 modules/ocl/src/opencl/moments.cl                  |  432 ----
 modules/ocl/src/opencl/objdetect_hog.cl            |  726 -------
 modules/ocl/src/opencl/operator_convertTo.cl       |   64 -
 modules/ocl/src/opencl/operator_copyToM.cl         |   71 -
 modules/ocl/src/opencl/operator_setTo.cl           |   95 -
 modules/ocl/src/opencl/operator_setToM.cl          |   68 -
 modules/ocl/src/opencl/optical_flow_farneback.cl   |  450 ----
 modules/ocl/src/opencl/orb.cl                      |  503 -----
 modules/ocl/src/opencl/pyr_down.cl                 | 1010 ---------
 modules/ocl/src/opencl/pyr_up.cl                   |  146 --
 modules/ocl/src/opencl/pyrlk.cl                    | 1019 ---------
 modules/ocl/src/opencl/split_mat.cl                |  217 --
 modules/ocl/src/opencl/stereobm.cl                 |  338 ---
 modules/ocl/src/opencl/stereobp.cl                 |  393 ----
 modules/ocl/src/opencl/stereocsbp.cl               | 1382 ------------
 modules/ocl/src/opencl/svm.cl                      |  211 --
 modules/ocl/src/opencl/tvl1flow.cl                 |  386 ----
 modules/ocl/src/optical_flow_farneback.cpp         |  542 -----
 modules/ocl/src/orb.cpp                            |  916 --------
 modules/ocl/src/precomp.hpp                        |   98 -
 modules/ocl/src/pyrdown.cpp                        |   91 -
 modules/ocl/src/pyrlk.cpp                          |  338 ---
 modules/ocl/src/pyrup.cpp                          |  104 -
 modules/ocl/src/safe_call.hpp                      |   69 -
 modules/ocl/src/sort_by_key.cpp                    |  472 -----
 modules/ocl/src/split_merge.cpp                    |  300 ---
 modules/ocl/src/stereo_csbp.cpp                    |  698 ------
 modules/ocl/src/stereobm.cpp                       |  222 --
 modules/ocl/src/stereobp.cpp                       |  502 -----
 modules/ocl/src/svm.cpp                            | 1136 ----------
 modules/ocl/src/tvl1flow.cpp                       |  477 -----
 modules/ocl/test/main.cpp                          |   80 -
 modules/ocl/test/test_api.cpp                      |  213 --
 modules/ocl/test/test_arithm.cpp                   | 1621 --------------
 modules/ocl/test/test_bgfg.cpp                     |  240 ---
 modules/ocl/test/test_blend.cpp                    |  176 --
 modules/ocl/test/test_brief.cpp                    |  115 -
 modules/ocl/test/test_brute_force_matcher.cpp      |  220 --
 modules/ocl/test/test_calib3d.cpp                  |  196 --
 modules/ocl/test/test_canny.cpp                    |   89 -
 modules/ocl/test/test_color.cpp                    |  316 ---
 modules/ocl/test/test_fast.cpp                     |   93 -
 modules/ocl/test/test_fft.cpp                      |  244 ---
 modules/ocl/test/test_filters.cpp                  |  476 -----
 modules/ocl/test/test_gemm.cpp                     |   84 -
 modules/ocl/test/test_hough.cpp                    |  112 -
 modules/ocl/test/test_imgproc.cpp                  |  622 ------
 modules/ocl/test/test_kalman.cpp                   |  148 --
 modules/ocl/test/test_kmeans.cpp                   |  235 ---
 modules/ocl/test/test_match_template.cpp           |  137 --
 modules/ocl/test/test_matrix_operation.cpp         |  250 ---
 modules/ocl/test/test_mean_shift.cpp               |  408 ----
 modules/ocl/test/test_ml.cpp                       |  309 ---
 modules/ocl/test/test_moments.cpp                  |   64 -
 modules/ocl/test/test_objdetect.cpp                |  226 --
 modules/ocl/test/test_optflow.cpp                  |  341 ---
 modules/ocl/test/test_orb.cpp                      |  138 --
 modules/ocl/test/test_precomp.hpp                  |   79 -
 modules/ocl/test/test_pyramids.cpp                 |  117 -
 modules/ocl/test/test_sort.cpp                     |  244 ---
 modules/ocl/test/test_split_merge.cpp              |  224 --
 modules/ocl/test/test_warp.cpp                     |  494 -----
 modules/ocl/test/utility.cpp                       |  366 ----
 modules/ocl/test/utility.hpp                       |  328 ---
 samples/CMakeLists.txt                             |    1 -
 samples/ocl/CMakeLists.txt                         |   58 -
 samples/ocl/adaptive_bilateral_filter.cpp          |   65 -
 samples/ocl/bgfg_segm.cpp                          |  126 --
 samples/ocl/clahe.cpp                              |  112 -
 samples/ocl/facedetect.cpp                         |  390 ----
 samples/ocl/hog.cpp                                |  448 ----
 samples/ocl/pyrlk_optical_flow.cpp                 |  264 ---
 samples/ocl/squares.cpp                            |  341 ---
 samples/ocl/stereo_match.cpp                       |  384 ----
 samples/ocl/surf_matcher.cpp                       |  329 ---
 samples/ocl/tvl1_optical_flow.cpp                  |  237 ---
 236 files changed, 1 insertion(+), 79686 deletions(-)
 delete mode 100644 modules/ocl/CMakeLists.txt
 delete mode 100644 modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
 delete mode 100644 modules/ocl/doc/data_structures.rst
 delete mode 100644 modules/ocl/doc/feature_detection_and_description.rst
 delete mode 100644 modules/ocl/doc/image_filtering.rst
 delete mode 100644 modules/ocl/doc/image_processing.rst
 delete mode 100644 modules/ocl/doc/images/adaptiveBilateralFilter.jpg
 delete mode 100644 modules/ocl/doc/introduction.rst
 delete mode 100644 modules/ocl/doc/matrix_reductions.rst
 delete mode 100644 modules/ocl/doc/ml_machine_learning.rst
 delete mode 100644 modules/ocl/doc/object_detection.rst
 delete mode 100644 modules/ocl/doc/ocl.rst
 delete mode 100644 modules/ocl/doc/operations_on_matrices.rst
 delete mode 100644 modules/ocl/doc/structures_and_utility_functions.rst
 delete mode 100644 modules/ocl/doc/video_analysis.rst
 delete mode 100644 modules/ocl/include/opencv2/ocl.hpp
 delete mode 100644 modules/ocl/include/opencv2/ocl/matrix_operations.hpp
 delete mode 100644 modules/ocl/include/opencv2/ocl/ocl.hpp
 delete mode 100644 modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
 delete mode 100644 modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp
 delete mode 100644 modules/ocl/include/opencv2/ocl/private/util.hpp
 delete mode 100644 modules/ocl/perf/main.cpp
 delete mode 100644 modules/ocl/perf/perf_arithm.cpp
 delete mode 100644 modules/ocl/perf/perf_bgfg.cpp
 delete mode 100644 modules/ocl/perf/perf_blend.cpp
 delete mode 100644 modules/ocl/perf/perf_brief.cpp
 delete mode 100644 modules/ocl/perf/perf_brute_force_matcher.cpp
 delete mode 100644 modules/ocl/perf/perf_calib3d.cpp
 delete mode 100644 modules/ocl/perf/perf_canny.cpp
 delete mode 100644 modules/ocl/perf/perf_color.cpp
 delete mode 100644 modules/ocl/perf/perf_fast.cpp
 delete mode 100644 modules/ocl/perf/perf_fft.cpp
 delete mode 100644 modules/ocl/perf/perf_filters.cpp
 delete mode 100644 modules/ocl/perf/perf_gemm.cpp
 delete mode 100644 modules/ocl/perf/perf_gftt.cpp
 delete mode 100644 modules/ocl/perf/perf_haar.cpp
 delete mode 100644 modules/ocl/perf/perf_hog.cpp
 delete mode 100644 modules/ocl/perf/perf_hough.cpp
 delete mode 100644 modules/ocl/perf/perf_imgproc.cpp
 delete mode 100644 modules/ocl/perf/perf_imgwarp.cpp
 delete mode 100644 modules/ocl/perf/perf_kalman.cpp
 delete mode 100644 modules/ocl/perf/perf_match_template.cpp
 delete mode 100644 modules/ocl/perf/perf_matrix_operation.cpp
 delete mode 100644 modules/ocl/perf/perf_ml.cpp
 delete mode 100644 modules/ocl/perf/perf_moments.cpp
 delete mode 100644 modules/ocl/perf/perf_norm.cpp
 delete mode 100644 modules/ocl/perf/perf_opticalflow.cpp
 delete mode 100644 modules/ocl/perf/perf_orb.cpp
 delete mode 100644 modules/ocl/perf/perf_precomp.hpp
 delete mode 100644 modules/ocl/perf/perf_pyramid.cpp
 delete mode 100644 modules/ocl/perf/perf_split_merge.cpp
 delete mode 100644 modules/ocl/src/arithm.cpp
 delete mode 100644 modules/ocl/src/bgfg_mog.cpp
 delete mode 100644 modules/ocl/src/blend.cpp
 delete mode 100644 modules/ocl/src/brief.cpp
 delete mode 100644 modules/ocl/src/brute_force_matcher.cpp
 delete mode 100644 modules/ocl/src/build_warps.cpp
 delete mode 100644 modules/ocl/src/canny.cpp
 delete mode 100644 modules/ocl/src/cl_context.cpp
 delete mode 100644 modules/ocl/src/cl_operations.cpp
 delete mode 100644 modules/ocl/src/cl_programcache.cpp
 delete mode 100644 modules/ocl/src/cl_programcache.hpp
 delete mode 100644 modules/ocl/src/color.cpp
 delete mode 100644 modules/ocl/src/columnsum.cpp
 delete mode 100644 modules/ocl/src/error.cpp
 delete mode 100644 modules/ocl/src/fast.cpp
 delete mode 100644 modules/ocl/src/fft.cpp
 delete mode 100644 modules/ocl/src/filtering.cpp
 delete mode 100644 modules/ocl/src/gemm.cpp
 delete mode 100644 modules/ocl/src/gftt.cpp
 delete mode 100644 modules/ocl/src/haar.cpp
 delete mode 100644 modules/ocl/src/hog.cpp
 delete mode 100644 modules/ocl/src/hough.cpp
 delete mode 100644 modules/ocl/src/imgproc.cpp
 delete mode 100644 modules/ocl/src/interpolate_frames.cpp
 delete mode 100644 modules/ocl/src/kalman.cpp
 delete mode 100644 modules/ocl/src/kmeans.cpp
 delete mode 100644 modules/ocl/src/knearest.cpp
 delete mode 100644 modules/ocl/src/match_template.cpp
 delete mode 100644 modules/ocl/src/matrix_operations.cpp
 delete mode 100644 modules/ocl/src/mcwutil.cpp
 delete mode 100644 modules/ocl/src/moments.cpp
 delete mode 100644 modules/ocl/src/mssegmentation.cpp
 delete mode 100644 modules/ocl/src/opencl/arithm_LUT.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_add.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_addWeighted.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_add_mask.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_add_scalar.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_add_scalar_mask.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_bitwise_binary.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_bitwise_not.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_cartToPolar.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_compare.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_exp.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_flip.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_log.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_magnitude.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_minMax.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_minMaxLoc.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_nonzero.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_phase.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_polarToCart.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_pow.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_setidentity.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_sqrt.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_sum.cl
 delete mode 100644 modules/ocl/src/opencl/arithm_transpose.cl
 delete mode 100644 modules/ocl/src/opencl/bgfg_mog.cl
 delete mode 100644 modules/ocl/src/opencl/blend_linear.cl
 delete mode 100644 modules/ocl/src/opencl/brief.cl
 delete mode 100644 modules/ocl/src/opencl/brute_force_match.cl
 delete mode 100644 modules/ocl/src/opencl/build_warps.cl
 delete mode 100644 modules/ocl/src/opencl/convertC3C4.cl
 delete mode 100644 modules/ocl/src/opencl/cvt_color.cl
 delete mode 100644 modules/ocl/src/opencl/featdetect_fast.cl
 delete mode 100644 modules/ocl/src/opencl/filter_sep_col.cl
 delete mode 100644 modules/ocl/src/opencl/filter_sep_row.cl
 delete mode 100644 modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
 delete mode 100644 modules/ocl/src/opencl/filtering_boxFilter.cl
 delete mode 100644 modules/ocl/src/opencl/filtering_filter2D.cl
 delete mode 100644 modules/ocl/src/opencl/filtering_morph.cl
 delete mode 100644 modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
 delete mode 100644 modules/ocl/src/opencl/haarobjectdetect.cl
 delete mode 100644 modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_bilateral.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_calcHarris.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_canny.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_clahe.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_columnsum.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_convolve.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_copymakeboder.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_gftt.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_histogram.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_hough.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_integral.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_integral_sum.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_median.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_remap.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_resize.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_sobel3.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_threshold.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_warpAffine.cl
 delete mode 100644 modules/ocl/src/opencl/imgproc_warpPerspective.cl
 delete mode 100644 modules/ocl/src/opencl/interpolate_frames.cl
 delete mode 100644 modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
 delete mode 100644 modules/ocl/src/opencl/kernel_sort_by_key.cl
 delete mode 100644 modules/ocl/src/opencl/kernel_stablesort_by_key.cl
 delete mode 100644 modules/ocl/src/opencl/kmeans_kernel.cl
 delete mode 100644 modules/ocl/src/opencl/knearest.cl
 delete mode 100644 modules/ocl/src/opencl/match_template.cl
 delete mode 100644 modules/ocl/src/opencl/meanShift.cl
 delete mode 100644 modules/ocl/src/opencl/merge_mat.cl
 delete mode 100644 modules/ocl/src/opencl/moments.cl
 delete mode 100644 modules/ocl/src/opencl/objdetect_hog.cl
 delete mode 100644 modules/ocl/src/opencl/operator_convertTo.cl
 delete mode 100644 modules/ocl/src/opencl/operator_copyToM.cl
 delete mode 100644 modules/ocl/src/opencl/operator_setTo.cl
 delete mode 100644 modules/ocl/src/opencl/operator_setToM.cl
 delete mode 100644 modules/ocl/src/opencl/optical_flow_farneback.cl
 delete mode 100644 modules/ocl/src/opencl/orb.cl
 delete mode 100644 modules/ocl/src/opencl/pyr_down.cl
 delete mode 100644 modules/ocl/src/opencl/pyr_up.cl
 delete mode 100644 modules/ocl/src/opencl/pyrlk.cl
 delete mode 100644 modules/ocl/src/opencl/split_mat.cl
 delete mode 100644 modules/ocl/src/opencl/stereobm.cl
 delete mode 100644 modules/ocl/src/opencl/stereobp.cl
 delete mode 100644 modules/ocl/src/opencl/stereocsbp.cl
 delete mode 100644 modules/ocl/src/opencl/svm.cl
 delete mode 100644 modules/ocl/src/opencl/tvl1flow.cl
 delete mode 100644 modules/ocl/src/optical_flow_farneback.cpp
 delete mode 100644 modules/ocl/src/orb.cpp
 delete mode 100644 modules/ocl/src/precomp.hpp
 delete mode 100644 modules/ocl/src/pyrdown.cpp
 delete mode 100644 modules/ocl/src/pyrlk.cpp
 delete mode 100644 modules/ocl/src/pyrup.cpp
 delete mode 100644 modules/ocl/src/safe_call.hpp
 delete mode 100644 modules/ocl/src/sort_by_key.cpp
 delete mode 100644 modules/ocl/src/split_merge.cpp
 delete mode 100644 modules/ocl/src/stereo_csbp.cpp
 delete mode 100644 modules/ocl/src/stereobm.cpp
 delete mode 100644 modules/ocl/src/stereobp.cpp
 delete mode 100644 modules/ocl/src/svm.cpp
 delete mode 100644 modules/ocl/src/tvl1flow.cpp
 delete mode 100644 modules/ocl/test/main.cpp
 delete mode 100644 modules/ocl/test/test_api.cpp
 delete mode 100644 modules/ocl/test/test_arithm.cpp
 delete mode 100644 modules/ocl/test/test_bgfg.cpp
 delete mode 100644 modules/ocl/test/test_blend.cpp
 delete mode 100644 modules/ocl/test/test_brief.cpp
 delete mode 100644 modules/ocl/test/test_brute_force_matcher.cpp
 delete mode 100644 modules/ocl/test/test_calib3d.cpp
 delete mode 100644 modules/ocl/test/test_canny.cpp
 delete mode 100644 modules/ocl/test/test_color.cpp
 delete mode 100644 modules/ocl/test/test_fast.cpp
 delete mode 100644 modules/ocl/test/test_fft.cpp
 delete mode 100644 modules/ocl/test/test_filters.cpp
 delete mode 100644 modules/ocl/test/test_gemm.cpp
 delete mode 100644 modules/ocl/test/test_hough.cpp
 delete mode 100644 modules/ocl/test/test_imgproc.cpp
 delete mode 100644 modules/ocl/test/test_kalman.cpp
 delete mode 100644 modules/ocl/test/test_kmeans.cpp
 delete mode 100644 modules/ocl/test/test_match_template.cpp
 delete mode 100644 modules/ocl/test/test_matrix_operation.cpp
 delete mode 100644 modules/ocl/test/test_mean_shift.cpp
 delete mode 100644 modules/ocl/test/test_ml.cpp
 delete mode 100644 modules/ocl/test/test_moments.cpp
 delete mode 100644 modules/ocl/test/test_objdetect.cpp
 delete mode 100644 modules/ocl/test/test_optflow.cpp
 delete mode 100644 modules/ocl/test/test_orb.cpp
 delete mode 100644 modules/ocl/test/test_precomp.hpp
 delete mode 100644 modules/ocl/test/test_pyramids.cpp
 delete mode 100644 modules/ocl/test/test_sort.cpp
 delete mode 100644 modules/ocl/test/test_split_merge.cpp
 delete mode 100644 modules/ocl/test/test_warp.cpp
 delete mode 100644 modules/ocl/test/utility.cpp
 delete mode 100644 modules/ocl/test/utility.hpp
 delete mode 100644 samples/ocl/CMakeLists.txt
 delete mode 100644 samples/ocl/adaptive_bilateral_filter.cpp
 delete mode 100644 samples/ocl/bgfg_segm.cpp
 delete mode 100644 samples/ocl/clahe.cpp
 delete mode 100644 samples/ocl/facedetect.cpp
 delete mode 100644 samples/ocl/hog.cpp
 delete mode 100644 samples/ocl/pyrlk_optical_flow.cpp
 delete mode 100644 samples/ocl/squares.cpp
 delete mode 100644 samples/ocl/stereo_match.cpp
 delete mode 100644 samples/ocl/surf_matcher.cpp
 delete mode 100644 samples/ocl/tvl1_optical_flow.cpp

diff --git a/modules/nonfree/CMakeLists.txt b/modules/nonfree/CMakeLists.txt
index eec28e9..86caca9 100644
--- a/modules/nonfree/CMakeLists.txt
+++ b/modules/nonfree/CMakeLists.txt
@@ -4,4 +4,4 @@ endif()
 
 set(the_description "Functionality with possible limitations on the use")
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
-ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_cudaarithm opencv_ocl)
+ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_cudaarithm)
diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt
deleted file mode 100644
index db9d64e..0000000
--- a/modules/ocl/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-if(NOT HAVE_OPENCL)
-  ocv_module_disable(ocl)
-  return()
-endif()
-
-set(the_description "OpenCL-accelerated Computer Vision")
-ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${OPENCL_LIBRARIES}")
-if(TARGET opencv_test_ocl)
-  target_link_libraries(opencv_test_ocl "${OPENCL_LIBRARIES}")
-endif()
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
diff --git a/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst b/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
deleted file mode 100644
index 8243669..0000000
--- a/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
+++ /dev/null
@@ -1,328 +0,0 @@
-Camera Calibration and 3D Reconstruction
-========================================
-
-.. highlight:: cpp
-
-
-
-ocl::StereoBM_OCL
----------------------
-.. ocv:class:: ocl::StereoBM_OCL
-
-Class computing stereo correspondence (disparity map) using the block matching algorithm. ::
-
-    class CV_EXPORTS StereoBM_OCL
-    {
-    public:
-        enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
-
-        enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
-
-        //! the default constructor
-        StereoBM_OCL();
-        //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
-        StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
-
-        //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
-        //! Output disparity has CV_8U type.
-        void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
-
-        //! Some heuristics that tries to estmate
-        // if current GPU will be faster then CPU in this algorithm.
-        // It queries current active device.
-        static bool checkIfGpuCallReasonable();
-
-        int preset;
-        int ndisp;
-        int winSize;
-
-        // If avergeTexThreshold  == 0 => post procesing is disabled
-        // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
-        // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
-        // i.e. input left image is low textured.
-        float avergeTexThreshold;
-    private:
-        /* hidden */
-    };
-
-
-The class also performs pre- and post-filtering steps: Sobel pre-filtering (if ``PREFILTER_XSOBEL`` flag is set) and low textureness filtering (if ``averageTexThreshols > 0`` ). If ``avergeTexThreshold = 0`` , low textureness filtering is disabled. Otherwise, the disparity is set to 0 in each point ``(x, y)`` , where for the left image
-
-.. math::
-    \sum HorizontalGradiensInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold
-
-This means that the input left image is low textured.
-
-
-ocl::StereoBM_OCL::StereoBM_OCL
------------------------------------
-Enables :ocv:class:`ocl::StereoBM_OCL` constructors.
-
-.. ocv:function:: ocl::StereoBM_OCL::StereoBM_OCL()
-
-.. ocv:function:: ocl::StereoBM_OCL::StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ)
-
-    :param preset: Parameter presetting:
-
-        * **BASIC_PRESET** Basic mode without pre-processing.
-
-        * **PREFILTER_XSOBEL** Sobel pre-filtering mode.
-
-    :param ndisparities: Number of disparities. It must be a multiple of 8 and less or equal to 256.
-
-    :param winSize: Block size.
-
-
-
-ocl::StereoBM_OCL::operator ()
-----------------------------------
-Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
-
-.. ocv:function:: void ocl::StereoBM_OCL::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
-
-    :param left: Left image. Only  ``CV_8UC1``  type is supported.
-
-    :param right: Right image with the same size and the same type as the left one.
-
-    :param disparity: Output disparity map. It is a  ``CV_8UC1``  image with the same size as the input images.
-
-
-ocl::StereoBM_OCL::checkIfGpuCallReasonable
------------------------------------------------
-Uses a heuristic method to estimate whether the current GPU is faster than the CPU in this algorithm. It queries the currently active device.
-
-.. ocv:function:: bool ocl::StereoBM_OCL::checkIfGpuCallReasonable()
-
-ocl::StereoBeliefPropagation
---------------------------------
-.. ocv:class:: ocl::StereoBeliefPropagation
-
-Class computing stereo correspondence using the belief propagation algorithm. ::
-
-    class CV_EXPORTS StereoBeliefPropagation
-    {
-    public:
-        enum { DEFAULT_NDISP  = 64 };
-        enum { DEFAULT_ITERS  = 5  };
-        enum { DEFAULT_LEVELS = 5  };
-        static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
-        explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
-                                         int iters  = DEFAULT_ITERS,
-                                         int levels = DEFAULT_LEVELS,
-                                         int msg_type = CV_16S);
-        StereoBeliefPropagation(int ndisp, int iters, int levels,
-                                float max_data_term, float data_weight,
-                                float max_disc_term, float disc_single_jump,
-                                int msg_type = CV_32F);
-        void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
-        void operator()(const oclMat &data, oclMat &disparity);
-        int ndisp;
-        int iters;
-        int levels;
-        float max_data_term;
-        float data_weight;
-        float max_disc_term;
-        float disc_single_jump;
-        int msg_type;
-    private:
-        /* hidden */
-    };
-
-The class implements algorithm described in [Felzenszwalb2006]_ . It can compute own data cost (using a truncated linear model) or use a user-provided data cost.
-
-.. note::
-
-    ``StereoBeliefPropagation`` requires a lot of memory for message storage:
-
-    .. math::
-
-        width \_ step  \cdot height  \cdot ndisp  \cdot 4  \cdot (1 + 0.25)
-
-    and for data cost storage:
-
-    .. math::
-
-        width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 +  \dotsm + \frac{1}{4^{levels}})
-
-    ``width_step`` is the number of bytes in a line including padding.
-
-
-
-ocl::StereoBeliefPropagation::StereoBeliefPropagation
----------------------------------------------------------
-Enables the :ocv:class:`ocl::StereoBeliefPropagation` constructors.
-
-.. ocv:function:: ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_16S)
-
-.. ocv:function:: ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F)
-
-    :param ndisp: Number of disparities.
-
-    :param iters: Number of BP iterations on each level.
-
-    :param levels: Number of levels.
-
-    :param max_data_term: Threshold for data cost truncation.
-
-    :param data_weight: Data weight.
-
-    :param max_disc_term: Threshold for discontinuity truncation.
-
-    :param disc_single_jump: Discontinuity single jump.
-
-    :param msg_type: Type for messages.  ``CV_16SC1``  and  ``CV_32FC1`` types are supported.
-
-``StereoBeliefPropagation`` uses a truncated linear model for the data cost and discontinuity terms:
-
-.. math::
-
-    DataCost = data \_ weight  \cdot \min ( \lvert Img_Left(x,y)-Img_Right(x-d,y)  \rvert , max \_ data \_ term)
-
-.. math::
-
-    DiscTerm =  \min (disc \_ single \_ jump  \cdot \lvert f_1-f_2  \rvert , max \_ disc \_ term)
-
-For more details, see [Felzenszwalb2006]_.
-
-By default, :ocv:class:`ocl::StereoBeliefPropagation` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
-
-.. math::
-
-    10  \cdot 2^{levels-1}  \cdot max \_ data \_ term < SHRT \_ MAX
-
-
-
-ocl::StereoBeliefPropagation::estimateRecommendedParams
------------------------------------------------------------
-Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters`` and ``levels`` ) for the specified image size ( ``width`` and ``height`` ).
-
-.. ocv:function:: void ocl::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
-
-
-
-ocl::StereoBeliefPropagation::operator ()
----------------------------------------------
-Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair or data cost.
-
-.. ocv:function:: void ocl::StereoBeliefPropagation::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
-
-.. ocv:function:: void ocl::StereoBeliefPropagation::operator ()(const oclMat& data, oclMat& disparity)
-
-    :param left: Left image. ``CV_8UC1`` , ``CV_8UC3``  and  ``CV_8UC4``  types are supported.
-
-    :param right: Right image with the same size and the same type as the left one.
-
-    :param data: User-specified data cost, a matrix of ``msg_type`` type and ``Size(<image columns>*ndisp, <image rows>)`` size.
-
-    :param disparity: Output disparity map. If  ``disparity``  is empty, the output type is  ``CV_16SC1`` . Otherwise, the type is retained.
-
-ocl::StereoConstantSpaceBP
-------------------------------
-.. ocv:class:: ocl::StereoConstantSpaceBP
-
-Class computing stereo correspondence using the constant space belief propagation algorithm. ::
-
-    class CV_EXPORTS StereoConstantSpaceBP
-    {
-    public:
-        enum { DEFAULT_NDISP    = 128 };
-        enum { DEFAULT_ITERS    = 8   };
-        enum { DEFAULT_LEVELS   = 4   };
-        enum { DEFAULT_NR_PLANE = 4   };
-        static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
-        explicit StereoConstantSpaceBP(
-            int ndisp    = DEFAULT_NDISP,
-            int iters    = DEFAULT_ITERS,
-            int levels   = DEFAULT_LEVELS,
-            int nr_plane = DEFAULT_NR_PLANE,
-            int msg_type = CV_32F);
-        StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
-            float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
-            int min_disp_th = 0,
-            int msg_type = CV_32F);
-        void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
-        int ndisp;
-        int iters;
-        int levels;
-        int nr_plane;
-        float max_data_term;
-        float data_weight;
-        float max_disc_term;
-        float disc_single_jump;
-        int min_disp_th;
-        int msg_type;
-        bool use_local_init_data_cost;
-    private:
-        /* hidden */
-    };
-
-The class implements algorithm described in [Yang2010]_. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, see the paper mentioned above. By default, a local algorithm is used. To enable a global algorithm, set ``use_local_init_data_cost`` to ``false`` .
-
-
-ocl::StereoConstantSpaceBP::StereoConstantSpaceBP
------------------------------------------------------
-Enables the :ocv:class:`ocl::StereoConstantSpaceBP` constructors.
-
-.. ocv:function:: ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F)
-
-.. ocv:function:: ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F)
-
-    :param ndisp: Number of disparities.
-
-    :param iters: Number of BP iterations on each level.
-
-    :param levels: Number of levels.
-
-    :param nr_plane: Number of disparity levels on the first level.
-
-    :param max_data_term: Truncation of data cost.
-
-    :param data_weight: Data weight.
-
-    :param max_disc_term: Truncation of discontinuity.
-
-    :param disc_single_jump: Discontinuity single jump.
-
-    :param min_disp_th: Minimal disparity threshold.
-
-    :param msg_type: Type for messages.  ``CV_16SC1``  and  ``CV_32FC1`` types are supported.
-
-``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and discontinuity terms:
-
-.. math::
-
-    DataCost = data \_ weight  \cdot \min ( \lvert I_2-I_1  \rvert , max \_ data \_ term)
-
-.. math::
-
-    DiscTerm =  \min (disc \_ single \_ jump  \cdot \lvert f_1-f_2  \rvert , max \_ disc \_ term)
-
-For more details, see [Yang2010]_.
-
-By default, ``StereoConstantSpaceBP`` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
-
-.. math::
-
-    10  \cdot 2^{levels-1}  \cdot max \_ data \_ term < SHRT \_ MAX
-
-
-
-ocl::StereoConstantSpaceBP::estimateRecommendedParams
----------------------------------------------------------
-Uses a heuristic method to compute parameters (ndisp, iters, levelsand nrplane) for the specified image size (widthand height).
-
-.. ocv:function:: void ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
-
-
-
-ocl::StereoConstantSpaceBP::operator ()
--------------------------------------------
-Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
-
-.. ocv:function:: void ocl::StereoConstantSpaceBP::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
-
-    :param left: Left image. ``CV_8UC1`` , ``CV_8UC3``  and  ``CV_8UC4``  types are supported.
-
-    :param right: Right image with the same size and the same type as the left one.
-
-    :param disparity: Output disparity map. If  ``disparity``  is empty, the output type is  ``CV_16SC1`` . Otherwise, the output type is  ``disparity.type()`` .
diff --git a/modules/ocl/doc/data_structures.rst b/modules/ocl/doc/data_structures.rst
deleted file mode 100644
index bde3d14..0000000
--- a/modules/ocl/doc/data_structures.rst
+++ /dev/null
@@ -1,189 +0,0 @@
-Data Structures
-=============================
-
-.. ocv:class:: ocl::oclMat
-
-OpenCV C++ 1-D or 2-D dense array class ::
-
-    class CV_EXPORTS oclMat
-    {
-    public:
-        //! default constructor
-        oclMat();
-        //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
-        oclMat(int rows, int cols, int type);
-        oclMat(Size size, int type);
-        //! constucts oclMatrix and fills it with the specified value _s.
-        oclMat(int rows, int cols, int type, const Scalar &s);
-        oclMat(Size size, int type, const Scalar &s);
-        //! copy constructor
-        oclMat(const oclMat &m);
-
-        //! constructor for oclMatrix headers pointing to user-allocated data
-        oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
-        oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
-
-        //! creates a matrix header for a part of the bigger matrix
-        oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
-        oclMat(const oclMat &m, const Rect &roi);
-
-        //! builds oclMat from Mat. Perfom blocking upload to device.
-        explicit oclMat (const Mat &m);
-
-        //! destructor - calls release()
-        ~oclMat();
-
-        //! assignment operators
-        oclMat &operator = (const oclMat &m);
-        //! assignment operator. Perfom blocking upload to device.
-        oclMat &operator = (const Mat &m);
-        oclMat &operator = (const oclMatExpr& expr);
-
-        //! pefroms blocking upload data to oclMat.
-        void upload(const cv::Mat &m);
-
-
-        //! downloads data from device to host memory. Blocking calls.
-        operator Mat() const;
-        void download(cv::Mat &m) const;
-
-        //! convert to _InputArray
-        operator _InputArray();
-
-        //! convert to _OutputArray
-        operator _OutputArray();
-
-        //! returns a new oclMatrix header for the specified row
-        oclMat row(int y) const;
-        //! returns a new oclMatrix header for the specified column
-        oclMat col(int x) const;
-        //! ... for the specified row span
-        oclMat rowRange(int startrow, int endrow) const;
-        oclMat rowRange(const Range &r) const;
-        //! ... for the specified column span
-        oclMat colRange(int startcol, int endcol) const;
-        oclMat colRange(const Range &r) const;
-
-        //! returns deep copy of the oclMatrix, i.e. the data is copied
-        oclMat clone() const;
-
-        //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
-        // It calls m.create(this->size(), this->type()).
-        // It supports any data type
-        void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
-
-        //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale.
-        void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
-
-        void assignTo( oclMat &m, int type = -1 ) const;
-
-        //! sets every oclMatrix element to s
-        oclMat& operator = (const Scalar &s);
-        //! sets some of the oclMatrix elements to s, according to the mask
-        oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
-        //! creates alternative oclMatrix header for the same data, with different
-        // number of channels and/or different number of rows. see cvReshape.
-        oclMat reshape(int cn, int rows = 0) const;
-
-        //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
-        // previous data is unreferenced if needed.
-        void create(int rows, int cols, int type);
-        void create(Size size, int type);
-
-        //! allocates new oclMatrix with specified device memory type.
-        void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
-        void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
-
-        //! decreases reference counter;
-        // deallocate the data when reference counter reaches 0.
-        void release();
-
-        //! swaps with other smart pointer
-        void swap(oclMat &mat);
-
-        //! locates oclMatrix header within a parent oclMatrix. See below
-        void locateROI( Size &wholeSize, Point &ofs ) const;
-        //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
-        oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
-        //! extracts a rectangular sub-oclMatrix
-        // (this is a generalized form of row, rowRange etc.)
-        oclMat operator()( Range rowRange, Range colRange ) const;
-        oclMat operator()( const Rect &roi ) const;
-
-        oclMat& operator+=( const oclMat& m );
-        oclMat& operator-=( const oclMat& m );
-        oclMat& operator*=( const oclMat& m );
-        oclMat& operator/=( const oclMat& m );
-
-        //! returns true if the oclMatrix data is continuous
-        // (i.e. when there are no gaps between successive rows).
-        // similar to CV_IS_oclMat_CONT(cvoclMat->type)
-        bool isContinuous() const;
-        //! returns element size in bytes,
-        // similar to CV_ELEM_SIZE(cvMat->type)
-        size_t elemSize() const;
-        //! returns the size of element channel in bytes.
-        size_t elemSize1() const;
-        //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
-        int type() const;
-        //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
-        //! 3 channels element actually use 4 channel space
-        int ocltype() const;
-        //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
-        int depth() const;
-        //! returns element type, similar to CV_MAT_CN(cvMat->type)
-        int channels() const;
-        //! returns element type, return 4 for 3 channels element,
-        //!becuase 3 channels element actually use 4 channel space
-        int oclchannels() const;
-        //! returns step/elemSize1()
-        size_t step1() const;
-        //! returns oclMatrix size:
-        // width == number of columns, height == number of rows
-        Size size() const;
-        //! returns true if oclMatrix data is NULL
-        bool empty() const;
-
-        //! matrix transposition
-        oclMat t() const;
-
-        /*! includes several bit-fields:
-          - the magic signature
-          - continuity flag
-          - depth
-          - number of channels
-          */
-        int flags;
-        //! the number of rows and columns
-        int rows, cols;
-        //! a distance between successive rows in bytes; includes the gap if any
-        size_t step;
-        //! pointer to the data(OCL memory object)
-        uchar *data;
-
-        //! pointer to the reference counter;
-        // when oclMatrix points to user-allocated data, the pointer is NULL
-        int *refcount;
-
-        //! helper fields used in locateROI and adjustROI
-        //datastart and dataend are not used in current version
-        uchar *datastart;
-        uchar *dataend;
-
-        //! OpenCL context associated with the oclMat object.
-        Context *clCxt;
-        //add offset for handle ROI, calculated in byte
-        int offset;
-        //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
-        int wholerows;
-        int wholecols;
-    };
-
-Basically speaking, the ``oclMat`` is the mirror of ``Mat`` with the extension of OCL feature, the members have the same meaning and useage of ``Mat`` except following:
-
-* ``datastart`` and ``dataend`` are replaced with ``wholerows`` and ``wholecols``
-
-* Only basic flags are supported in ``oclMat`` (i.e. depth number of channels)
-
-* All the 3-channel matrix (i.e. RGB image) are represented by 4-channel matrix in ``oclMat``. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV ``Mat`` and ``oclMat``.
-    For example: If a ``oclMat`` has 3 channels, ``channels()`` returns 3 and ``oclchannels()`` returns 4
diff --git a/modules/ocl/doc/feature_detection_and_description.rst b/modules/ocl/doc/feature_detection_and_description.rst
deleted file mode 100644
index 9cfabdf..0000000
--- a/modules/ocl/doc/feature_detection_and_description.rst
+++ /dev/null
@@ -1,836 +0,0 @@
-Feature Detection And Description
-=================================
-
-.. highlight:: cpp
-
-ocl::Canny
--------------------
-Finds edges in an image using the [Canny86]_ algorithm.
-
-.. ocv:function:: void ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-.. ocv:function:: void ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-    :param image: Single-channel 8-bit input image.
-
-    :param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type.
-
-    :param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type.
-
-    :param edges: Output edge map. It has the same size and type as  ``image`` .
-
-    :param low_thresh: First threshold for the hysteresis procedure.
-
-    :param high_thresh: Second threshold for the hysteresis procedure.
-
-    :param apperture_size: Aperture size for the  :ocv:func:`Sobel`  operator.
-
-    :param L2gradient: Flag indicating whether a more accurate  :math:`L_2`  norm  :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}`  should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default  :math:`L_1`  norm  :math:`=|dI/dx|+|dI/dy|`  is enough ( ``L2gradient=false`` ).
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. seealso:: :ocv:func:`Canny`
-
-
-ocl::BruteForceMatcher_OCL_base
------------------------------------
-.. ocv:class:: ocl::BruteForceMatcher_OCL_base
-
-Brute-force descriptor matcher. For each descriptor in the first set, this matcher finds the closest descriptor in the second set by trying each one. This descriptor matcher supports masking permissible matches between descriptor sets. ::
-
-    class BruteForceMatcher_OCL_base
-    {
-    public:
-            enum DistType {L1Dist = 0, L2Dist, HammingDist};
-
-        // Add descriptors to train descriptor collection.
-        void add(const std::vector<oclMat>& descCollection);
-
-        // Get train descriptors collection.
-        const std::vector<oclMat>& getTrainDescriptors() const;
-
-        // Clear train descriptors collection.
-        void clear();
-
-        // Return true if there are no train descriptors in collection.
-        bool empty() const;
-
-        // Return true if the matcher supports mask in match methods.
-        bool isMaskSupported() const;
-
-        void matchSingle(const oclMat& query, const oclMat& train,
-            oclMat& trainIdx, oclMat& distance,
-            const oclMat& mask = oclMat());
-
-        static void matchDownload(const oclMat& trainIdx,
-            const oclMat& distance, std::vector<DMatch>& matches);
-        static void matchConvert(const Mat& trainIdx,
-            const Mat& distance, std::vector<DMatch>& matches);
-
-        void match(const oclMat& query, const oclMat& train,
-            std::vector<DMatch>& matches, const oclMat& mask = oclMat());
-
-        void makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection,
-            const vector<oclMat>& masks = std::vector<oclMat>());
-
-        void matchCollection(const oclMat& query, const oclMat& trainCollection,
-            oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
-            const oclMat& maskCollection);
-
-        static void matchDownload(const oclMat& trainIdx, oclMat& imgIdx,
-            const oclMat& distance, std::vector<DMatch>& matches);
-        static void matchConvert(const Mat& trainIdx, const Mat& imgIdx,
-            const Mat& distance, std::vector<DMatch>& matches);
-
-        void match(const oclMat& query, std::vector<DMatch>& matches,
-            const std::vector<oclMat>& masks = std::vector<oclMat>());
-
-        void knnMatchSingle(const oclMat& query, const oclMat& train,
-            oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k,
-            const oclMat& mask = oclMat());
-
-        static void knnMatchDownload(const oclMat& trainIdx, const oclMat& distance,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-        static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-        void knnMatch(const oclMat& query, const oclMat& train,
-            std::vector< std::vector<DMatch> >& matches, int k,
-            const oclMat& mask = oclMat(), bool compactResult = false);
-
-        void knnMatch2Collection(const oclMat& query, const oclMat& trainCollection,
-            oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
-            const oclMat& maskCollection = oclMat());
-
-        static void knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-        static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-        void knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-            const std::vector<oclMat>& masks = std::vector<oclMat>(),
-            bool compactResult = false);
-
-        void radiusMatchSingle(const oclMat& query, const oclMat& train,
-            oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
-            const oclMat& mask = oclMat());
-
-        static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-        static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-        void radiusMatch(const oclMat& query, const oclMat& train,
-            std::vector< std::vector<DMatch> >& matches, float maxDistance,
-            const oclMat& mask = oclMat(), bool compactResult = false);
-
-        void radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
-            const std::vector<oclMat>& masks = std::vector<oclMat>());
-
-        static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-        static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
-            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-        void radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
-            const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);
-
-                DistType distType;
-
-    private:
-        std::vector<oclMat> trainDescCollection;
-    };
-
-
-The class ``BruteForceMatcher_OCL_base`` has an interface similar to the class :ocv:class:`DescriptorMatcher`. It has two groups of ``match`` methods: for matching descriptors of one image with another image or with an image set. Also, all functions have an alternative to save results either to the GPU memory or to the CPU memory. ``BruteForceMatcher_OCL_base`` supports only the ``L1<float>``, ``L2<float>``, and ``Hamming`` distance types.
-
-.. seealso:: :ocv:class:`DescriptorMatcher`, :ocv:class:`BFMatcher`
-
-
-
-ocl::BruteForceMatcher_OCL_base::match
-------------------------------------------
-Finds the best match for each descriptor from a query set with train descriptors.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, const oclMat& train, std::vector<DMatch>& matches, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, std::vector<DMatch>& matches, const std::vector<oclMat>& masks = std::vector<oclMat>())
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchCollection( const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& masks=oclMat() )
-
-.. seealso:: :ocv:func:`DescriptorMatcher::match`
-
-
-
-ocl::BruteForceMatcher_OCL_base::makeGpuCollection
-------------------------------------------------------
-Performs a GPU collection of train descriptors and masks in a suitable format for the :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` function.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const vector<oclMat>& masks = std::vector<oclMat>())
-
-
-ocl::BruteForceMatcher_OCL_base::matchDownload
---------------------------------------------------
-Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` to vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: static void ocl::BruteForceMatcher_OCL_base::matchDownload( const oclMat& trainIdx, const oclMat& distance, std::vector<DMatch>& matches )
-
-.. ocv:function:: static void ocl::BruteForceMatcher_OCL_base::matchDownload( const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector<DMatch>& matches )
-
-
-ocl::BruteForceMatcher_OCL_base::matchConvert
--------------------------------------------------
-Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` to vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>&matches)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>&matches)
-
-
-
-ocl::BruteForceMatcher_OCL_base::knnMatch
----------------------------------------------
-Finds the ``k`` best matches for each descriptor from a query set with train descriptors.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, const oclMat& train, std::vector< std::vector<DMatch> >&matches, int k, const oclMat& mask = oclMat(), bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >&matches, int k, const std::vector<oclMat>&masks = std::vector<oclMat>(), bool compactResult = false )
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& maskCollection = oclMat())
-
-    :param query: Query set of descriptors.
-
-    :param train: Training set of descriptors. It is not added to the train descriptors collection stored in the class object.
-
-    :param k: Number of the best matches per each query descriptor (or less if it is not possible).
-
-    :param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
-
-    :param compactResult: If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-The function returns detected ``k`` (or less if not possible) matches in the increasing order by distance.
-
-The third variant of the method stores the results in GPU memory.
-
-.. seealso:: :ocv:func:`DescriptorMatcher::knnMatch`
-
-
-
-ocl::BruteForceMatcher_OCL_base::knnMatchDownload
------------------------------------------------------
-Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatch2Collection` to vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat& trainIdx, const oclMat& distance, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
-
-If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-
-ocl::BruteForceMatcher_OCL_base::knnMatchConvert
-----------------------------------------------------
-Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatch2Collection` to CPU vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
-
-If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-
-ocl::BruteForceMatcher_OCL_base::radiusMatch
-------------------------------------------------
-For each query descriptor, finds the best matches with a distance less than a given threshold.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, const oclMat& train, std::vector< std::vector<DMatch> >&matches, float maxDistance, const oclMat& mask = oclMat(), bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >&matches, float maxDistance, const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const std::vector<oclMat>& masks = std::vector<oclMat>())
-
-    :param query: Query set of descriptors.
-
-    :param train: Training set of descriptors. It is not added to train descriptors collection stored in the class object.
-
-    :param maxDistance: Distance threshold.
-
-    :param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
-
-    :param compactResult: If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-The function returns detected matches in the increasing order by distance.
-
-The methods work only on devices with the compute capability  :math:`>=` 1.1.
-
-The third variant of the method stores the results in GPU memory and does not sort the points by the distance.
-
-.. seealso:: :ocv:func:`DescriptorMatcher::radiusMatch`
-
-
-
-ocl::BruteForceMatcher_OCL_base::radiusMatchDownload
---------------------------------------------------------
-Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
-
-If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-
-
-ocl::BruteForceMatcher_OCL_base::radiusMatchConvert
--------------------------------------------------------
-Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
-
-.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
-
-If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
-
-
-ocl::FAST_OCL
-------------------
-.. ocv:class:: ocl::FAST_OCL
-
-Class used for corner detection using the FAST algorithm. ::
-
-        class CV_EXPORTS FAST_OCL
-        {
-        public:
-            enum
-            {
-                X_ROW = 0,
-                Y_ROW,
-                RESPONSE_ROW,
-                ROWS_COUNT
-            };
-
-            // all features have same size
-            static const int FEATURE_SIZE = 7;
-
-            explicit FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
-
-            //! finds the keypoints using FAST detector
-            //! supports only CV_8UC1 images
-            void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
-            void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
-
-            //! download keypoints from device to host memory
-            static void downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-            //! convert keypoints to KeyPoint vector
-            static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
-
-            //! release temporary buffer's memory
-            void release();
-
-            bool nonmaxSupression;
-
-            int threshold;
-
-            //! max keypoints = keypointsRatio * img.size().area()
-            double keypointsRatio;
-
-            //! find keypoints and compute their responses if nonmaxSupression is true
-            //! return count of detected keypoints
-            int calcKeyPointsLocation(const oclMat& image, const oclMat& mask);
-
-            //! get final array of keypoints
-            //! performs non-max suppression if needed
-            //! return final count of keypoints
-            int getKeyPoints(oclMat& keypoints);
-
-        private:
-            // Hidden
-        };
-
-
-The class ``FAST_OCL`` implements FAST corner detection algorithm.
-
-.. seealso:: :ocv:func:`FAST`
-
-
-
-ocl::FAST_OCL::FAST_OCL
---------------------------
-Constructor.
-
-.. ocv:function:: ocl::FAST_OCL::FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05)
-
-    :param threshold: Threshold on difference between intensity of the central pixel and pixels on a circle around this pixel.
-
-    :param nonmaxSupression: If it is true, non-maximum suppression is applied to detected corners (keypoints).
-
-    :param keypointsRatio: Inner buffer size for keypoints store is determined as (keypointsRatio * image_width * image_height).
-
-
-
-ocl::FAST_OCL::operator ()
-----------------------------
-Finds the keypoints using FAST detector.
-
-.. ocv:function:: void ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
-.. ocv:function:: void ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
-
-    :param image: Image where keypoints (corners) are detected. Only 8-bit grayscale images are supported.
-
-    :param mask: Optional input mask that marks the regions where we should detect features.
-
-    :param keypoints: The output vector of keypoints. Can be stored in either host or device memory. For device memory:
-
-            * X_ROW of keypoints will contain the horizontal coordinate of the i'th point
-            * Y_ROW of keypoints will contain the vertical coordinate of the i'th point
-            * RESPONSE_ROW will contain response of i'th point (if non-maximum suppression is applied)
-
-
-
-ocl::FAST_OCL::downloadKeypoints
-----------------------------------
-Download keypoints from device to host memory.
-
-.. ocv:function:: void ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
-
-
-
-ocl::FAST_OCL::convertKeypoints
----------------------------------
-Converts keypoints from OpenCL representation to vector of ``KeyPoint``.
-
-.. ocv:function:: void ocl::FAST_OCL::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
-
-
-
-ocl::FAST_OCL::release
-------------------------
-Releases inner buffer memory.
-
-.. ocv:function:: void ocl::FAST_OCL::release()
-
-
-
-ocl::FAST_OCL::calcKeyPointsLocation
---------------------------------------
-Finds keypoints. If ``nonmaxSupression`` is true, responses are computed and keypoints with smaller responses are eliminated from 9-neighborhood regions.
-
-.. ocv:function:: int ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& image, const oclMat& mask)
-
-    :param image: Image where keypoints (corners) are detected. Only 8-bit grayscale images are supported.
-
-    :param mask: Optional input mask that marks the regions where we should detect features.
-
-The function returns the amount of detected keypoints.
-
-
-
-ocl::FAST_OCL::getKeyPoints
------------------------------
-Gets final array of keypoints.
-
-.. ocv:function:: int ocl::FAST_OCL::getKeyPoints(oclMat& keypoints)
-
-    :param keypoints: The output vector of keypoints.
-
-The function performs non-max suppression if needed and returns the final amount of keypoints.
-
-ocl::BRIEF_OCL
-------------------
-.. ocv:class:: ocl::BRIEF_OCL
-
-Class for computing BRIEF descriptors described in a paper of Calonder M., Lepetit V.,
-Strecha C., Fua P. *BRIEF: Binary Robust Independent Elementary Features* ,
-11th European Conference on Computer Vision (ECCV), Heraklion, Crete. LNCS Springer, September 2010. ::
-
-        class CV_EXPORTS BRIEF_OCL
-        {
-        public:
-            static const int PATCH_SIZE = 48;
-            static const int KERNEL_SIZE = 9;
-
-            explicit BRIEF_OCL(int _bytes = 32);
-
-            //!computes the brief descriptor for a set of given keypoints
-            //! supports only CV_8UC1 images
-            void compute(const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors) const;
-
-            static int getBorderSize();
-        protected:
-            ...
-        };
-
-ocl::BRIEF_OCL::BRIEF_OCL
---------------------------
-Constructor.
-
-.. ocv:function:: ocl::BRIEF_OCL::BRIEF_OCL(int _bytes = 32)
-
-    :param bytes: The length of the descriptor in bytes. Supported values are 16, 32 or 64 bytes.
-
-ocl::BRIEF_OCL::compute
-------------------------
-Computes BRIEF descriptors.
-
-.. ocv:function:: void ocl::BRIEF_OCL::compute(const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors) const
-
-    :param image: The input 8-bit grayscale image.
-
-    :param keypoints: The keypoints.
-
-    :param mask: Input/output mask. If mask has the same number of columns as keypoints, descriptors are computed for keypoints with a non-zero mask element.
-                On return, it indicates for which keypoints a descriptor was computed (a descriptor is not computed if a keypoint is near the image border).
-
-    :param descriptors: The computed descriptors. It has size keypoints.cols x bytes.
-
-ocl::BRIEF_OCL::getBorderSize
------------------------------
-Returns the size of the image border where descriptors cannot be computed.
-
-.. ocv:function:: static int ocl::BRIEF_OCL::getBorderSize()
-
-ocl::HOGDescriptor
-----------------------
-
-.. ocv:struct:: ocl::HOGDescriptor
-
-The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detector. ::
-
-    struct CV_EXPORTS HOGDescriptor
-    {
-        enum { DEFAULT_WIN_SIGMA = -1 };
-        enum { DEFAULT_NLEVELS = 64 };
-        enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-        HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                      Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                      int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                      double threshold_L2hys=0.2, bool gamma_correction=true,
-                      int nlevels=DEFAULT_NLEVELS);
-
-        size_t getDescriptorSize() const;
-        size_t getBlockHistogramSize() const;
-
-        void setSVMDetector(const vector<float>& detector);
-
-        static vector<float> getDefaultPeopleDetector();
-        static vector<float> getPeopleDetector48x96();
-        static vector<float> getPeopleDetector64x128();
-
-        void detect(const oclMat& img, vector<Point>& found_locations,
-                    double hit_threshold=0, Size win_stride=Size(),
-                    Size padding=Size());
-
-        void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,
-                              double hit_threshold=0, Size win_stride=Size(),
-                              Size padding=Size(), double scale0=1.05,
-                              int group_threshold=2);
-
-        void getDescriptors(const oclMat& img, Size win_stride,
-                            oclMat& descriptors,
-                            int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-        Size win_size;
-        Size block_size;
-        Size block_stride;
-        Size cell_size;
-        int nbins;
-        double win_sigma;
-        double threshold_L2hys;
-        bool gamma_correction;
-        int nlevels;
-
-    private:
-        // Hidden
-    }
-
-
-Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.
-
-.. note::
-
-   (Ocl) An example using the HOG descriptor can be found at opencv_source_code/samples/ocl/hog.cpp
-
-ocl::HOGDescriptor::HOGDescriptor
--------------------------------------
-Creates the ``HOG`` descriptor and detector.
-
-.. ocv:function:: ocl::HOGDescriptor::HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, double threshold_L2hys=0.2, bool gamma_correction=true, int nlevels=DEFAULT_NLEVELS)
-
-   :param win_size: Detection window size. Align to block size and block stride.
-
-   :param block_size: Block size in pixels. Align to cell size. Only (16,16) is supported for now.
-
-   :param block_stride: Block stride. It must be a multiple of cell size.
-
-   :param cell_size: Cell size. Only (8, 8) is supported for now.
-
-   :param nbins: Number of bins. Only 9 bins per cell are supported for now.
-
-   :param win_sigma: Gaussian smoothing window parameter.
-
-   :param threshold_L2hys: L2-Hys normalization method shrinkage.
-
-   :param gamma_correction: Flag to specify whether the gamma correction preprocessing is required or not.
-
-   :param nlevels: Maximum number of detection window increases.
-
-
-
-ocl::HOGDescriptor::getDescriptorSize
------------------------------------------
-Returns the number of coefficients required for the classification.
-
-.. ocv:function:: size_t ocl::HOGDescriptor::getDescriptorSize() const
-
-
-
-ocl::HOGDescriptor::getBlockHistogramSize
----------------------------------------------
-Returns the block histogram size.
-
-.. ocv:function:: size_t ocl::HOGDescriptor::getBlockHistogramSize() const
-
-
-
-ocl::HOGDescriptor::setSVMDetector
---------------------------------------
-Sets coefficients for the linear SVM classifier.
-
-.. ocv:function:: void ocl::HOGDescriptor::setSVMDetector(const vector<float>& detector)
-
-
-
-ocl::HOGDescriptor::getDefaultPeopleDetector
-------------------------------------------------
-Returns coefficients of the classifier trained for people detection (for default window size).
-
-.. ocv:function:: static vector<float> ocl::HOGDescriptor::getDefaultPeopleDetector()
-
-
-
-ocl::HOGDescriptor::getPeopleDetector48x96
-----------------------------------------------
-Returns coefficients of the classifier trained for people detection (for 48x96 windows).
-
-.. ocv:function:: static vector<float> ocl::HOGDescriptor::getPeopleDetector48x96()
-
-
-
-ocl::HOGDescriptor::getPeopleDetector64x128
------------------------------------------------
-Returns coefficients of the classifier trained for people detection (for 64x128 windows).
-
-.. ocv:function:: static vector<float> ocl::HOGDescriptor::getPeopleDetector64x128()
-
-
-
-ocl::HOGDescriptor::detect
-------------------------------
-Performs object detection without a multi-scale window.
-
-.. ocv:function:: void ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size())
-
-   :param img: Source image.  ``CV_8UC1``  and  ``CV_8UC4`` types are supported for now.
-
-   :param found_locations: Left-top corner points of detected objects boundaries.
-
-   :param hit_threshold: Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here.
-
-   :param win_stride: Window stride. It must be a multiple of block stride.
-
-   :param padding: Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-
-
-
-ocl::HOGDescriptor::detectMultiScale
-----------------------------------------
-Performs object detection with a multi-scale window.
-
-.. ocv:function:: void ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, int group_threshold=2)
-
-   :param img: Source image. See  :ocv:func:`ocl::HOGDescriptor::detect`  for type limitations.
-
-   :param found_locations: Detected objects boundaries.
-
-   :param hit_threshold: Threshold for the distance between features and SVM classifying plane. See  :ocv:func:`ocl::HOGDescriptor::detect`  for details.
-
-   :param win_stride: Window stride. It must be a multiple of block stride.
-
-   :param padding: Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-
-   :param scale0: Coefficient of the detection window increase.
-
-   :param group_threshold: Coefficient to regulate the similarity threshold. When detected, some objects can be covered by many rectangles. 0 means not to perform grouping. See  :ocv:func:`groupRectangles` .
-
-
-
-ocl::HOGDescriptor::getDescriptors
---------------------------------------
-Returns block descriptors computed for the whole image.
-
-.. ocv:function:: void ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format=DESCR_FORMAT_COL_BY_COL)
-
-   :param img: Source image. See  :ocv:func:`ocl::HOGDescriptor::detect`  for type limitations.
-
-   :param win_stride: Window stride. It must be a multiple of block stride.
-
-   :param descriptors: 2D array of descriptors.
-
-   :param descr_format: Descriptor storage format:
-
-        * **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
-
-        * **DESCR_FORMAT_COL_BY_COL** - Column-major order.
-
-The function is mainly used to learn the classifier.
-
-
-
-ocl::ORB_OCL
---------------
-.. ocv:class:: ocl::ORB_OCL
-
-Class for extracting ORB features and descriptors from an image. ::
-
-    class ORB_OCL
-    {
-    public:
-        enum
-        {
-            X_ROW = 0,
-            Y_ROW,
-            RESPONSE_ROW,
-            ANGLE_ROW,
-            OCTAVE_ROW,
-            SIZE_ROW,
-            ROWS_COUNT
-        };
-
-        enum
-        {
-            DEFAULT_FAST_THRESHOLD = 20
-        };
-
-        explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f,
-                         int nLevels = 8, int edgeThreshold = 31,
-                         int firstLevel = 0, int WTA_K = 2,
-                         int scoreType = 0, int patchSize = 31);
-
-        void operator()(const oclMat& image, const oclMat& mask,
-                        std::vector<KeyPoint>& keypoints);
-        void operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
-
-        void operator()(const oclMat& image, const oclMat& mask,
-                        std::vector<KeyPoint>& keypoints, oclMat& descriptors);
-        void operator()(const oclMat& image, const oclMat& mask,
-                        oclMat& keypoints, oclMat& descriptors);
-
-        void downloadKeyPoints(oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-        void convertKeyPoints(Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-        int descriptorSize() const;
-        int descriptorType() const;
-        int defaultNorm() const;
-
-        void setFastParams(int threshold, bool nonmaxSupression = true);
-
-        void release();
-
-        bool blurForDescriptor;
-    };
-
-The class implements ORB feature detection and description algorithm.
-
-
-
-ocl::ORB_OCL::ORB_OCL
-------------------------
-Constructor.
-
-.. ocv:function:: ocl::ORB_OCL::ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31)
-
-    :param nFeatures: The maximum number of features to retain.
-
-    :param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x fewer pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, a scale factor too close to 1 means that more pyramid levels are needed to cover a certain scale range, so the speed will suffer.
-
-    :param nLevels: The number of pyramid levels. The smallest level will have linear size equal to ``input_image_linear_size/pow(scaleFactor, nLevels)``.
-
-    :param edgeThreshold: This is the size of the border where the features are not detected. It should roughly match the ``patchSize`` parameter.
-
-    :param firstLevel: It should be 0 in the current implementation.
-
-    :param WTA_K: The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 random points (of course, those point coordinates are random, but they are generated from the pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, denoted as ``NORM_HAMMING2`` (2 bits per bin).  When ``WTA_K=4``, we take 4 random points to compute each bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
-
-    :param scoreType: The default HARRIS_SCORE means that the Harris algorithm is used to rank features (the score is written to ``KeyPoint::response`` and is used to retain the best ``nFeatures`` features); FAST_SCORE is an alternative value of the parameter that produces slightly less stable keypoints, but it is a little faster to compute.
-
-    :param patchSize: size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger.
-
-
-
-ocl::ORB_OCL::operator()
---------------------------
-Detects keypoints and computes descriptors for them.
-
-.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
-
-.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
-
-.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
-
-.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
-
-    :param image: Input 8-bit grayscale image.
-
-    :param mask: Optional input mask that marks the regions where we should detect features.
-
-    :param keypoints: The input/output vector of keypoints. Can be stored both in host and device memory. For device memory:
-
-            * ``X_ROW`` contains the horizontal coordinate of the i'th feature.
-            * ``Y_ROW`` contains the vertical coordinate of the i'th feature.
-            * ``RESPONSE_ROW`` contains the response of the i'th feature.
-            * ``ANGLE_ROW`` contains the orientation of the i'th feature.
-            * ``OCTAVE_ROW`` contains the octave of the i'th feature.
-            * ``SIZE_ROW`` contains the size of the i'th feature.
-
-    :param descriptors: Computed descriptors. If ``blurForDescriptor`` is true, the image will be blurred before descriptors calculation.
-
-
-
-ocl::ORB_OCL::downloadKeyPoints
----------------------------------
-Download keypoints from device to host memory.
-
-.. ocv:function:: static void ocl::ORB_OCL::downloadKeyPoints( const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints )
-
-
-
-ocl::ORB_OCL::convertKeyPoints
---------------------------------
-Converts keypoints from OCL representation to vector of ``KeyPoint``.
-
-.. ocv:function:: static void ocl::ORB_OCL::convertKeyPoints( const Mat& d_keypoints, std::vector<KeyPoint>& keypoints )
-
-
-
-ocl::ORB_OCL::release
------------------------
-Releases inner buffer memory.
-
-.. ocv:function:: void ocl::ORB_OCL::release()
diff --git a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst
deleted file mode 100644
index 6fbc19a..0000000
--- a/modules/ocl/doc/image_filtering.rst
+++ /dev/null
@@ -1,719 +0,0 @@
-Image Filtering
-=============================
-
-.. highlight:: cpp
-
-ocl::BaseRowFilter_GPU
---------------------------
-.. ocv:class:: ocl::BaseRowFilter_GPU
-
-Base class for linear or non-linear filters that processes rows of 2D arrays. Such filters are used for the "horizontal" filtering passes in separable filters. ::
-
-    class CV_EXPORTS BaseRowFilter_GPU
-    {
-    public:
-        BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
-        virtual ~BaseRowFilter_GPU() {}
-        virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-        int ksize, anchor, bordertype;
-    };
-
-.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`.
-
-ocl::BaseColumnFilter_GPU
------------------------------
-.. ocv:class:: ocl::BaseColumnFilter_GPU
-
-Base class for linear or non-linear filters that processes columns of 2D arrays. Such filters are used for the "vertical" filtering passes in separable filters. ::
-
-    class CV_EXPORTS BaseColumnFilter_GPU
-    {
-    public:
-        BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
-        virtual ~BaseColumnFilter_GPU() {}
-        virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-        int ksize, anchor, bordertype;
-    };
-
-.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`.
-
-ocl::BaseFilter_GPU
------------------------
-.. ocv:class:: ocl::BaseFilter_GPU
-
-Base class for non-separable 2D filters. ::
-
-    class CV_EXPORTS BaseFilter_GPU
-    {
-    public:
-        BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
-            : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
-        virtual ~BaseFilter_GPU() {}
-        virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-        Size ksize;
-        Point anchor;
-        int borderType;
-    };
-
-.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`
-
-ocl::FilterEngine_GPU
-------------------------
-.. ocv:class:: ocl::FilterEngine_GPU
-
-Base class for the Filter Engine. ::
-
-    class CV_EXPORTS FilterEngine_GPU
-    {
-    public:
-        virtual ~FilterEngine_GPU() {}
-
-        virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
-    };
-
-The class can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers. Pointers to the initialized ``FilterEngine_GPU`` instances are returned by various ``create*Filter_GPU`` functions (see below), and they are used inside high-level functions such as :ocv:func:`ocl::filter2D`, :ocv:func:`ocl::erode`, :ocv:func:`ocl::Sobel` , and others.
-
-By using ``FilterEngine_GPU`` instead of functions you can avoid unnecessary memory allocation for intermediate buffers and get better performance: ::
-
-    while (...)
-    {
-        ocl::oclMat src = getImg();
-        ocl::oclMat dst;
-        // Allocate and release buffers at each iterations
-        ocl::GaussianBlur(src, dst, ksize, sigma1);
-    }
-
-    // Allocate buffers only once
-    cv::Ptr<ocl::FilterEngine_GPU> filter =
-        ocl::createGaussianFilter_GPU(CV_8UC4, ksize, sigma1);
-    while (...)
-    {
-        ocl::oclMat src = getImg();
-        ocl::oclMat dst;
-        filter->apply(src, dst, cv::Rect(0, 0, src.cols, src.rows));
-    }
-    // Release buffers only once
-    filter.release();
-
-
-``FilterEngine_GPU`` can process a rectangular sub-region of an image. By default, if ``roi == Rect(0,0,-1,-1)`` , ``FilterEngine_GPU`` processes the inner region of an image ( ``Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height)`` ) because some filters do not check whether indices are outside the image for better performance. See below to understand which filters support processing the whole image and which do not and identify image type limitations.
-
-.. note:: The GPU filters do not support the in-place mode.
-
-.. seealso:: :ocv:class:`ocl::BaseRowFilter_GPU`, :ocv:class:`ocl::BaseColumnFilter_GPU`, :ocv:class:`ocl::BaseFilter_GPU`, :ocv:func:`ocl::createFilter2D_GPU`, :ocv:func:`ocl::createSeparableFilter_GPU`, :ocv:func:`ocl::createBoxFilter_GPU`, :ocv:func:`ocl::createMorphologyFilter_GPU`, :ocv:func:`ocl::createLinearFilter_GPU`, :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`ocl::createDerivFilter_GPU`, :ocv:func:`ocl::createGaussianFilter_GPU`
-
-ocl::createFilter2D_GPU
----------------------------
-Creates a non-separable filter engine with the specified filter.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createFilter2D_GPU( const Ptr<BaseFilter_GPU> filter2D)
-
-    :param filter2D: Non-separable 2D filter.
-
-Usually this function is used inside such high-level functions as :ocv:func:`ocl::createLinearFilter_GPU`, :ocv:func:`ocl::createBoxFilter_GPU`.
-
-
-ocl::createSeparableFilter_GPU
-----------------------------------
-Creates a separable filter engine with the specified filters.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter, const Ptr<BaseColumnFilter_GPU> &columnFilter)
-
-    :param rowFilter: "Horizontal" 1D filter.
-
-    :param columnFilter: "Vertical" 1D filter.
-
-Usually this function is used inside such high-level functions as :ocv:func:`ocl::createSeparableLinearFilter_GPU`.
-
-ocl::createBoxFilter_GPU
-----------------------------
-Creates a normalized 2D box filter.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createBoxFilter_GPU(int srcType, int dstType, const Size &ksize, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
-
-.. ocv:function:: Ptr<BaseFilter_GPU> ocl::getBoxFilter_GPU(int srcType, int dstType, const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
-
-    :param srcType: Input image type.
-
-    :param dstType: Output image type.  It supports only the same values as the source type.
-
-    :param ksize: Kernel size.
-
-    :param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center.
-
-    :param borderType: Border type.
-
-.. seealso:: :ocv:func:`boxFilter`
-
-ocl::boxFilter
-------------------
-Smooths the image using the normalized box filter.
-
-.. ocv:function:: void ocl::boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
-
-    :param src: Input image.
-
-    :param dst: Output image. The size and type are the same as ``src`` .
-
-    :param ddepth: Desired depth of the destination image. If it is negative, it is the same as  ``src.depth()`` . It supports only the same depth as the source image depth.
-
-    :param ksize: Kernel size.
-
-    :param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center.
-
-    :param borderType: Border type.
-
-Smoothes image using box filter.
-
-ocl::blur
--------------
-Acts as a synonym for the normalized box filter.
-
-.. ocv:function:: void ocl::blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_CONSTANT)
-
-    :param src: Input image.
-
-    :param dst: Output image with the same size and type as  ``src`` .
-
-    :param ksize: Kernel size.
-
-    :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center.
-
-    :param borderType: Border type.
-
-.. seealso:: :ocv:func:`blur`, :ocv:func:`ocl::boxFilter`
-
-ocl::createMorphologyFilter_GPU
------------------------------------
-Creates a 2D morphological filter.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Point &anchor = Point(-1, -1), int iterations = 1)
-
-.. ocv:function:: Ptr<BaseFilter_GPU> ocl::getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize, Point anchor = Point(-1, -1))
-
-    :param op: Morphology operation id. Only ``MORPH_ERODE`` and ``MORPH_DILATE`` are supported.
-
-    :param type: Input/output image type. Only  ``CV_8UC1``  and  ``CV_8UC4``  are supported.
-
-    :param kernel: 2D 8-bit structuring element for the morphological operation.
-
-    :param ksize: Size of a horizontal or vertical structuring element used for separable morphological operations.
-
-    :param anchor: Anchor position within the structuring element. Negative values mean that the anchor is at the center.
-
-.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it.
-
-.. seealso:: :ocv:func:`createMorphologyFilter`
-
-ocl::createLinearFilter_GPU
--------------------------------
-Creates a non-separable linear filter.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
-
-    :param srcType: Input image type.
-
-    :param dstType: Output image type. The same type as ``src`` is supported.
-
-    :param kernel: 2D array of filter coefficients.
-
-    :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center.
-
-    :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` .
-
-.. seealso:: :ocv:func:`createLinearFilter`
-
-
-ocl::filter2D
------------------
-Applies the non-separable 2D linear filter to an image.
-
-.. ocv:function:: void ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT)
-
-    :param src: Source image.
-
-    :param dst: Destination image. The size and the number of channels is the same as  ``src`` .
-
-    :param ddepth: Desired depth of the destination image. If it is negative, it is the same as  ``src.depth()`` . It supports only the same depth as the source image depth.
-
-    :param kernel: 2D array of filter coefficients.
-
-    :param anchor: Anchor of the kernel that indicates the relative position of a filtered point within the kernel. The anchor resides within the kernel. The special default value (-1,-1) means that the anchor is at the kernel center.
-
-    :param delta: optional value added to the filtered pixels before storing them in ``dst``. Value '0' is supported only.
-
-    :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` .
-
-ocl::getLinearRowFilter_GPU
--------------------------------
-Creates a primitive row filter with the specified kernel.
-
-.. ocv:function:: Ptr<BaseRowFilter_GPU> ocl::getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel, int anchor = -1, int bordertype = BORDER_DEFAULT)
-
-    :param srcType: Source array type. Only  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1``  source types are supported.
-
-    :param bufType: Intermediate buffer type with as many channels as  ``srcType`` .
-
-    :param rowKernel: Filter coefficients. Support kernels with ``size <= 16`` .
-
-    :param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center.
-
-    :param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
-
-.. seealso:: :ocv:func:`createSeparableLinearFilter` .
-
-
-ocl::getLinearColumnFilter_GPU
-----------------------------------
-Creates a primitive column filter with the specified kernel.
-
-.. ocv:function:: Ptr<BaseColumnFilter_GPU> ocl::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel, int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0)
-
-    :param bufType: Intermediate buffer type with as many channels as  ``dstType`` .
-
-    :param dstType: Destination array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` destination types are supported.
-
-    :param columnKernel: Filter coefficients. Support kernels with ``size <= 16`` .
-
-    :param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center.
-
-    :param bordertype: Pixel extrapolation method. For details, see  :ocv:func:`borderInterpolate` .
-
-    :param delta: default value is 0.0.
-
-.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
-
-ocl::createSeparableLinearFilter_GPU
-----------------------------------------
-Creates a separable linear filter engine.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
-
-    :param srcType: Source array type.  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1``  source types are supported.
-
-    :param dstType: Destination array type.  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1``  destination types are supported.
-
-    :param rowKernel: Horizontal filter coefficients. Support kernels with ``size <= 16`` .
-
-    :param columnKernel: Vertical filter coefficients. Support kernels with ``size <= 16`` .
-
-    :param anchor: Anchor position within the kernel. Negative values mean that anchor is positioned at the aperture center.
-
-    :param delta: default value is 0.0.
-
-    :param bordertype: Pixel extrapolation method.
-
-    :param imgSize: Source image size to choose optimal method for processing.
-
-.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
-
-
-ocl::sepFilter2D
---------------------
-Applies a separable 2D linear filter to an image.
-
-.. ocv:function:: void ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
-
-    :param src: Source image.  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1``  source types are supported.
-
-    :param dst: Destination image with the same size and number of channels as  ``src`` .
-
-    :param ddepth: Destination image depth.  ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and  ``CV_32F`` are supported.
-
-    :param kernelX: Horizontal filter coefficients.
-
-    :param kernelY: Vertical filter coefficients.
-
-    :param anchor: Anchor position within the kernel. The default value ``(-1, -1)`` means that the anchor is at the kernel center.
-
-    :param delta: default value is 0.0.
-
-    :param bordertype: Pixel extrapolation method. For details, see  :ocv:func:`borderInterpolate`.
-
-.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`sepFilter2D`
-
-ocl::createDerivFilter_GPU
-------------------------------
-Creates a filter engine for the generalized Sobel operator.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
-
-    :param srcType: Source image type.  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1``  source types are supported.
-
-    :param dstType: Destination image type with as many channels as  ``srcType`` ,  ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and  ``CV_32F``  depths are supported.
-
-    :param dx: Derivative order in respect of x.
-
-    :param dy: Derivative order in respect of y.
-
-    :param ksize: Aperture size. See  :ocv:func:`getDerivKernels` for details.
-
-    :param borderType: Pixel extrapolation method. For details, see  :ocv:func:`borderInterpolate`.
-
-    :param imgSize: Source image size to choose optimal method for processing.
-
-.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
-
-
-ocl::Sobel
-------------------
-Returns void
-
-.. ocv:function:: void ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size as src
-
-    :param ddepth: The destination image depth
-
-    :param dx: Order of the derivative x
-
-    :param dy: Order of the derivative y
-
-    :param ksize: Size of the extended Sobel kernel
-
-    :param scale: The optional scale factor for the computed derivative values(by default, no scaling is applied)
-
-    :param delta: The optional delta value, added to the results prior to storing them in dst
-
-    :param bordertype: Pixel extrapolation method.
-
-The function computes the first x- or y- spatial image derivative using the Sobel operator. Supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 data types.
-
-ocl::Scharr
-------------------
-Returns void
-
-.. ocv:function:: void ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size as src
-
-    :param ddepth: The destination image depth
-
-    :param dx: Order of the derivative x
-
-    :param dy: Order of the derivative y
-
-    :param scale: The optional scale factor for the computed derivative values(by default, no scaling is applied)
-
-    :param delta: The optional delta value, added to the results prior to storing them in dst
-
-    :param bordertype: Pixel extrapolation method.
-
-The function computes the first x- or y- spatial image derivative using the Scharr operator. Supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 data types.
-
-ocl::createGaussianFilter_GPU
----------------------------------
-Creates a Gaussian filter engine.
-
-.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
-
-    :param type: Source and destination image type.  ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
-
-    :param ksize: Aperture size. See  :ocv:func:`getGaussianKernel` for details.
-
-    :param sigma1: Gaussian sigma in the horizontal direction. See  :ocv:func:`getGaussianKernel` for details.
-
-    :param sigma2: Gaussian sigma in the vertical direction. If 0, then  :math:`\texttt{sigma2}\leftarrow\texttt{sigma1}` .
-
-    :param bordertype: Pixel extrapolation method. For details, see  :ocv:func:`borderInterpolate`.
-
-    :param imgSize: Source image size to choose optimal method for processing.
-
-.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
-
-ocl::GaussianBlur
----------------------
-Returns void
-
-.. ocv:function:: void ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size and the same type as src
-
-    :param ksize: The Gaussian kernel size; ksize.width and ksize.height can differ, but they both must be positive and odd. Or, they can be zero's, then they are computed from sigma
-
-    :param sigma1sigma2: The Gaussian kernel standard deviations in X and Y direction. If sigmaY is zero, it is set to be equal to sigmaX. If they are both zeros, they are computed from ksize.width and ksize.height. To fully control the result regardless of possible future modification of all this semantics, it is recommended to specify all of ksize, sigmaX and sigmaY
-
-    :param bordertype: Pixel extrapolation method.
-
-The function convolves the source image with the specified Gaussian kernel. In-place filtering is supported.  Supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 data types.
-
-ocl::Laplacian
-------------------
-Returns void
-
-.. ocv:function:: void ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size and the same type as src
-
-    :param ddepth: The desired depth of the destination image
-
-    :param ksize: The aperture size used to compute the second-derivative filters. It must be positive and odd
-
-    :param scale: The optional scale factor for the computed Laplacian values (by default, no scaling is applied)
-
-    :param delta: Optional delta value that is added to the results prior to storing them in  ``dst`` . Supported value is 0 only.
-
-    :param borderType: Pixel extrapolation method.
-
-The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator.
-
-ocl::ConvolveBuf
-----------------
-.. ocv:struct:: ocl::ConvolveBuf
-
-Class providing a memory buffer for :ocv:func:`ocl::convolve` function, plus it allows to adjust some specific parameters. ::
-
-    struct CV_EXPORTS ConvolveBuf
-    {
-        Size result_size;
-        Size block_size;
-        Size user_block_size;
-        Size dft_size;
-        int spect_len;
-
-        oclMat image_spect, templ_spect, result_spect;
-        oclMat image_block, templ_block, result_data;
-
-        void create(Size image_size, Size templ_size);
-        static Size estimateBlockSize(Size result_size, Size templ_size);
-    };
-
-You can use field `user_block_size` to set specific block size for :ocv:func:`ocl::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
-
-ocl::ConvolveBuf::create
-------------------------
-.. ocv:function:: ocl::ConvolveBuf::create(Size image_size, Size templ_size)
-
-Constructs a buffer for :ocv:func:`ocl::convolve` function with respective arguments.
-
-ocl::convolve
-------------------
-Returns void
-
-.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr=false)
-
-.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf)
-
-    :param image: The source image. Only  ``CV_32FC1`` images are supported for now.
-
-    :param temp1: Convolution kernel, a single-channel floating point matrix. The size is not greater than the  ``image`` size. The type is the same as  ``image``.
-
-    :param result: The destination image
-
-    :param ccorr: Flags to evaluate cross-correlation instead of convolution.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::ConvolveBuf`.
-
-Convolves an image with the kernel. Supports only CV_32FC1 data types and does not support ROI.
-
-ocl::bilateralFilter
-------------------------
-Returns void
-
-.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; will have the same size and the same type as src
-
-    :param d: The diameter of each pixel neighborhood, that is used during filtering. If it is non-positive, it's computed from sigmaSpace
-
-    :param sigmaColor: Filter sigma in the color space. Larger value of the parameter means that farther colors within the pixel neighborhood (see sigmaSpace) will be mixed together, resulting in larger areas of semi-equal color
-
-    :param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
-
-    :param borderType: Pixel extrapolation method.
-
-Applies bilateral filter to the image. Supports 8UC1 8UC4 data types.
-
-ocl::adaptiveBilateralFilter
---------------------------------
-Returns void
-
-.. ocv:function:: void ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor = 20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT)
-
-    :param src: The source image
-
-    :param dst: The destination image; will have the same size and the same type as src
-
-    :param ksize: The kernel size. This is the neighborhood where the local variance will be calculated, and where pixels will contribute (in a weighted manner).
-
-    :param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). Then d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
-
-    :param maxSigmaColor: Maximum allowed sigma color (will clamp the value calculated in the ksize neighborhood). Larger value of the parameter means that more dissimilar pixels will influence each other (as long as their colors are close enough; see sigmaColor). Then d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
-
-    :param borderType: Pixel extrapolation method.
-
-A main part of our strategy will be to load each raw pixel once, and reuse it to calculate all pixels in the output (filtered) image that need this pixel value. The math of the filter is that of the usual bilateral filter, except that the sigma color is calculated in the neighborhood, and clamped by the optional input value.
-
-Local memory organization
-
-
-.. image:: images/adaptiveBilateralFilter.jpg
-                 :height: 250pt
-                 :width:  350pt
-                 :alt: Introduction Icon
-
-.. note:: We partition the image to non-overlapping blocks of size (Ux, Uy). Each such block will correspond to the pixel locations where we will calculate the filter result in one workgroup. Considering neighbourhoods of sizes (kx, ky), where kx = 2 dx + 1, and ky = 2 dy + 1 (in image ML, dx = dy = 1, and kx = ky = 3), it is clear that we need to load data of size Wx = Ux + 2 dx, Wy = Uy + 2 dy. Furthermore, if (Sx, Sy) is the top left pixel coordinates for a particular block, and (Sx + Ux - 1, Sy + Uy -1) is the bottom right coordinate of the block, we need to load data starting at top left coordinate (PSx, PSy) = (Sx - dx, Sy - dy), and ending at bottom right coordinate (Sx + Ux - 1 + dx, Sy + Uy - 1 + dy). The workgroup layout is (Wx,1). However, to take advantage of the natural hardware properties (preferred wavefront sizes), we restrict Wx to be a multiple of that preferred wavefront size (for current AMD hardware this is typically 64). Each thread in the workgroup will load Wy elements (under the constraint that Wx*Wy*pixel width <= max local memory).
-
-Applies bilateral filter to the image. Supports 8UC1 8UC3 data types.
-
-ocl::copyMakeBorder
------------------------
-Returns void
-
-.. ocv:function:: void ocl::copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar())
-
-    :param src: The source image
-
-    :param dst: The destination image; will have the same type as src and the size size(src.cols+left+right, src.rows+top+bottom)
-
-    :param topbottomleftright: Specify how many pixels in each direction from the source image rectangle one needs to extrapolate, e.g. top=1, bottom=1, left=1, right=1 mean that a 1 pixel-wide border needs to be built
-
-    :param bordertype: Pixel extrapolation method.
-
-    :param value: The border value if borderType==BORDER_CONSTANT
-
-Forms a border around the image. Supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 data types.
-
-ocl::dilate
-------------------
-Returns void
-
-.. ocv:function:: void ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size and the same type as src
-
-    :param kernel: The structuring element used for dilation. If kernel=Mat(), a 3 x 3 rectangular structuring element is used
-
-    :param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center, only default value is supported
-
-    :param iterations: The number of times dilation is applied
-
-    :param borderType: Pixel extrapolation method.
-
-    :param borderValue: The border value if borderType==BORDER_CONSTANT
-
-The function dilates the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the maximum is taken. Supports 8UC1 8UC4 data types.
-
-ocl::erode
-------------------
-Returns void
-
-.. ocv:function:: void ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size and the same type as src
-
-    :param kernel: The structuring element used for erosion. If kernel=Mat(), a 3 x 3 rectangular structuring element is used
-
-    :param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center, only default value is supported
-
-    :param iterations: The number of times erosion is applied
-
-    :param borderType: Pixel extrapolation method.
-
-    :param borderValue: The border value if borderType==BORDER_CONSTANT
-
-The function erodes the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the minimum is taken. Supports 8UC1 8UC4 data types.
-
-ocl::morphologyEx
----------------------
-Returns void
-
-.. ocv:function:: void ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
-
-    :param src: The source image
-
-    :param dst: The destination image; It will have the same size and the same type as src
-
-    :param op: Type of morphological operation, one of the following: ERODE DILATE OPEN CLOSE GRADIENT TOPHAT BLACKHAT
-
-    :param kernel: The structuring element used for the morphological operation. If kernel=Mat(), a 3 x 3 rectangular structuring element is used
-
-    :param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center, only default value is supported
-
-    :param iterations: The number of times the operation is applied
-
-    :param borderType: Pixel extrapolation method.
-
-    :param borderValue: The border value if borderType==BORDER_CONSTANT
-
-A wrapper for erode and dilate. Supports 8UC1 8UC4 data types.
-
-ocl::pyrDown
--------------------
-Smoothes an image and downsamples it.
-
-.. ocv:function:: void ocl::pyrDown(const oclMat& src, oclMat& dst)
-
-    :param src: Source image.
-
-    :param dst: Destination image. Will have ``Size((src.cols+1)/2, (src.rows+1)/2)`` size and the same type as ``src`` .
-
-.. seealso:: :ocv:func:`pyrDown`
-
-
-ocl::pyrUp
--------------------
-Upsamples an image and then smoothes it.
-
-.. ocv:function:: void ocl::pyrUp(const oclMat& src, oclMat& dst)
-
-    :param src: Source image.
-
-    :param dst: Destination image. Will have ``Size(src.cols*2, src.rows*2)`` size and the same type as ``src`` .
-
-.. seealso:: :ocv:func:`pyrUp`
-
-ocl::columnSum
-------------------
-Computes a vertical (column) sum.
-
-.. ocv:function:: void ocl::columnSum(const oclMat& src, oclMat& sum)
-
-    :param src: Source image. Only  ``CV_32FC1`` images are supported for now.
-
-    :param sum: Destination image of the  ``CV_32FC1`` type.
-
-
-ocl::blendLinear
---------------------
-Performs linear blending of two images.
-
-.. ocv:function:: void ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result)
-
-    :param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth.
-
-    :param img2: Second image. Must have the same size and the same type as ``img1`` .
-
-    :param weights1: Weights for first image. Must have the same size as ``img1`` . Supports only ``CV_32F`` type.
-
-    :param weights2: Weights for second image. Must have the same size as ``img2`` . Supports only ``CV_32F`` type.
-
-    :param result: Destination image.
-
-ocl::medianFilter
---------------------
-Blurs an image using the median filter.
-
-.. ocv:function:: void ocl::medianFilter(const oclMat &src, oclMat &dst, int m)
-
-    :param src: input ``1``- or ``4``-channel image; the image depth should be ``CV_8U`` or ``CV_32F``.
-
-    :param dst: destination array of the same size and type as ``src``.
-
-    :param m: aperture linear size; it must be odd and greater than ``1``. Currently only ``3`` and ``5`` are supported.
-
-The function smoothes an image using the median filter with the :math:`\texttt{m} \times \texttt{m}` aperture. Each channel of a multi-channel image is processed independently. In-place operation is supported.
diff --git a/modules/ocl/doc/image_processing.rst b/modules/ocl/doc/image_processing.rst
deleted file mode 100644
index 959c97f..0000000
--- a/modules/ocl/doc/image_processing.rst
+++ /dev/null
@@ -1,347 +0,0 @@
-Image Processing
-=============================
-
-.. highlight:: cpp
-
-ocl::meanShiftFiltering
----------------------------
-Performs mean-shift filtering for each point of the source image.
-
-.. ocv:function:: void ocl::meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dst: Destination image containing the color of mapped points. It has the same size and type as  ``src`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-It maps each point of the source image into another point. As a result, you have a new color and new position of each point.
-
-
-ocl::meanShiftProc
-----------------------
-Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images.
-
-.. ocv:function:: void ocl::meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dstr: Destination image containing the color of mapped points. The size and type is the same as  ``src`` .
-
-    :param dstsp: Destination image containing the position of mapped points. The size is the same as  ``src`` size. The type is  ``CV_16SC2`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-.. seealso:: :ocv:func:`ocl::meanShiftFiltering`
-
-
-ocl::meanShiftSegmentation
-------------------------------
-Performs a mean-shift segmentation of the source image and eliminates small segments.
-
-.. ocv:function:: void ocl::meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dst: Segmented image with the same size and type as  ``src`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param minsize: Minimum segment size. Smaller segments are merged.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-ocl::integral
------------------
-Computes an integral image.
-
-.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1)
-
-.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, int sdepth=-1)
-
-    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
-
-    :param sum: Integral image containing 32-bit unsigned integer or 32-bit floating-point values.
-
-    :param sqsum: Integral image of squared pixel values. It has the ``CV_32FC1`` or ``CV_64FC1`` type.
-
-.. seealso:: :ocv:func:`integral`
-
-ocl::cornerHarris
----------------------
-Returns void
-
-.. ocv:function:: void ocl::cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT)
-
-    :param src: Source image. Only CV_8UC1 and CV_32FC1 images are supported now.
-
-    :param dst: Destination image containing cornerness values. It has the same size as src and CV_32FC1 type.
-
-    :param blockSize: Neighborhood size
-
-    :param ksize: Aperture parameter for the Sobel operator
-
-    :param k: Harris detector free parameter
-
-    :param bordertype: Pixel extrapolation method. Only BORDER_REFLECT101, BORDER_REFLECT, BORDER_CONSTANT and BORDER_REPLICATE are supported now.
-
-Calculate Harris corner.
-
-ocl::cornerMinEigenVal
---------------------------
-Returns void
-
-.. ocv:function:: void ocl::cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT)
-
-    :param src: Source image. Only CV_8UC1 and CV_32FC1 images are supported now.
-
-    :param dst: Destination image containing cornerness values. It has the same size as src and CV_32FC1 type.
-
-    :param blockSize: Neighborhood size
-
-    :param ksize: Aperture parameter for the Sobel operator
-
-    :param bordertype: Pixel extrapolation method. Only BORDER_REFLECT101, BORDER_REFLECT, BORDER_CONSTANT and BORDER_REPLICATE are supported now.
-
-Calculate MinEigenVal.
-
-ocl::calcHist
-------------------
-Returns void
-
-.. ocv:function:: void ocl::calcHist(const oclMat &mat_src, oclMat &mat_hist)
-
-    :param mat_src: Source arrays. They all should have the same depth, ``CV_8U``, and the same size. Each of them can have an arbitrary number of channels.
-
-    :param mat_hist: The output histogram, a dense or sparse dims-dimensional array.
-
-Calculates histogram of one or more arrays. Supports only 8UC1 data type.
-
-ocl::equalizeHist
----------------------
-Equalizes the histogram of a grayscale image.
-
-.. ocv:function:: void ocl::equalizeHist(const oclMat &mat_src, oclMat &mat_dst)
-
-    :param mat_src: Source image.
-
-    :param mat_dst: Destination image.
-
-.. seealso:: :ocv:func:`equalizeHist`
-
-
-ocl::remap
-------------------
-Returns void
-
-.. ocv:function:: void ocl::remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar())
-
-    :param src: Source image.
-
-    :param dst: Destination image. It has the same size as map1 and the same type as src.
-
-    :param map1: The first map of either (x,y) points or just x values having the type CV_16SC2 , CV_32FC1 , or CV_32FC2 . See convertMaps() for details on converting a floating point representation to fixed-point for speed.
-
-    :param map2: The second map of y values having the type CV_32FC1 , or none (empty map if map1 is (x,y) points), respectively.
-
-    :param interpolation: The interpolation method
-
-    :param bordertype: Pixel extrapolation method.
-
-    :param value: The border value if bordertype==BORDER_CONSTANT
-
-The function remap transforms the source image using the specified map: dst (x ,y) = src (map1(x , y) , map2(x , y)) where values of pixels with non-integer coordinates are computed using one of available interpolation methods. map1 and map2 can be encoded as separate floating-point maps in map1 and map2 respectively, or interleaved floating-point maps of (x,y) in map1.
-
-ocl::resize
-------------------
-Returns void
-
-.. ocv:function:: void ocl::resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR)
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param dsize: The destination image size. If it is zero, then it is computed as: dsize = Size(round(fx*src.cols), round(fy*src.rows)). Either dsize or both fx and fy must be non-zero.
-
-    :param fx: The scale factor along the horizontal axis. When 0, it is computed as (double)dsize.width/src.cols
-
-    :param fy: The scale factor along the vertical axis. When 0, it is computed as (double)dsize.height/src.rows
-
-    :param interpolation: The interpolation method: INTER_NEAREST or INTER_LINEAR
-
-Resizes an image. Supports CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 , CV_32FC3 and CV_32FC4 data types.
-
-ocl::warpAffine
--------------------
-Returns void
-
-.. ocv:function:: void ocl::warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR)
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param M: :math:`2\times 3` transformation matrix
-
-    :param dsize: Size of the destination image
-
-    :param flags: A combination of interpolation methods, see cv::resize, and the optional flag WARP_INVERSE_MAP that means that M is the inverse transformation (dst to src)
-
-The function warpAffine transforms the source image using the specified matrix. Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC types.
-
-ocl::warpPerspective
-------------------------
-Returns void
-
-.. ocv:function:: void ocl::warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR)
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param M: :math:`3\times 3` transformation matrix
-
-    :param dsize: Size of the destination image
-
-    :param flags: A combination of interpolation methods, see cv::resize, and the optional flag WARP_INVERSE_MAP that means that M is the inverse transformation (dst to src)
-
-Applies a perspective transformation to an image. Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC types.
-
-ocl::cvtColor
-------------------
-Returns void
-
-.. ocv:function:: void ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0)
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param code: The color space conversion code
-
-    :param dcn: The number of channels in the destination image; if the parameter is 0, the number of the channels will be derived automatically from src and the code
-
-Converts image from one color space to another. For now, only RGB2GRAY is supported. Supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4.
-
-ocl::threshold
-------------------
-Returns Threshold value
-
-.. ocv:function:: double ocl::threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC)
-
-    :param src: The source array
-
-    :param dst: Destination array; will have the same size and the same type as src
-
-    :param thresh: Threshold value
-
-    :param maxVal: Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV thresholding types
-
-    :param type: Thresholding type
-
-The function applies fixed-level thresholding to a single-channel array. The function is typically used to get a bi-level (binary) image out of a grayscale image or for removing noise, i.e. filtering out pixels with too small or too large values. There are several types of thresholding that the function supports; they are determined by the ``type`` parameter.
-
-ocl::buildWarpPlaneMaps
------------------------
-Builds plane warping maps.
-
-.. ocv:function:: void ocl::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, const Mat& T, float scale, oclMat& map_x, oclMat& map_y )
-
-
-
-ocl::buildWarpCylindricalMaps
------------------------------
-Builds cylindrical warping maps.
-
-.. ocv:function:: void ocl::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, float scale, oclMat& map_x, oclMat& map_y )
-
-
-
-
-ocl::buildWarpSphericalMaps
----------------------------
-Builds spherical warping maps.
-
-.. ocv:function:: void ocl::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, float scale, oclMat& map_x, oclMat& map_y )
-
-
-ocl::buildWarpPerspectiveMaps
------------------------------
-Builds transformation maps for perspective transformation.
-
-.. ocv:function:: void ocl::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, oclMat& xmap, oclMat& ymap)
-
-    :param M: *3x3*  transformation matrix.
-
-    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
-
-    :param dsize: Size of the destination image.
-
-    :param xmap: X values with  ``CV_32FC1`` type.
-
-    :param ymap: Y values with  ``CV_32FC1`` type.
-
-.. seealso:: :ocv:func:`ocl::warpPerspective` , :ocv:func:`ocl::remap`
-
-
-ocl::buildWarpAffineMaps
-----------------------------
-Builds transformation maps for affine transformation.
-
-.. ocv:function:: void ocl::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, oclMat& xmap, oclMat& ymap)
-
-    :param M: *2x3*  transformation matrix.
-
-    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
-
-    :param dsize: Size of the destination image.
-
-    :param xmap: X values with  ``CV_32FC1`` type.
-
-    :param ymap: Y values with  ``CV_32FC1`` type.
-
-.. seealso:: :ocv:func:`ocl::warpAffine` , :ocv:func:`ocl::remap`
-
-ocl::HoughCircles
------------------
-Finds circles in a grayscale image using the Hough transform.
-
-.. ocv:function:: void ocl::HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
-
-.. ocv:function:: void ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
-
-    :param src: 8-bit, single-channel grayscale input image.
-
-    :param circles: Output vector of found circles. Each vector is encoded as a 3-element floating-point vector  :math:`(x, y, radius)` .
-
-    :param method: Detection method to use. Currently, the only implemented method is  ``CV_HOUGH_GRADIENT`` , which is basically  *21HT* , described in  [Yuen90]_.
-
-    :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if  ``dp=1`` , the accumulator has the same resolution as the input image. If  ``dp=2`` , the accumulator has half as big width and height.
-
-    :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
-
-    :param cannyThreshold: The higher threshold of the two passed to  the :ocv:func:`ocl::Canny`  edge detector (the lower one is twice smaller).
-
-    :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected.
-
-    :param minRadius: Minimum circle radius.
-
-    :param maxRadius: Maximum circle radius.
-
-    :param maxCircles: Maximum number of output circles.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. note:: Currently only non-ROI oclMat is supported for src.
-.. seealso:: :ocv:func:`HoughCircles`
diff --git a/modules/ocl/doc/images/adaptiveBilateralFilter.jpg b/modules/ocl/doc/images/adaptiveBilateralFilter.jpg
deleted file mode 100644
index 6508f693c38542868f5b2b08c7cadc8ec2bc915e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 65410
zcmeFZ2UJwsvNqa)fFNNL8i|5rXtLzg0+J;|lSP6=$r*%31XM&25KtuNjN}Z0<eYPm
zoROUAehYP<bN0UHKX2dnhjH%zZZp=P!(6qhYSyY*^;OM<97lcw{iYxzF9SkB0fA6}
zKM-;XBn7&Ofq{vEeiIWD6ASAmHV!c^&aGQGWQ0Wc#8l+ecc{oIDQW0Am}qF(=qM?f
z1z6a)xOsVbshNbt1bIX`?(y<m6G6ek!os<QLyC(_%5#_UF3*4Zh5P~{xQTj#4njkr
z1)&n4pb?-Tn?ck7pXew*UZ7tu6jZbu=opwcv9NCe1Im5_p`xInq2544N563c80`c6
z4!S{rPI#AF0)t4+7?akKnCE$T@=ZF)qDHX#z%Kng6DNNxY?9lgWaJEtOw25-ynOru
zf<nSl52R&e<>VDKG__#bI=XtMW>3s5EUm1aU0mJVJv_Z$1Ox`XeDyjwBJzDybj*j3
zu_>u(=^2?>**V1}rDf$6l~vVEUz=N6+uA!i2Zx47M#sh{Cg&FxmzGyn*VZ@o_74t^
zj!#a{&adS{0p$Gs^;57v$wdIjg?i%#+6~NWxlmBufdh@;2KrrY3_=MtOk+nPTAt@O
zi6z66iyE=$?y2vBO`Ha>N$7d!8TPJ4`ytt16U_hrlVpDr?C)|-f^g7KfXPE606{?~
z=gcX7*#CX}Z`47aA#6j}e*3UX9>=Ncpo8{Eix{O$+T=8J!<&jt-u7;&3K!1NVr(*c
z;j2}Dp-A-rNS1LzC3Ke@v|(AGzOdO$F3fhKF6E5jFir)%Z{vQUfWOtSjrS~((^uq*
z8TAO<IaOYh&ooK6Y|B1=>4yhz{p84BksKInHM%iHBQC45!Hkxdl3X6ok-wO5?|rxg
zm=8p(rZY=dlhCI~I3*-6`h^G6&JxW$csZ$Tk8)R*$67Ve73ba<CIU^VvU(3A^o`i?
zyXnR_eoU2qfT^_{=%gzA2@+)KymaQhH@B|iwq39(e;iPyujmG)K}+0A!pjL>+F=|$
z<SJ1Tp3yhMY+t{Ub{QQ?AE|Y2b&6o7^<zv4ZfU5FQ5s23kSCom@ZEkcDl_v62@)>_
zR70$a@>dS6ot=fE3aeFmnaU0{K9d}45+ulGX!S+ytNZ82Nm%(ibk)xpU#$p`VI<WJ
z+qx`iS2-nzVxaJSnimNZklhc{G02=BJ0G!yF^oJ@M1PJ1Y2ipGj)x;bTm6_)wvOPL
zY89**ijWtovm5-APLSy;)iyLxZ0BxT+txkZ4n*;+I`<kr5@f`OS1LZYU?|?^pMCK8
zN@a~1Ed!H=T2C)M%bMHjb6!baI##Qib6CC#&Yp^0N$Lv<+{U^rRBMR%S6IccSrq%%
zsIc6qFHnX4B-CK$|9g+BLcEHpGBH_IJ4LPs$J9&MYvY7zcxl=q-?n`GzqucHUIu~`
z7m%PcH2m`oXm~COc|k&~PqEn6-Dvt7w<#&L;J1;Wc2fA5+T~mEHi;pI&lTa@zm+@)
zh)Ylg%tpg#bZccOt*rdL5?duTlSGeU#o)(UF5WY7UGjTz$MmabBQ-v>O(Q3bR^*jy
z;~muTjQLqqJnhqY;+8ll6h$E?NqD|P+O_@jitLg--HyA(oSFN4xg<4d8;+km4M3Nq
zD+U&O>w5e#1alKD!hSHVUVOnLa7^A>IiZe<!{;=fl-qc@p$RE|{Ez)19x!HAR?hn(
znq}SVpX)Q{0|<q5O`MG1CsxZ!U9sAd$c_}?rHW5CiDtc{+!kDJAP22M&{~k7eGKZO
zVK|-s(euA+BI(wjRRQyD$e-xRl((^Ej@5M#SAG#&VVblaF|OZ!{7N@PQdxzGTCc`F
zO-HZ(l$}nSih_*pAPGDATtgrTBax|#PR2Q*ISvUD?kFAk&MT<irCQ=l`Hbz&`sG^}
z^715?E;^^rXGvT)>{{>5W|UTjB6?YdQHu*4JlnF8aMsFZm^4DBI=^9lYvWmjhAE8t
z-^`!aNxq*|2in$|?0sX*5O8t}TZ;9r!$#%$25UyVk6Qv&MKzv+1P7(5ND30P30DAZ
z*g%>|7FHirk}hL@fa1ZUt@fuo7<27y?Y_zU*2My%4E57I55JXR7}+lgOnUzfa_~US
zKz_PuG@8|UC`94&CF{-p#Xe68aMgWuVzs|?P>#MuyjZ|hX#Yk>&F32rkMw<CPlW_M
z9Ok2v+rOg}o@kE*!6pkA)Q^SZ1d5QLE}choM~&3OzFLLz7x3b>^3MA>i=LOB2%RKs
zlCN8LcoI0mXJl)AypSL%`P9OxXR=vyl;DsvIk?IaR8Lf;deoa;8k1Xm=(6o7_YrBu
zKCi3W#R0eT<;(pfT$?u@J4qhqh69z2;AWB833(2`h=K_QketeWZ*PmReVg1-KEA=N
zhqYw-uf~Q)iSJNFtjb3NUZ?-VS&oXm=@Akn#*CIav}H21H8^{Bzfj|9aTB(A`|Ejj
zeFc|F%QU4NMfRPtvcII6zpBB1yK&B5>PV1<NAP(|r92Ykef#WEW!aFy+&b2KM)Wp+
zZ_&*C2^qTpcMLzWg7v`-H`G}YX`lInyMo`f^J%$s8*jbS6CA-4Hp7myKohgF1Z5S_
zgsxx)X#q~3Kz-moW%Zg~LEP#E|K-isy%W%{H`L4D=51h_C;O)0EamwiK{(j!Ye_gY
z@q7&>DW6Lr#ALVb_KAorP7fBujZVIICkZ4ZQHqrD=oKdTuIHkPfxB;5_$p7AgY0qS
z^1fD|U<Z9@l&RrR`tTNdEnD!#*A+>}*}|&`>T;i~q<h<w6#bfqD31o|(BpoqLQj3G
zx|nf47cg}K0|}~be4N~B0aDtkmi{tBp}u3=Qv#y+VtCNaf|Ov;3Wdz5D_V(agBpet
zOdJUcLVsEcgONM<%gE*XZ=Fq%0k`1DMzAGqrBdXBhz#e~#02-r9#NHZ>wJ325NpC8
zO?hS-c*VTO-uP0^GUg)_EuY%lXt^vUc6W?Q#Hw!ShMUF>v=e50kvf&L!=zi}PqK=K
z^0(VGm3`}wAiXMSVvyQjJ5Y!}OADgvgnwBhb5;SOYf`i_;b2gxPU=T~x!}#Uue4EG
zrm8t0wJ#tC78?<jk?k^ysZF_Cp3zP>m)pN|kS)A<D_uIdW*9L`_KN301VMlGi{RK`
zW&-S~rs9w4%wH1<h|Lgue19>T_$v{|zxfzcEp}y*Y?sY|;tvuo1L4H!5|cBPd|vPR
zdAD{RpX%!h&xVvJ*Kpo7MISa?j8eXdXn1S)V$<MJb|Q{PIhC=vp@_P%z4uxK5`+f$
zG{f+=LN$%sK6lop2yVp2iIdr^&pd0ORbWoZ`oav@+5gO;uy9FD11I*#!%NwN0}&VY
zR>wl^_b<0;jmSJ2;dL%IFUi_YVd}nz%H5*&GP(Ok=jvkQ!OZ!}Tq(iKp#L8oXd>D}
zG^4y-Z^f6)$@+7>!X(8SSBh$>vQ^m2TEns`E+4wI?wdj9jK0B}wKYPW4J?8eB4vlS
zMYXKc11oko!{MQ*UKdfKNYHr>LuRc@|J8zvkzpPD+x7`nIvD4ktSAecG8L5+1TWxD
zP)4qh|E~!2M%wCNOxY7dkC(E=agDAyx>8&l5eBTEFR{`l_20^<53;W0gWn=SSVk}5
zYeE@F&^LAXTk74YQ+LFRSbGFI)9HZgbm0@|GTgC5+7#ZTJwJDjXVq{@oVX}^$>0mD
zyGbjbkf7)C2wp>X{Qn;JUr8_Vf1J0REjk(Vc})wxWhnkF>4kvmXU}tWd!S<D58=rL
zpIYv)4<bSHR5fpuhS}}Da}c%rLp-C#OP<w&d^SQ0jgg?6`H4Y%+|h3ojL|#PF7aw~
z`}%>{w9~7Dum6xMyD21@!9whI2XrL0{~s)A?w?o^U;dbL-?0Wl`-FlQCh#XK`-2He
zk2(DJxSzB5UsEr{Fz6*i!X3bi%>MKud`?~J#lxvQx`tEJG=l6n&%Zzed54ITnR83K
zhSj8R1pdV&;omOAmEt9smZ0ZJ@>o*O%S^`1%C=Gh#fNe>Im@S6-}VL9nzT-xjB?DF
zRWLn2(r811a%w~m3<^VB&Sq!l^oC-DLqD{dsf|8B@LtNEt&|_;OXBK3tY|2=?eBg*
zHZ`>&tSPJFvR-)6oXb&jjs!K%edh(%^^y<Qi+jf&UK(#0zR?B<w_mcm;k(<FqmD1-
zu5S$93+u_~L>pIc*0Z55QPv&9Gtuzx!SH4thTThk=)sjYdcBy_1R`oARJ@F&wrtI%
zO4#K#q$Cm^n15EngdkACLxTG8m5`t>n!wTzohRP?eg!(5yF^#n&5vcxt0BG6MS|{K
zblaawi;5sY_r(N|AQX*DW+W&;<7>kq+7Nsf3~eg6t~UJ2&|SXH_~a$U7-q;}O4n!a
zpC^9*mnRk)Zjg-HQ#yB*%L__l>m+{Bs%ERfEK59q*0%of5@CO&12GT&iv!iYU*#uT
znTM<MZ76A!j+S0@Jj>$HN)to5VX+=7r2Y7~1YvO!1^C5(5IyzJikZk+*&uLXcSoFN
z>{g12m+HFV+dx;~o`C<onF%brnAgiLD?E^TH`v1+;pb(KK+Qab^$`C)4p^T5)i@@F
z--Q9p<{uUNXQ>rtS17V-$La9`!YjQs5C0<hzskn{wsBFm^E2eVt_f2LML}8@#j<w6
zU)S#E9!P<k*@*{oTfgGu@!UKC^AYRPdZx`xaX0+yxk!1#(elf>?Eh#Le>O|YzuVz|
zkNh9ry#EEYWOVxAQta9ti6ox8s_YiHAzFvN-r*kjo<|crI(qmC@+w6&SZDtwTF5Hj
z%;7g}hCN&@QC53U*Mz}WhYv@1m`8j>#c#)?p7E}-=HQm6XU-*$g}&EN^$(VnWEn~B
zBvz9KaQXk52bbwkRsp9xQxk&ileA-n@-S9Ha{_?pw^1KaUwLiI9p+Jk*Lw2S@}jqk
zk1>cQJB8D7+b_?3`G~Yf>_S6$2knSXpJZlQ>|Ykx2Mgw@;A|=utPL2xpWumzZ1!rS
znSXh<C2i|9!sO~=Dkv!5s-Fz3>O9IKtu%^xrzUeH*0d@Y%Q^u5u(=trcXw{l2&>hL
z%#LC|w0e4<m99qE296iI5C23ZO{^8yXiys);^kJ8Q-!|lNZ3NU(FPudf!}ji1iK26
zNq97SV{J#R_z_8!>xZ4j_An=G*e_7gQcqK_B0+`bKC0t$)sLPk!juLS#ALO^kCYdM
zNz&}z5t%y8)vF;vY*QC=tC!5f{hRJ^)8~Gn)1GfRP$=5Zq(O#3whF7tL@iXc&T<#w
zPcTtLEU4BS_EeFe;d@9Bt-5}@>st}Ch(3E!+hTi$V=m*d-E>3~^bkCZ1ntrxLHYNI
z<R9CskI!xMJa7kx?GT+FXu1wjp1@#5xMTimwBaMfu1u3Lum)kV5H4v5Z{L1i3Dr2&
z`1w&5?~JT;390@NUw7u}$JD1)m({B1n{|@wg@~5S79xe*YI#N`lQ3nG5fNRxv^^g7
zX+_EoYYAqj{6006Xydd^cLD9Yqm#?pCrN|^ert{oE9&(72wz9(J|@Yu;cA^vXixx!
zmsKqU>wBiCY%@f5g);O~i&3m=+wtieil(^_1xCd{*=%22Zr+heSeP$c8)HGU4Hd<y
zl?sC9YRlscOiM;Sa0=zYF9SWcJU>eLVp3IO{xL%Y4MUWuO)y{Nf{Zg*#x4Q!)OU}L
zvSD#teH~{1#D-oS%#dz==7W{Hnw*3ahKpkv&b$VITZeS|;IR3^Gt@T31roGoUjDxf
zFhId|t``hZ{FZO1V&ewyN?bR*oo{kl=I#O2r9NEd2CN<TA%et4VlvMt_OdJXLA`I-
z6M@Ua=M)EO+BQs0-9}BZQg<?XiIAXB>`~s)X3f*NtPSRrk9YsgaixD%jl;xuY1n<^
z(ZMOc6O$(x3YXh~2G&);g|`FEH)9`?&xSi^_MJR2z@XARrdzY&Y$7i`Id>`7zO>eJ
zzDii4UZ6`2GlAwyo;<Nz1%g3udCg5K>P*>lpEdwQ)QZcnosF^e%+fV_7|>^K&?H6h
z_A6%SCG4e_9y2E6ph{Bd|3P=jWS#GA1-^<qj(j_yLLsw($CLILNK{LtL7$Y@@}F3N
z1LH{E;TB1dxBA7+-~WQpG7o0Pm~uB6_-knBUqwy-kB^(FKWaXWr}%olT@m~kb8D4k
zGB|6K3<|jd=1an`3|3p_f8J|j)i0{SBC9cd2bTqss=z12<X+y_A7w}%A}9nSyP>hm
z<53{nIOeR=<vQ;@#_nj`Q<HdRW#_woDeS8?hTxT+LAUnpwS_CW04`eN-tP?wgpqWN
z(N+0$+Evwi(G3~h?T<@HR(`{zb9z$jeIQ?>Q>o#sZj;im5VPTOy&n8uT|fPW%>A#B
z5f9o56Znn2;?<UhK&&ar6YBj@w3D&@)m^?p9V3Zs9VySJ7xN#{vX3zGD$(WWXvV-v
zB)em;?L-CcPCaq_6wjOB2d6Z8zU1v=Ely@sH9|c*xV%YKCww(^wQAXH%OqH!x41wZ
zxNS#Y@k#R*+f%KkrY)57z{Y|XYLypvKDs;4G6RVkjgs*x_Jr*vQF;N&-0cr-!$s^@
zRvk4H1Pa7gpR&~ia`Z&p0c>-(<QcH~NoXKJegKlR#|v-j*$ulwS3#gDB0<e#x@A6?
z%Q7Q~waXk^@y#71$X}bE`iucUH#iXwmVgw?{8Sd<p9y4;=}6FmB@z_)X`cEVJMj`D
z1_^pyP-2Y)x%g_@8tsI2hU%UexnKUa@)Zf9Z`cJ$w8zC<vi02rvQ@y=zwtI)Dmk8W
zzF~X%%r@6Xdr%(lV-{-p2?jr{QUHn{hL?817Uqqt9#V~Qb%)6E8c%p5U;3+LB?VUe
zS(xwsrPxs9pDvz?Ah8dCud%{+$&C<<)jhda17~Vw@M<WlTcYa;ypiW0IZ3=^oBKyj
zsQ;uFzR@2ty!}DM>OTk)e)@5q%`pck`;L_Yay%M*_9V&1-N=0C2tC@TrnWdVAk6lQ
zc}2NDcW7Dr=@*8AlKD5wq^`r#^*(9p`1>i_)^kTEQKbD@XQm_xbsFnyp`F2)64I7V
z|8|JUzXj5xP7ca<hDO9WwWX^G5JtwYe9WFn<0<^$P+uQK*g^TG&QRg}g%Mh_>)z+)
z3I62t{EEchRV^^dTMEU9ErRo7zQ~2EJBBKXx_MJ}spdedO_}kS1A;nv{Y)dd{?!bd
zqo9d!c)#}j1|IBl(NLY&^S4JiePd>4ZaN>k5-0aJ4Sl!0)Sp-1cxdB(OPVLNsEE|~
zRO^$6hRonL@2O68BVlvii~|ObO+59Uf=74unM<A3>0vMs;T9v{3X}D<p^&(TQxa<`
z3Pbz}w1V=TFI#Ku?aF!M)FUds92t#8H_utc2+dk==l8{8SIko9Kl!fo=>)gFY9YGw
zSct>%RuEb?6=bOx`*eL_u*PxIy9^1!18WK?owH!l`a3`lrQZ2_j)FUu68v+n>Pk)o
zb53Gz+DYC6IksAJ8==RqoNfqKDC0*%Yy}R4vxnZi?Xlz_F>brz3Klcfou;W8r(mL7
zC`V~8%<kmoD<YK}&dZxyTrpN-fY9XJ7I;dTW<k1Py_8?fV}rIGF1k%2sC%zJp{%X>
zUW?SNXJRL#Q{y=-Le~2F*{WCjzNI~~@)cu)jVlok_}#5slI50T1Tm*#gXK*F1r%ws
zju`XT-Dw?Kd{;aQse^1xHeeB1Q-Red`~2CKlWGI8U(@djre|>^cD)KWvODfyVo~wf
zTDsd5HkztfrMc=Ro1DWv(+D9wc}{~M^vvuMg)Q<38ku<Vow+d)eazEWR#&g@1iaRk
zTh`b2v%zY#7*rbyy+e!#Dedcej17ukt3%%#sswi;>Xe1-lcFS7wqLvtS)@`jh$@%S
z`z$^|)H_fT7%7Ie`AFcmJ}?gZ?#Z0Ky{W=4@m4*ddPDy9!dx@>jye%tl=AhuV9abj
z8^tQ3AjpYxiznqh{oBID8n#U{-e=@6HO21H?@H#_SfVJ(IRg9hk`~#8d95%Pu7g~#
zH)F3yZVmR>8a}q%QMO5?d?h8dl&(wG1wE{n%VvJpKsF@%b%=%er?UL%jVRLb>6F;F
zRVWz`5qd;M$pY!Uk}P(MuNEU_{XfLCui{`|jC$J6@jJ&#7{!k*JasBKdm7=bf3V@2
zLr_$S=cSPoA6GMfD=r+i`8`X^l|{x@Zyj2@98vPwEhl0&g&#C6n=bFn6myGmF}P#%
z5$10zWuMhTEWblG*KB04^<&i@rN$AIChC<7<=xdAE%tnuYayyWzcpIUr@&WcJsD`p
zyrm+|F3p=@V0O~tL%)=Ko6!W9*YWBYH6T_<d=X7&OOa5%gO`O>0`rD8upZ=>iRIVD
zE?+7q=H}YjsSk5?tjOR$)2u423>Z#HtBz|T1a;&@JW44C&3sq%=8VB2YqjC8H+tp#
zOa{*}Q&ur|?oc3ANfBn%=oS&*;97^Phx)m3qSrBaJn*s^quK+0FF!P$)4D9|(Fs_G
zxekl_VTANsMeZ-e=RNf?Y(=ZPg&N$=inu}Xmbs-N->TwYE1mY;3fE|Hi1Zn1-+<5z
zm+yOOWcNLSEbiCSv}(1;#4BmWmyfT-GiS|E@T#-4rx(@>$=pLHV?xalJ4f5krW=hw
z7%)FP;*+e0LVFzr6*}Ki@z!m*sAr@V^yt{B8;8Ce%yY|=n==cZXvKY+YNu*5)b5O3
zIV}>UmE+dn#M&smCb%c3j~NRQ-ZvS1in&ak@4BKJ;K1vX7j@3C@{q23%ZQNCft0#6
zPKfWd9*HxFdY+T4*S=H1U7b!ut3!#mjnk@ZltGm=bJjAPgU1KmhwaBSusEbfqix=I
z_!@+{uG71<r5mbrb1Q^Q8TXm{`E*?&XUU0=tFU-s#>x!4G|^f3iw#zV3sPW}Rm|?i
zhzOsuj_|_nj^3ArvP_Dhdsao;$burX-DW$;CGqsh-<L&!r(gM+R(g>S$>8{f28C4c
zrG}|Y%<xs4#8J9a$t$WFC@#zBH&`FCj=W_S>M|=lA&fd?;K&g-N87NxAwcs&r=Yuy
z&B<y{VL*^(Z(6&3aACZ&f-QSe?0a_2*Y&D0X+C|Ym3Skw_>o@6%oj>LG=gj>=OZ=~
z!OU8MOm!Ax-7B?{pIz%h?z&Zsw)UrX7uW{%FfYMu<Ce|FRXX7c1BKe`-E?88+a~z~
zg*;4mw2dwMc!P3hx=q3$!+VAv!F$OHT2G3#86Y)8WCUKHsWp^u+zBcodo3gy8#`%A
znPjmnCjG3OB3MgrBW~VorqJ0_wYaXwiv^`UrT)t*(WZ>F#mqF9?JEaNb=I8riFVV(
z{Acw7ue0WJg%$8%19fi~rOTY{g0roJ!sYBz8}juF;aIPOSVL?yCO&lVZZ(UsWJo{C
z!yFzKQ2&r}=9K{9er+1rQAot!7mb>dPI@4ALEaR@Q&mdguKa$>yx2CAyA#7#V;Ght
z1fGjBckE^nN;faH{P;C$Tu(vIf5C{AXXZlNN3SRyQ|M_FVu7$eAbmu(`-_hfC}`<7
z8}P)3<xnMY7CojLcSM5ESPk^aV9QY=vUNuS9zFv6SsA%@21B`9k=0ebG`!^5&Q7!<
znok;{V7H533{yYVe*LCz*TcHLQ>nb7czl#MZKOwm^(G@xhAd`f_W4-8WGSRkGh^rD
zCzu<Mr-48JUjF{i2idGF>2h~H4d&GpiME(gy5jD<uR1}kv=NyDdlbxYY-Lsql&;H|
zz&U$9#*8JP>l&eXZr7-e9epa_LGO!BJZaBbwKsFA6@9#|ZYADjbV~htRJ16mt35|d
zzC|-qZ-^EN($Cgox)@?clm#W?iE`xEmCkWD)Y0$Ejpk?iKf6aNIAAM;IP^rg)FIl1
z95%P*FJsP1B<FMochYqp53`%SKYS1+5B1|qoh;fGy_ce6klvFKV}-MRVkYNj+$ucU
zW?kl+0T$xic{F>SvUdEQf?fY7D$}fQC>=^Z_|8FB@yazq8yj=MdO+yQQ30d{w)g0C
zswujHJFk4l?&E@ElnLDpGxx^q$YQ=^zy~Fa+V3bK0y9tF%=dAp`tK-y#N2H}YuF6Y
z&~F>nJ#&e-dTGLDgi-w=fByLm$JVSDKz9k45i=>>%AGU8e-i&Y5cZo{M^oFpp>YLw
zcFmtXn~hp~fTniF?5)L{uqxDNc`i{w6K+Twf}=`|_rCPYYOSG_vCsU}Cqu~<3TelS
zstku@ddlpM6GU0(I)Zk{^(`J<mNo3%T+>4hEJn4emwR}gm<6)n#J!07`f?#e&sRyP
zPg)!WCZ*rz^|#Z<O%auAghw5UK|=E_$ZF_jo4&29;NsgbdGe3Om%@Etx;MxjK$kd;
z!_H4t_6lAqjJVvoFJPnRa6X-RI`#nxYJ#T2`}7Mhw&6`-r?Q8IXPd4_kWb;Mx=v8J
z4Ck%q2=|ja!=l>r-=IqZnegj23d|PYJ9G#sFZb&<im~ATF$s<H?hB%$hqo^;;g0Ki
zy}8LVW^H-vtLsjQi#kS=ZW9w%b+*grK6$2RV?Z)<69E*Q#OxZr1F;x3^fPbYPfj>b
zVa{2Zd(u47)M!W$lzQ3=_BPkjt&+z^*yD!`_y540N8u&ix%TBzH4;=m_M;>RS3rUi
zDvVAhf$YYz2-k(E=^K1?@gmGJvgOeoGr9|VWuVZPh6ItXxGK(IFTW1c9qJoghPKw8
zAPBA?2(KCVoDG5){*QXfQ~Ausfr;>iR&IWalq2+?tWUtadIl)NC~f1M_Gu9Nqb7O%
zzz3C*^2cO_cY*5FKl1RzhM#{FV|ucKg+1;&U5F{^m+bSdFeyD990sc#mzAB%uncE4
z<peV`ZT#D>T`?YB<sw0pM!P_U*d&#O1oe#}1RLh{uFRR<t;>rLZNyS^-DPD>Pk2G|
zo{*)@sKKrR4RLC;n`ATgHT??`)By1AV{*IFo6(JgWX|Fhc9j9=CB5Fu%S$9^@y)ed
ze<vzCYI3Jj)|U3WBqU~}RO23D3UIxD_+4V``&544_2p$pAK&SS+P;J`#qvNt53$wv
z{08pCbvt#bq&@)*DM3^!3;)a$032^$GV6SxIexffL!s+0Ps>)-epmoU5VFf6d>i&!
zfQh!TIoaVF6GDQ@0Y*h1+<Cs0Ch4lAJbmU7`q15V^)T1V`_b$)ePjaWeGE_zF8m(l
zM-2E3^}KpZuA$TkBN}2NY`1{b;~5ppg`9@`Ic=QzNP#;5bD=z}R_}iY0cwl^Z&a!G
z4**P)^HlYjRMwpe=qb><9O!msPJh9ITN8WU<i4-0nppXjHbqHFy~ddA7O9vUj=Eyb
zbk9jT<~9AS#bFLQAzct~tK;t$2=zBAXO>3sF3+|X=uVD_urrbbqr?su5ImFcO+{U$
zKJI}x#UKc&{wCtqc{kRvx^R!=J%%dO%Cmwp3QXn?W?S@REuyC{RmJS!KNL~Ok?F|N
zrGS5V10HB<v~l`sfAsCxhhfHYhT!jNbq-;H$_v~fW5$`SrWK$uB9cr;g3eCNOAtIz
z_s-p6Vn^p60;(q;Jrnh`lId!vWIVBO*DM3IqLo*1p31pW0~>~}r-Ox}`Suo<9Weaq
z*d+v>_vKNiq<rp-o{(&S!i!8nUfJ9k!^#H%4_+1u@hR(1m<w5<{17_@GGY9?3JN(d
zpd7~uV-uUFK4TT%tgHYn)CqF=+2xisalYaler&Lam;gM;AB^Ww=ICRKt&c<2{iWqg
z*h`73!}+AG{FPJ!A;Ucjw~k2#02}b*Jk_!o=Ac|o!$g3fO`n_D_!&jZOkG#_u<}2e
zjM`kBMfM8b_uUA=-r6QkqamM98L>0IC<eAhpis}lFkiXijs2WlEi%>QI|^#W`<PgD
zz28PLs&1s&zk0Wr{G%ldFBR}3Mn8;z`)Fz%gZvxyg*k9bDH13v3`!oW(;~)(7tgRC
z%r4`><E^YDH&(tWibTLAbTQ6NoXp4oJO&=96Q)!9az`2Ss>EowK!KP`^T{^Lko-CE
zyg-e}FsYhy!EM0ErG7VZ==|Mb`0D15`?#Pl?@QPdx};I#2)Wk=OJ3R>|7h;npNtwD
zgR1(j>E`7B&DsqrUl}{Huzs$qd7sQz!nwZf?d=eu>_G)0MXh0$8TTu|W7pfDuHvFT
z?uG|iuj?%jSA4Q#IG}4&@-q$Z1J_}xrGv4p+m+)!4$Gh5I^V7io8VB13uyHYO{tDE
zgUKYPGxU0_<lYWQf0MDUrTws%$I)FzVk{4I_QV_tB<<RzpE9c7)z4&kVW(3EeTft-
z@bBzke+Po}eOdB&h;x-LQZaLQ`6f!6oy@}}$LkGOg^2aGG_Rx<%~<hBcdxu_t_uT)
zyUb!e4=!&0R27zOHB4;Jk9B#+pYh#07PeaPyYC1gKz2V8hhKJF?i(`%cqbxzTZqp1
z(1%hqSEH=tR}X+40`<@7qe2wi_esCed%;kxqeI<lbBRJzq{2q46<-Amgd3D0{LJ3H
zgLHKl=aBguoG3jVNMjY;QaDdfp~`0MTulmDJ<7kp!g+`41Nl_6dlq_Mt{`QWla-6P
z%PCgK>)G4#lc!8-T~1!zBZi9sQ5z8D<SIWUGK@MF2_?ZUb91NM0zH@bSSBhbEb4SL
zonUu@DG0w;5h++~?B$DgIj~qz5Y1z`LTpMt|3G`o$iw=?)zeSHgVwD*;fw^a#a<ew
z667c<g-P5(SC7`W<K#=6LQ_gwYaYw?(6J!arDE6Y^VFAb(pH^`3f@`;FPiR)atQkl
zrVs5jWLS|G2-7Nx$!@#fHI}5K>eDcM0C#{X$EPzC2<AI*aT}gKWME@pP>3j|Rd;eX
z);Vw9mrNeEpvpDRnVEd^s-3S)!7=wXR&ECUmNTSKXYnL-Kp<HX9wkhr#gVDAJ#f4l
zO-EdE>cIOEwUqaY%(h*ykmM!a<Fyejt(kJAHS10JTndt<4?)(lf>M$k1HtM%kXarf
zn{7G0=|my<kP|9tN!KZyXT>Gd5@bC#8^Q`R7Qr21UmwfIX$QWU-(qCV&4px~l+5k&
zy|rR6m6EUZj$ylPo#0>LtwnJQTy`Ind$g&VtQ^Rv5L+x`TfWn6YvOqq<yrt5n^D25
z#Zsj;&BSFxzL~7bkQW9^2ugg$`-M=hj?d#O>FVnRx#v$>b3>b?a?*#_sj9`h_nS?s
zUtw{h+R&*CM2x=(p3<7+Hq>Fmr36DQ<gLykS%gH2Mn2}$v|&MpVewMVpjaMUR|RaV
zR(&32E!QsP$dU((1rl_JNj3z^3nEX<`(Sjanbo_yo>-`vpZl9}u)y<DL7!Fgw+Vk4
ztA(DD&O|YN(>)mFJXYG<8<yx0oNVwR1|ww!7DLxv8ffzO`#E}}G4y$c&WYA7Bmz6n
zDq>9%0(&7#j+b0}q%UD{TWLExp|8e@OJwzpo!;J~r3_tyea<r^`7+gdPv4|I5;HKA
zFu=0S%$TWrhn-l>Q1Tx`;Q!2f!fXIk-oIIk1Qh`L!;)faG|S*;Hy**qFaac4iK$5S
zG(7T~${C?c;`uk|$AS=Tq3ch)zdpHUzifKz2yIQwy=+MdX6^oaE|Sl;onP}bVS#iy
zWI-0&@gZV#2nkw6j3JO9&eMMMPS#R^@(LXFTd&L<cplhc<?{Zh6-Bjsyg(>W2SB%h
zSXw%GbMxIE=H+|nP59It;te?N_^hydz3I~ak_QPI)E!ro1}As(#$g9cH{l5c`=4t(
zCl&J=!9Ek6d&TskaLY=XzvAg^JjW9^TS#t!=8{GY>`7W6W2=Xljog0O=CY7~+x%62
z-gk2g!Tp*$B6|)i-<6IJ;KzEsB)ijX3^l@p8mkZY(Tz@lAe1`R{R;RuVIyuRPVptw
znX>DIA!I&K^kJ0+)0%hD@_9a<h5eg@dy(Ah701qrhi^PSpj|DsVU%vMKOf3hx#BTe
z*72&<3qk9SdQd4vRvF^6EcQWLK65W~3jf@!p|fgHTk{dmdoY-HwBX$Rv(J0AV>MV-
zeu1YO>(!6<SQ(={g`HcJ0njJJKJT^stVZ&zogsn24L?5S7%vTL=5{`(t(b_eY9NqZ
z?f?Q5?{jS(FKx(z)I~e=8+<SY99L?Q91Ss<Ke+rjGO6$^874ZUncpyiU8G;~d`EGW
zU^Yzfe8jgjs&lhui-hegBaC<W{*sy@%Z$N;daKl4@iH)1Tx@UJnJT@~s`pUskXCl5
zwbjUqo2g}D53grNN>UuGXZ(6-51$d^d-4vL(zqMbfzlnTtgQS8iYu|ALt0&Gk-EYI
zm0LPLzJAFIt03TvwLDOgen>y8kP<p?(sunx`^M1PTWNOx<{z}c;Kx5sRwYT|v)DHE
zMt^-dBpYkZ3c1hlgG#jqkByL~+AR{%>#r-VeFQbt$Ju_IqNb|MI#pbAi||^Qk!P{0
zHq4{{fhjejU~?r4;hej`#ciY%K3pYTL1@SafdbCr;%;uh`=|jmy@XF__95gENu@}T
zdV_+lg^rB;(jj;qmXFQloV69cnl{z{!Q_QzYf{y+vUr)b5Q#KSCe0W>1$WcJr?+Go
z{nM20^eMrR+VD~>D|^$BZ>Y^)YTzv=%HK2-^$wtAyN|{uyrtor1ufn}ddKWQdcPjQ
zz?&tO-INkx7WSPB{C=Y;V&zlu!Hoh@W~%HFVrTk9*XP;FmYFcHHlKj&60VLOb6PJ%
zl_*$vM7#chpsN0CJnoXyt}pD6Z$(O=vA3}J?GO4FLJZy{St-GX?Svs3^sd*q9awX&
zz(ZYjw9-*1lmdI$Mv1Xg)oHv+QB_*FTTnDKC9@!yFIf-UWoC#-$&Q_YDx1vTU;Ya?
zEiI*>nUvThhd;^hGUpm*t&$JI#OxnSROD+M&EA?<<JS*}sF4n4PN}2CFpt^IkY{bJ
zG7S!R)TGQKTMr2>fa1y+L@`DlJYlXVW>+5qfAy1S=iM0@A!C80NJgTDzfy~EK-aW6
z!VIMM8e@cTeVu3DPPciVv|Qv&mY#LDO1|>@nstk<6oCo>y0G$^5??CzwGte$TBxjZ
z=V&RJ*}p3X?F)H8AztJMcRzhw)t-<^X*e1)xr&RZd!wl)?PW$#YJDZmrPqq*7W^U3
zH3RFnIE))t)(q+pc`nMF+v&o=tp8K{M-<Y5gsq+S08t4=_*-wQk03!y1VAoiKn+w0
zc@tWs7GzYOLzm<R;hTO4^J?zVsy<YBdpMARj-g!kxR~ZY6eqTMjo`g9S^>N~>@o4L
z9)c&E!s;9R6VfWBZQT!L*L-s1c&7xOlN<7n(9_r%u_)sjL~91!qYuq`kqgZ4yUA|o
zl+ADTFSZgSOrEv0UY=huqbX28^xZhoYq_gP8F@h?v7s!f3EcF`1us)s+W8oMG<PvL
z-JwB(B3{npUw9)y118nyOQI4;5CeV__3;T3l+t1$R&R44?9e~~>@RCLjCLb_na&Ql
zWarrCp{dW|7>?s7c|{A%pyucUj6+h|>aJWM4JL&_PThFUT0A|bQ*XVEh}YSiv9uzO
z2yL;sb!J~6L2%|6`l8NN5mAt(Yn5${fo^$+q{iXYTF2Ttp26&uZH1f1Tub5U<xs<R
zA)-qwg`PRr2-r)#cmTBMmN#6mtNCwCw+e{kXW%-GzIeZsKYnbZ%-yKE;&$4=(-&JG
z6iqjT26917zL2iCJiNk{t8n{@(DMx{3Z-h(48^xsIg6RqP076~pF?>028ESR0ZFz>
z|7b$5Vc>Q)62x=SR`Pi0*~n1_-wd$rGCDCitA4;1fGVJSUv;{0mcE-qe~{3Fb!1!T
z%uL~*S)G%+PXQxVGuLdS736jAdV3jl*Y7v3tCC?S#HsuIyin|zTiZ1oFQv=OUrY@m
zo}7f<!*RfMLeO?Z;Gf<;EO@g`GIua^$k*!;q_Z4B&9+LmeVBDD`nu*$s1}T;rIE23
z@o>Wy@a3Pqv~Jl<!P$Hd4*8(JUs%P0qb-qj`1>Abt!PhXxa_Bd7YA9BE91b%QI-C|
zTc7OD*3Zd%=6Ak7K0ZW(KKd+Yooj4V)>csO4;}LUVzKntX^YeLT0c5stY~*NhJsao
zX=g}p=NBaxeVC<K9p{RX$9F+DjI=Pu+*@yc(0>0&KkQl+YDtU5Kb5SLA?K*}i}Z(+
z=p}gG6|R7Sl3wHXI-KXCqY*QmaX!&2#csO<Q3B*je6)sB@IIAGhfxR?{dePwA4)t;
zI?O?WY%2gvCowr`c53m;V$B9_!O?U)d@aS|3$&w5LQK7Mh8bY4fs9v)c4o@r$^s#5
zgBjpH5%xpRu>^6!t}y*B2k5A8P|S8Y5_GNRisFd1pTa(DLrulO<(i!eN0c1$JyOhl
ziUj3dcTXuP5XMOS<YS&jV|YpH=1%@EULQZ2dJc@{)ktPO#J5+xHmNkz=c>P?4KK9a
z`$4PI=a)@&ILF=#P8loz2Gnlo5Fjg(<LSgBzz2N6H9nFzfa|1Rc75js;+LG_pE6qc
zP67LW#h_C97c9rO%QJzW8Q*==kiblTF!ocEFq@iRd;%-0XnMc(^`v+@e5Tg>;F7nt
zv)!<oi4eAKud8S~@9Q7RlvLBlthl}ko95GT(P8*6m1fQE|D4znVRh@CUn9)@zKUw0
zx!WclC7#Y_n^XfDO90;GnFdC<fBtFQM_{90iu~Pps>R|iZcS+bb{)~~D0fqg6|=U2
z4wx<Ls+#h?^FCl&HNGn7jsIlR-Jw@xv>*xyx}brNTU>wLUiz^(Mbkz%`&UB}#9Fk3
z2}57wY6qUkX6RGuho!4lM)t&qdO2z*QLlaT*5=D)AE|4M#-u?6g)DL=h~RFeY<+i5
zh^sQ}VXZJ_)@Dhl%Y6rA_Dg4|4>E`E_8sJ3;gP{d7358aWA}IrUJ?X0=ZwU{>fdCi
z>*Xl4ujSmz7OniSN~yV!y+J@mL!&*WEV}0?)V)($)~Do}H7@kvE0ZxzFY`a5Ge8Er
zQ2?3J|L6p`N<Y^NSJU#c$nF5j)!@WBjiCkZFDSWDC%Z4{iM=TUG~}0CRi4452%96E
zlq{`LLNINe)U8ioq3UuEvV@&q7zs5CnKO~6tl8KlW|xs=?bu~s7yDqfJf7h%=D;lg
zQ1eKR;4T5nX0MWKwS%LbfX4h*36Z~s>mdNWuYk?di`;S^bu2e|HuBk(@L4|6xd{fM
z7jJ9zOm9QJlaEXmPGw|MK!tB*?sq6gv@te%RR8ZF%c-ydBv@;VH-^(WdkiMxVf5_r
za1?q<Q~;Vr>=il$LK$RxSmpW<0apK5Ai+YhKrgr^?L0Sm9{Vhi*4`--+FK75Zs#V=
zldVV@sw=gSX3b&k-gq3@8L#ZMr`PFp&mj6eM}ggqk}9S{@MuyK%A;A8m_D?iV`q2w
zs(5*4kznhn%SzRcOR;r2lih^!1W$Tp)7=(!kszI9R{6<|X6cFP(73Rok&+Pd73J*g
zsYFhKj?kT2M3lX<&D2z2g!7j*&=>HYjxt$`6cL%Gm9)r=r<?E-U45xCnQ6bl$meQb
z_?ywiT2;s9C^MA06U<gIpSMWqR#!}Jukw6prtTlLKd#SYYNvaxk|mY6YJ~gIVTHAv
zRg0x`IR0qOWhgJAMC2A3mx6gIBw>K|)At{?wC1fnvV5u?@KrJ-ev&J4#ht~dD-bIo
zBWvjB1APQ-S6rCFa_`@mjYC9zTm4A&?8y_oH~M<rqfuI5bC-eo4lp3oxKtGixb+WM
z52^Ok``YOdPs+H6DXm4yzb`52W{Y8oXemyJR;max=8u}^-|9j^trI@1D`U~9D~P@5
z9xAEQ3)x;3Y;m>+$_gQ-6b5(+k9!nt*sm@-Rw-$qt}^J)7&yQ!tedflF;&Bxxm2~B
z^<35p{sRh4DA{QjK@AcLoo#pHJ%z0J(e}eeb5^n+Z9vVc#GN9^c7%fiHb+6%S^v(f
zUYmowRnE39G8U&Q#zPxJVNM8@%PN>CwfI9d*fc_9q;vJ~Q)bosj*9EFzs0BCP&r?e
z?XK=chqbd(CX+GHAm<)&9qinrab1XUXv;U>Bxg^kv5X$e*(R@icf%=@hG5vo<~9O6
z>YvK}W_;?R%bgJ(ujsV)HoGbE(PgY}%$3@xg;=(*w>JGV?4tP51e!7SzQ+~bG5fSR
zb5aX^czP;6cS`WGt)2$S->vqe3sj2?dX#ymv?_LnI7`y5Kei#(=5lPoomY#U=Zue?
zWAI2bXSyj8UF`m2Jg8m`%(--^gydhex!qV-otn{~$>u+x_j6E)TgWUhlAt!(!$>tg
z(Y)*wd%k$ci!vxrD@`G{1tvC&?8kmmH&QnlY0o2vc_X_Q!r5|LTqd-=!PkNyYy}==
zvE~>Ld6cMP+FX>2?}P!*zRW_)ig*yrA3n~S)xq>RL&6ca`;i$B+v_1#CkSWr7_~+J
zGH5>Cm=w{F#K-6NMxj6da)C}~5xrNK2na5DQ8NFK=j-@;6d3C!JM2|Rij3g8?cDO=
zH;a~*iDuHotZKOi0S2VLB<|B&L65$`?-`PA8m#^yYR#a==4%G3CccFSfKGC`eg`1v
zqM27h+Gruk8>D9Bt>jvz)y>XrR(7K@s6098iWWobvlTXRwP6aMKPJqn@!yL%iy*2c
zLJT?s;i)gPsXsTR+|L>tgY#%?K~C1TW2Pc6v*Sw6nG?b0=c7xA-RKIW#&R{_`nfrN
zjA^NSFhiN%+uR+lsDc~$G&fXvKaCed^?;!D#opgyR$G+?F~V_b0a)=vSH^jGYn5)9
zHz$F|#n-;H&M~#|H<6C4cgH_P-s<J<0LM+1)sC&W&SxvhcVyb>+8{x8CH<Bu!TpQa
z57h@^K`i(RdQyXaqV@~G(ycfIy$(rr2XvQhWEWO1eXJ6X$B>{W8B<q{I#Q>_XH;Z0
z<(L4-V()92eatG}qNkGzt~zkP=i4GXo-0!&23tAy@!Wn~ZTbE&BWcz})hak%exI(t
zYgdIrNcN*N{giz%KLQwAmm9Z#M16jX`r?A=RBd?!dPo92Iab+C-)6cD*+z6G1Gxpk
zrE=JorcZEHA1Bc0p9I9J4aM+4S?=M-0qw5*Pu=9rdYFXL-AN2Z6GE0!$@gr}<DuhO
z7hu^`YgAhvB1aaUg9zCwAbGK_01%rs8_P`kSlPFa-e+k=2~^wqv`9T^an?H&e^}(b
zdanK0ZhH?{<fIHTUo(X28jm}_y{eCjX5dp=^Qwiyw{Jc}f*7Ap>8@~VTm>iEy4x>@
zB_8sbZo`nExhq_|l<lI$;f8yxKEYu{S`B6TNDv$dI&<bOkf7w$Xv0^-Ka>SSjM!fR
zkkr6&F8n(pD3QJfBVDCP?U=RV=NDnWPIT(}Y^`|#rZ&li|76|rLaX7LIa#)UafbQ@
zW*Ku)n|0Nnh}Ab{Yh0@_|LPY%znG?s*dl-56Y8z*^K!D_&%YJ^zi~hbT;-3<DTW{8
zDkK`WS&O&)NzhR3alU`5n#?Ya#%#IuZ^ubi-`sK={zNb8nx0AOmX`f5j;VhRu-gBr
zAAv3L5%)j+<OE3jP-n4>b3A8mG9R#*b8_*YN*`K|dHheOAyn55SHbA<o%i2AFZ>4`
zh5fFFTt}kM($6Bd)fdt;DsGhsY-!GY_J(CJx7;=8ArUNyX*~8Ao<dY5u0ZaA2RvjE
zl0e935OzRJ-Vd*pbtl{o!4CG;S)mM$b4gD+>Mk2J#*OzcY(ag%g{}zfKnh6?!eu8$
zk@Vqb36&#$XB95#1j@%|MgdEPVCyj_VN|~)<%@AlljiP)=m8r=D+Xx;R>4`XetWEu
zm4q7GPWpHVcvR4OX=S<)Gpii$Y4RHDHZjQWvHE~CNSY18+z6H^{h(;^IA4JowU(r)
zNn8zf{RBF*@Vpl{G=N8{Oz?h)FDcdB)oH_*fUPCnpu9d&J7HAlWjAZe`fowKjllC%
zZm8W4=Ky6}rudQ3+t4(EC6ySNo^FHFr9qnht%6?IrGn`IO&c2H7s@Gq_c8_--Y+F(
z(+n-VOv)`)MMZfoK}KAviUIUq0;QR}510HmKDM|HKUPJjpdh}6%MbtfR3R*#dI%B}
z4%G!V0kHskQ1!LmEHs=E9VMRce**S|0l359>EgzVro?@wwYj5Ze1spw62=(_T~Zln
z*zBaf0>S!|pZXWJUg?znt48&zIys*=Ez!3P2_2Uc3sp8CX?ghI`}zv{f%ExG_=$Fj
z^Y;lh64z%Lhqhiq@V^_K0e<Oh(Bs9`E!a@m-7omd?LG}x4+br9kFShWhugm`2p+5h
zK{INhN5s^&Fy$#VFT9rerO^;Cifl9WdUgG$%uoq9cUuJ#ltn&Q`J5wvDG;%ktzppX
zzp61BdsOw?{qV2?F8HzaCNtWn-O@fngZ1L83r{@E3i#OB2>W9s$ZF$6ag>6>9l9$D
z4~4Rp<aAuUT_7z3L0v`7<S?B8$UEm#raq5{`T35~Q3ZGVmd|JS`C&rK(CL7{9~QiS
z0@GOi6PU(g)*5Q|UiKK$v0pWVqyKV<FUB-`{dhT|Pqr4Hx*cHv`h3hJbh(*Y;MJ!7
zLZOUAFLaGT#{T2Ga89A=n@P8hg#pNT@GDhWw=L<}<@hej;pS*z3!xNgD6gi(faYWD
zj-lx}>xj8F3Wlmz4?@1)G46E^Q<zM`SfopLj!D4WUA~&G&=xkZ7tODl^G0nOdtrDt
zBe5w`en*p4`BN>$qnA|BuhiJRcvEDwrZY_N4Y?dxxqvu$&-``z)Q0<QVc#Ss3`hiJ
zTaf^6{^{5=W$~!?(*TPFd;p+7004UIUeyp+#flZTfo83|X;%3%EBk_u`b<-4;-E4?
z?6G@iENyIFTEJ=|G;0+bKPkm}_VU)|x*A(}d(FsjVHV%6sH_nMj#A2GP;~h$TqIso
zQ;*ly?%~@apsCnBtH6t5?d@b65e8j06}o+Pr}n^-dUM~+1lp7OnAuh>_MYU$x!cgi
z&4)jcA1UG*Q;=(RJqNsP^dkTZ06>0H3GF-5#o6l3s&oO|Lr#7q$m#3c#at}AXV`|d
zW!g;Rrq(`pT!`0@@keq4mcBZN#}>MyWmASK2@x<3`4c6_lI7ZmvsKr)e-Zr1wN-^y
zdO_RWAxk&{K2d1WFmWVX7S6<E$a!|c+3)!3`RK>bgNpFmBJT2G64LpNRkLokYF}1z
z3TO?E-|%?)(h|p8wRX4<?M}awPB_v#!fvwO53d311GRB=oAe4+-1=@^_lsrd9%m1c
zmHKwM#1X>Pvp@v9Yp8DrUFQJ9F~OQ5BKgjvcHLkj!(0T@S+sAztXKO8{p<jO&-E2r
zq;5=pRokLLr1P=*HWGAWt3BfUQXC#BkiUF2o?W|<ia8CYqr^TYX?x4}od;Gav=Ga2
z^Sf4n-(G^%E%XhW&i6S7_SrD$JuW)l@50#+Ccx}o@9Ltu0kA%Yzj75dg$Xt3nhP5o
zsfe&@-f%Mmg4qn+eb(1BuDnJ3UNAvzwFcT)>F>f}f?8goQj;}|XC@OQY!u<jw?QMU
zstz_m3AA>4G3{?=UGKQUFnNidKYC5VC;(FBkR8lk<<*YeySKs_e*1DmbR*{_ZTzDE
zs*(00)7}gUKd}uAK`_{PfwhxF&gwRLyEuUF2xn>LI5B@6dB5|4SMMO}AgAuGAb1!;
z^zqAvCDDui5w?NYZ|74hQ<7(jh#%LZjjkCWVe=gR>Lgx<7Ka^N#SKo!w0ukZtr8<~
zRSReM%aWFHPF1FZbxWe~3oh`Zly?Wgqj^;(;}L9EX<ixu-$q!q1o_$&2?Xdr+A)t5
zM+(TFB0;PZc@+4kr$%jOu(fr4*z{_`LS*o(UJ;zZhch|kd)(ulrOREosN4@ri!si}
z<mai+uqv*cpxxT^c!RjE3BL&qCnBn-S0K6QucEx%W26)mM$~4HlgjaV@uK2Axqs$a
zMcpMgzgb@BLJk$XkB#qGn4E^+^ZSC}o$ts~=r8$D90jCQW06sg*sa>f_X0mn>Bk6*
z<}7!zNXtUb93$5t>lO?*tL_mf22uhD5Lyr=7gV9u-NK~X$x_zrai@dLV#Db2wx>^G
z&1Uw%5zwi)Zq1z79YU?9u07slt)_0M?2yKE=Y447Z;_M5Pbwp1=p!(b3L$(w>&xrT
zkcwj7OiVh1->huzKwi^5^><ve(V`T_pjzuJ@$&S4meXVBpDuuM_`VFixnA>0&2gb}
zzRu8#xN@Ks%Mr^+Qhg(J<*|5%pOn*0zTmBHr#8k!;(Q{1e5#gbZ#mdQa-BNkQ)kaV
z)rx=mq^Y4*!C^IWEI(tgO76isM2qJCg*qQr_aP-v!{2+<=UJ~!GXztAyy>=$&r@4$
z(ax3er-Px=<`cazrGU%K!+i21r|BlabMaBu8S(>K$7MH?kOMjEy6INt{W=x!*sDTv
z;&hlkgOhqq5qJ3QT$Cf;ZxU-Z1Ro+JJF@dLe)fiHcNtfvYZYYok|<4{2t?h2Opm3!
zqYQ>@;W^$r)j&)B#>KR$$;oK$<9C^QlryJy#s(+M6TdqUz}c1RUQ}y2V_4$Mt0ltn
zCL1P1S7%U|+PIJ!oJ!sgG^{$+-1O3T7+j}=pPglw&Mm!vBnmSnY-5Ui#9^DrhAyqZ
zz2be$7s&C%*^+-Jo@KnpY)aYmJ<erjR=%neq=B`2@q@avLXYLZy_b;BrGHa+!Fidf
z0-n<Z<f@s$2TL(0?4#d#EDCNr%_yXVyNfdRnR==6O;LC7zGh`PPktAxTbTET-R`-a
zt(p>Sx++t9g2~l6?!G0T`pVJ+x2!HWYaZtl%q;QoLZaJ=-*!iQ-hZ;SfAaq@_a0DD
zb=#IOpn#%+ASfV7KvGGjBvUAob1caj1SBVkC0Pq32g#Wt6*=degXFA|bC4X%KB(Wl
z-@V=a`n`VrkMWN|4GQX<ea_zd)ZS~&HP>9l`DT)Nagi$bW$0ys@obfk3N`%RK)rKh
z=Qy9|Zvnw!kp5rUH~){!lTKDOASNR7KJ9;_vXzbS=KoAV660XSCXNIQMb*;c%3|Mk
zOUh{99U9t$)6tzkYL23wqGpQGa|Vn|KqN#jxT4raKUki2^&pRBp0JdRfrNfIQPvpO
z@3j%WH^YsXiGuHAaU=4jVJ6{{cG@mDkimWAr<YtlBGn(U2>6{;DVoiWT~$8?#xf(@
ztlrIq%Wp~pF4SM~A2UICKwD7qzXo{hlWqd08Vw)`Sa9PkZmm<}vpd7YgG;mX#y$;(
zwETZWpL!aSHl&w|O2Z5cDdYB|=E_WOV5A41Z?w!aAJM7Pgf4(WpL=@{JLOm-chVcA
zFTm=w_-FEW$gH$heKw&5%nP-ZZ|D^O2}eS7Nh^fV6ASI1Xw?r<CUfZpw&P`+*m6Y;
z6=T!VF$AxQToiloXibAcG`{{4c)eL5(nh(jaQOuATw&+Wpt@QCA0SJqry{7YKmzmY
z{ME{SPgA)^i?I0^RQNzkcr^UW3I|vlvdtd1w7hrOU@dh>jh=lODVW2R^xb}1DAclx
z7Ak@DfDrRWCgZvc-s4kXgQe6k{S{v~_t88n2BKtv$v=*=U^BW`(eYNC#QBUr(H`p3
zyj{HN{Ovz?UEJAO*LYZg($<=0oNTu?T*Ydf1-H*Wvs@56cp16je0WhlB_kXZZiBWI
zH@}nO@$%f_!!7&=?{_<P*xU7Wj@2fV*WSUxIr5lFmIDK!TXV<*0?`Wv7OWZLb=27N
zDcb!P-H(H970<XIJ9(rpb+Ffcp{_zhw|x(p0oz1IRwt0Gk7vHjd?W>4h#IS$(%rQ>
zvF;l7XD-LeM5T=BvaF&!VLuKOdrI;B<86MTxyKD7&3bSJLX>0fc(o1{;sn<@m>sS4
zPQ%W<l=^Ya`7vki6S)zMiGpt-CiiVef_|tzC6BlM!Eo!IQoVM5@+Zqs5RKBs`sp~&
z;P=MOoS9I-J?mUO<COchgA?U1Dr$X#DXXnGxo`S|BcaG>gy2S8J)B)8EUW-i5=iL{
zh|A;R&2#vif^)~pUTr}(Eu)<ojp$}mnDQ`w99rs1Po0ft*%B*_ac;LSscQ$ONscIg
z#v`J4gh3pPqiXLJ45=yjlx`bY@lZ-t8DfRL!ZcYIt(|1KAA}PfdRJ-uy*O1@u3tvb
zkFJxv(UJRL4)5q79Q};Y?|Xu|vmzLguQ2&OG>gB&7_~dhVZ7C^Kqxav(oB*fvt@(D
z)cf+@om)gP#eTc`vkux!Qo`)g8Xw^LVX6Y%kAfRD8eFpbgh!>=qnGV^i&bU9DO}xD
zM~MlChzZZl#F)BxjVCpbJc6R?00GeX<#LQcKqA8;kTF92wUb%-j5>)B=R*s=vxIxe
zmS)jUw2%+LgSM>MnyLR9h~NyPAU&1hQZ1!W_5@=Ocx704otpEsmEj`Vm8fMDR$h6@
zgW>8`xkUMoT9FwwgjAfeR4g!b?M5Ge@I&C+Dt-oES@dOnp$+kET%!AlcGUF~ja?2u
zh$!t7gP6|H8)w-Q(C7M5&u%;SB|6SoN21Y1kh+F{WDM^@QTsfj!Fr9V>WJXG?)Ew2
zPMx9~%VD2?qS-v@*BQ2o+jx251h}CIo{uqP7Am*$!>7<N>Mn7`eVYcSm1{k|$#+$&
z+6~17NFlOI@7Pxqj4Ho+s&vndS*vd;Mx*kj(yD_M|030w>E+tc=$Q<|Z^Re{I&0Ks
zw=n{l(L|_dp@Tos@^~mMw%=Z{BF<2BU#HcI3lZ#Zur?YMMectTCSvx$kOUD&I%2ao
zN#rveNkh+!q6|VEacuzu#`aMniN<o_rI7J&Ze1L&uTs({fLlkmSo|z5GuJ(bsrShm
z4eynOv%FI{nyh^V1tojeZK6@l_N5NYXQhmpG41>2U$=c%ma7F#%nN?BNF5O&6lR6#
zf_!umQnC_^OF4NJ3)Bx2lm55VI)hdb31R|JOTaz7RLyDH-@O<piz_ZRlJqsnPl}uj
z6?>24;eq~faN|V&?SgE4q$5!hcm_krj70uKiflr}!=O^)1<`vH8J5gK-CIv_zfSpS
zl^TzU!+sjo&R0T%iWk~luN6LY+K)~sQqB@ZAl4-<gF#w%??OEv+?KvAsg#lbAu=Ix
zQcb8>AT-;Nq|<yVx6&zXf#aT~Qi6GVi~0ITezKm;KV&t9<Yl=4Rq32@eg}{0<qz$A
zFkGw%D%C59n5G>rs6DmyGap!0)!8z_m9z#R2)0+#J&AK~v>8s@CeEaxu3w~y`ulT)
ze@(?S0;rhy3F8B)natzw@_qLv2~3IR7fxZf;rW|V>-AeFm-h@cBwyEN%nA1S%f_o5
z5;aceyq?U=MZ?qqc4krs#$1)La84kcZfNnK`xb6G&$jvvo#M`2@-*w0K<JTay<l6k
z&4_f>bnm+h2d2xEzpQ^Jm?=iqQFnv|tmX6=H#bH(!)fh;vlkz}K}FkkJO2|zOxge!
zWC?xK?x<J9WutPH_!O}sld-hZsb-S9OGaC$sNs084dYE0c#MG&LGrfyzd6smEe|9K
zZhb+QYGo1y<~o46MbfTI0&mUj3sZD?W;e)MYjwI<^wUK$^U7yLsy+OS40cyQ?8z8{
zpOL>dJq^5-KQun6l`hj2KvPYW4|*h8^!!KA_+(*5^Sq&NDk=LEvE%`NpeP{}P`&xB
zb$&Bnmk8L;lYs#6MC7+~bYyP(58ca;=Yv-+vH;3TDsQmvZhQ1%UjFRCq4oKG006cS
zm4O}sKnug0KR}DQUkOelS^aNhrNnVt%#x$o#7psW*CX89#LWN+e!KwILAj3|f~v(^
zdUL~kuCHQ9VzQTko>D6K+@xY;hhF$jJBbaRjF(&qu%bP-K3{6=NgHeYO)JqoQ@F`T
zepnM7$3mx~_Dt+irr88%TG@|NB}wKF2a&aOV)pQnFS`SoSsA+_Tm*$7x`2)}!worj
z+i3aY4@YkNcV2pBuDlepfm{iX&;5(S4;7aQe8n+@blsKtKz)vRC`y>#=x%GW=V3zl
zY>fj#alctRk(&#O$K5W7b@T4Jo3|}0ow9F?QHX8LtC&3M;$Sov9YAzd++MRFWb7_w
zDtrX*hx4m4Yf?_0#6tFgt{*<ovTO_~XMXKBTxJ2C8(JAnCqB<!DUeJ(&C`-+g^n}l
zTWb3I$cj{VraCQo6540g&xveAWYTVwQzm|!EqL?p0qRvE!%wsk^vlf*u)KVUkL*ZF
zafNn~u-AQ49t@;UcOJY%VigJI3lHCz+yK-}KK~{E3k4nLC7+7PG`@-HzYycL;_cXp
zEy${_)s(MzgEc<AGOaGK%N4qcW;G~aMA`D5PYOU5ugQ-8MdJvNg~ip+_^v{?a^JR2
z<<W4$t>(?~Y-sn&<qH9F7?25DUTbm)_*A84+S+yq>cr_%W?IJQoXwjW;!%aVjC~Iw
zc-xkyZr7=HhMk^&Lpm%Q&ug692T#v`2O(6)N>ZGV8{@|;_J^$8&}RLgXxqE}4=>w}
z2&;()-W}yeeKwAOex7bweyf5ye(@8H88SsNHW<+NR86aZK1Ag9Hp@(z+HU0~MOW0+
z(NvHkLCss!lW<&s(YviQYqmys6OD=(%?Zhq7EBz?6Z*`Y4Q*SJey&gnzoO8l@Z)M3
z6%w=LN;jS-tCTn6;z`~C85s_|GU<uvbQ8SMh|42rrSQohRw8^lA#&_Xf#_YG*E#lZ
zJ{1`|a|_jD^M%<68^O)r_D<Nkx^w5V<WDwg1*=kkm8_98@Cq3cGPrMoY@8(mivWg?
zlpJU@xHAOu(TmD4g-vjS+RhB*w@CRsP<WX>Ee;q(?VLHS&<uLQ@;hX4@G+0Ls;73^
zO(&VzOe(xYQDoBbr6T67*=b?Hn2yiGYwwq-@P{CIH4{izg6Wtip8@t0R05L--+nrj
zoqRKP;d^3$fjN+%m{}0Ho9zvkiz2bJU@DIiYzBo7DPmJ+Y>G1VWHpWX0UnP64ioii
zIr^Y$>n2;hcVBe2c{simnxJl6y?R{o9Hj<3q&aNe__ly0{|OVah6uaALtIAbDh#q(
zh~ibF@^KKpQ2ta6Zd+G`Lv>^4bon}3@>4R?tN~qm>cXi9C|7{?6zTEjaKA>R<T2lH
zBF+P-^Z|#<4g^43F;SwFCG?j^fU(1HK@A(!y|gR53XU6}B(iH6g4gPPZmhKZHh9Xq
z&-kYm@jtCbnQnECCsj;+A7?|nl$%XQ&2<8465kuU3h#N#QpuMY3Le9bHrBqRL-v6v
zO=#o!kziD`6+wkx0O5wd8KO9{^wRYL<_qfrNDxJaKhcuEUHSn_X8Hvd<O~Snbm?BX
zesiNr2cwtK*V{}aQTUW<cn_9h9|MVBfHM?w<wJQ1M4gscQT0zd{Ky`1b5kGK`rUZD
z|0AJ@+<sTncG})Yc8s|W`4C7A7~uk_X@RQplVcgnHy!ot>nvqd`Rt=c%-+_XQ^fDB
ztVoC4H0$RI+DE7AB}>58gUCRkTF_RSpJ;@RfIV1{e9oR%6m9h0ID|e0xvW9>=3vxG
z((0KJY}zEo`7GkDg8ApsM*=yW<4AGpuGGjS($;&Q=WS&>f=1q~*x1g7!x;+p#fDTf
z>w`Jp-@#vh1ugEfd7<QC(TAF7cGET91~!a>TB5CGi^)phNy?Il97abtWz*IKb9&o(
zVCw7QOZmFwc0XMKVwI6-jAJ*Q6}xP3@ltax6h-i*>RfUp!AtkRRb`5j32F=fzh!q0
zc`3^By%aL+USmS{{6&FXh4!<3c1;?{ABIiLYWODQJbIHQPTg}ofF4dQ=4MhX>*0i-
z34>l<Q%MyIM=1!yHnk?}TJd}<E20W`ntKgaIHeUt>>_NxeHnvK`%@=T<2WRKiL($R
z{I3oL&6}}vxL$b>*)|b7&!!B)6P~-b&qt*))WY?6Ram7Q302?!wGCYVj?Lw(4>(Q=
zaWkwnou+zM>1w6Kc|NwK=2`AOZS~Q^gVf*6&E=_rD?zUOVaD-j_P1C~`>+wme%)3L
z$BOv8cb+>E+v%^vSK2;CMTw!)WUoV&7v%kViB}q;zbe>tgKNEos)(c|_X;m*`HU+N
zh33vDNJpF=J%Y~*+O0e|jT$pPO<##C6^hr!ks)VmNv0^jxNbM;^~%__{_i_iY3=}V
z<<k1>o-W0yj%d&3!^O?8%XTxfxSiO1mjJnX_>!0~?AfZ7tLZ-1*xuI<fgcV7i733M
zun+T`DxOD6^jcWl#p$Gd<z;`S78sw?sraR`D)ACXh@_s=dsi(v<aHPckX^1+8%u*x
zJ(@1KH-Z}iigVfS3Hq#$7w{3soT>1Q_zo4`254WeJl)mEt0&5gE_lix*|5&iB%>Nv
zB?Vkps%{d@1WMc4oh?E}zhK4qseHoWfPI`qmCH*2IyR;stho-_lu-MutCx*Y4iBS;
zrlvcsDQAQ9pn?lsG7!^-j}=nYACgW1@+q)(@u7M8Ldw8_bBYZs?p2M0Bb12_<;m$&
z9s$zc1Xb=<u#*ste=A25fp!eU%P=e{yz^uc)3Y~LHAU!P4e!TtrLZ!_J1{4_FL~c(
zNT+KU9kF-ZM_Ir->h>Jsz-5)56l2!UNQqdNxcns^-{4__B|2T6k1+$@Vq4lD4lfza
z<Qn2*Zd?d>!Zi0HR<pGxa05kN+!&>5ob=T6&WPX-jju-or`MJUJ*o<C#UhO&a$%Ry
zREG&{OjKfhaINY`@)r_@Y42S6W<ojIn<T4NB$=aIZuz9^JF49CK=}d}LCDQmF>Q=)
zEO=vS4<=Q&Yi($*4i~||OCdxLG<Sbt6}(HP)P`7wn_<U^U$lk{R6Mg9KkorWD&<iP
z;F)J4HXf0F>g^?a`V3=%+p}P56JS6>WX*p`;hPZyn2tE*{cEuz0MlWB>Z_A}3)ljm
z8U`OfHzb*RoKEB*lmd^<8uy=c>3C3AQL5`|aRl~zLUGg$YoW~1WO?SUp4-8H`&d4t
z58xNv+6@f2vuMR{DiZ|k&{~6)OVHx1<Lm~rpJ)-R77yfa+`ey09^%?VLH+)&q&1uM
z3vW<fb(#8N>%NBt`K2I<$7CV@<4Aqm66HK{H{{(#CB?}+eGlfNE^5KBVDFmDpJ-5(
zw`G&twqDS|JK9z5;Add4TzJW==QFhe&H&ZIef>ApB7X$b?u{j$X?>|Q!eT9(*>;Oq
zSc_+XZ;`U6?o>YS>qkB4EH>V!^rL))l^LVhdQ#?F(vEpXhS4Aw-a_Z{sZ@JUN3093
z$I<AhiWj37IHSeinP?4oM)jz+wYJoz>)3r?p^Lw`s44oiaPuMq@8U#VF;gxnmt~cO
zo_SR&itljr(MjcjtMwVV&ALp?PTVqTXhD8A<kC-Kf`X>AZ;|r2r>4H~M0eKqCz=6l
z<Q#VT=n}oOVSEVyntq(4qQt}+tn0*4{E7>kmkVVHi`>q)Jvv`@e*?BT1RxRxYvNk)
z==5sW$TzkaTZ2VG1{zT(C?Ac^`)T>9tW~;`-wsj9D9oQ|Xk%7Uika+{G}Jj~(*Xec
z;gHmf8<9hq$p}?avw_*b4F6mlW4-F8O@Po*qLZY4oF_{FVqKB1Z^?S|6e=i|K&FrF
zyyQyagvF!v%0jM;6gsUV`xqc?*eHL?<oI^=I!B`4HP3K)wU&_tPR7=izD;tGf&v>z
zh$yR}-80(TvpS|*oFCxLF+*!-ev82^l|B4<CM*ofMDpaGA`c^ojp{;J@c!!8EZ>Ok
zjECPzW1&P`>tc^FJ5@L;n9SiSvrJ2oQJ?Puf)Ss-lbOa9jIV;9u`N`m`$~RYtTvMq
z1wvU0Yz+Eg+6GhvH#%H0o;}LRlE#u{r{XOT_m$m&vYXR&<(TtKU}O|j<$(yqpEPCr
zV6B-ukq{|K+y;L#XUe8Gc-ib47tI-_N`P@sThU9Dlqr<kc65S=aKj=9KIL_v>M$jg
zZ_N2)^Y^gBRBAgH+AE8|4bS6qqCU-_B{fgPiTHRKq28Z2009a!DOetLAwL>DyD%r@
zjkw7zqNvHT>2Qu?(PS_AMU^VH7w%NZswVXsmw(J}N%eIK)mS+(5t39YjyJwag`)^N
zDN0a;$%-f=EhE24kLnAtRntn!^Hnex+jfEZJ5>_x;@OeE<5+r=u+Eli7%M;uWTmfc
zSKjC4dYA>GxtnuSW#ax>pgPCv#5quJyI#8Xfli;48p~N1EgD&}xmAgM3gG!oT$@gL
zvSp=Ac4p^aI=o;2?PBC7nix`x?MJF;JNcPr17FN(%!)JR-2nk|V<4sqIW&a6u%(aX
zCp%oI-t|uxx>pJWUjY*;cRasR?I2dq4sXpOgtK=wxH(tB$(3`uT{`BG;#p(gNS+$U
ze2;0N`4Xi3i2LmM|4h3r`8)0Q(;wPx>_2F?w{$QtJscJXA3aDI*p${foY43b7%9Wy
zL~wWEL&4GGiCd?yFFtf|>Lvp*{Hj7rE92g8DD;W#vEb=(d5-(`NldN?M4RFJ8&J^h
zJgx-8G4Cxa+WiIk1`ut+0rZH7*~(Tc(0oEo#_-*%+-t@yf1A1*4QVP9-}L!o#kk;=
z(}i;6(g8Jg0|>6Iv2o;UcI%_Oj-3T^Br^X7SCunUn_lFyru2+363iSprgSXN@?hY0
zo?~p@%Y9*6T@js_icBWw@)2E^a3^PwVq)DYae?L0J*lA}!?z#!TztUH`6nj_^l>)t
zuf~Xp_8h99gn0#KEIO;o1^G^U4X4dH=Jmh0wn<UvIBIIVcW2h^HexTCGC8&UmAKm^
z^uNGY8_SQleq}1l!4lm)9YYP+Ux!$WBHZ2#z*SXLV@I3mqxcez7Rn=+Osbh_wY6q%
zo7%~*fsikJG3x7*S(C8uZLRF=+Vh34I1`S6&YZHfj=-+P<)3NaeFT7%THAN9e1$05
zZASy@tsQYW@yQK9Ysa2mWpsNm=EG8_vkpwE5&_=cZt3Rz?trF;bDB?ocOms-P%c+m
z;!a&HIY&M+bvoTj*zEOp6&oNYU*0wneA3An_UCCz%b!!XLIPCVrN5}Q-6c<jQh}^%
z_vgl~pW`NDRWe4DoF`I8aJoU9g(VTVhJ!j|nOZvmcN;gK(iU&XI%wx@>|fHf=%<Ux
z9kWGNC-!g;Zwtq6n16z30CEFQfmF-?Z^^dZ=<Tji>;E8CBmSLK-Q|P$%&9az2!nCB
zAa3E!DSGK*OZ(YM)vSas?>O>pbXwop{gq>;RP^3l-E+S1i&9B(m+4nI4cT9@#Ohq`
zzvXJD@g2RVe3svNk;WN!xyjS}W4}X6t3a}zbV4q$r_-3X{ngr57K`8OrQjpG>MOeS
z9b*p5X+#S0hpu&ua-RI1TY%1=Mnb7s_oO3`Kf2`iM~d!0ADsIKr1ihz4*gqj4udhs
z`w{zlqiH9f&FSj3Ek{c=)NMzJbpKnp^Q07r1MtYdGMN5hAOUxE`(K!<+PNBym)@+9
zO&@@kNEb1g?Jm*1M}PT|RQ}3_vTs6+Z{k0dpo&B4W47NmURf%f+Tqmooie+?4*d<7
zVfzSlR9z7SOQ+-_MuqGi6=Yd}H2uE`)^YF<_kAE<h`ovcHsV3Q!mBetRs*})*^GC8
zdq+Ri>%`E;27*(nnp#p`X|Si+`Z&znQ23tuFO2A~q}ZG3gvxh6;NC&=EGJ0N|G`En
zOZ(m)ew$>#B52&vqTfORhPy^U80ON@K+YU<k+-pWh?4CUk#d-wi=NXpywef0ZvlP8
zXA*VLZXHsgjaL%@*=XJw>lMw4f7fPvm#R>qerTIZ9!Ky~f+jwD_eLiTZPL=j!zPmt
zoq4m1ee8U>Dk!4UhTX$0LyvF`p<dQ{i_4_ZDndrlEtiB2$dvI(R<{`o|M)m2hbVC_
zt|8pQg;VB2AS$~xP6lcj92lEy8Xi*Gt?@3MaEt$+e_%v;ub56PWojzd(v=l4v_HHP
z^wB9uB~npELQ#~?%pMKkko_hEGjdA*D)Dw3&VJ3?1K8bk4DHOnOK>R}M8nYlPVMji
zKT&vtg9&^R6`ON4bcB@Z9Yw93Pdo>matI0}UmYjsnDkz~^uW|;R|POoh2$4%o8~YK
zo*%Hzn7jvsWl9gc)byC2>hm1zEYF@UeLvcjK7IwbD~+eiw)wK^7R1`FNn88*%#WGI
zwx;D~kvmp;imzct@@JcXZieDthe9lM&me^2qjo?!BEZ?{Rv(%CXVB;%Rv(|iF@j#`
zbHRa3WG_PVA#L$T4*B5{M`K0}slKH?yA2hFj(XCB?uv?vubDMgU(8081#HnUvMrML
z(MKVhaj>;9B+lRjWa&mW?DsybK6Xl(`YjR&>;Z%zg3E!$`IyU($B^Ue9U1I+Fx|<v
z1bwpCi+XiIc>x&K>7=!u3vgYGL%|^Di+=eZkiI@`y8Ba;OqU9@B4{dQY5@#N9R9>y
zWeS+L4=1yDsgz!1iQewQC<8!CRU9@(fQv}FL8dpZJrG6+2v?gk+@Sj=0e_I7md5bH
z3t%Ds2W&)XQ9_^|T7LRXRjgrs=0xvuh7_DVjm}$VqOXBc9E{;&C3o@V0Uo2#uz}OS
zkaDvjnINWS+WY&q*C*=yqH<={$UG@6Qp*Gy`hB~K!gs^Lp?dICx`+oAPVnKh_e?LY
z&(i?-l){+cP_|}-qA<ybnXQzaf=Z~_50Njb(+ucnO({We4Tc+!6^6rC1?<(?Pv73=
zgl}dyrQke&&S?G~9l8eegyEkrf?{_l;Ocq;W{Zw8Yz?a-?@>E!pMDc^w9JiqZzdR+
z`J;0T`(LP#`MNl;uH<ysL?q-K4Vn6K3R5>&Cm#sT$NooJ(|>&i)H%~dy?B&-^b6pI
znnxD`VzPmQ=Kx{+{C37M6D0wMJMef>i$ws&7xn>oD7bv!8pr<h+j~qWH`m`w*OLE%
z*Z{(d0QJ1|Kd2}MP|+E6`V?Ol-V82JPH<j`&({_#{rG%GaTH2}e3vcLDl_~UxzxC6
z;<zy9;W3!${u~yw2wHp)T<laC13TciDPBE+4qo=(6IX9&Y!D*<vfQ=>8Mbl_Q~{7V
zi!ZRS(Kj{VEFVN>RNOrFv+Z7fX9Ksm6?e{iNM8#}!^f5b>n0JqeRGo&fHO~Y=pu>}
zX|n-l&6@c74S_JQ1p~OHv;6$!H`qS=B@iy5vIuECgCDpo@irAo{D53~6+rM-K)oy`
zY`x(epvp%iJ%K7G0s^`}ps7z~?<DzWU)6lOSQYxrb>Y%iVssa0zlaMYR${46wr}3?
z34E>;9NF!;UihwWCG#x>->7B?@&ONly=0j3hbU2+oVQZQq_PcfUUVxa*R8arg=*iH
zRKp0RN~v7q=~Jtua-I)8iYG4!07@BCVKSv*a!aOfM8Cd2LXp4Tzk8k0EM&OW7f61W
z3LL<(`Id;~fTECfu|q(U4R!*}?HJ7&Bx&NJ>lt*WLx}*4IjTlgnPsBY;V8ctKQevC
zn>I&fNmW-!s1xEReVab6^`2asv)jFh;RjyQic*uC+K!JEEpKthsYVEp8b5wdGhTP9
zn!QraoFOkt#Y-4u0~(<+ix>{&-Bt9)lX9}vG0GiVovo$TvzJzEn<9GZ%4Ug_!1++U
zEpP9oh=C`y(l%gQ?jL|HxS{brly5W}Aj!$u)i(#H!7a#eHViPnE0B@G;Rg++LIzv%
z>aN#7^ssW6dd>Bl^<Pem|1T$|)W{aY15-xpIJctn;pQQc9@$~O`G9SZ(w4nJV+gC7
zfaK<@Mu*W?UgJrt?Bi8tQj!N`S2FKnJZ61n0o11cSP#n_S$XN%xOllK`}wX(Ny>EE
z^v<9uA4&k$O4)@twwKp@I~IqIUcbRGmc0_T#L00?$tXTvOHK^nEj4)OD}XG5to#oB
zcn$Iq>ctn>1Q?~VeCfRA`nvIqNEr}?;ZpYwihJL8Rh3a?Q#B}Ms<Cg6bCgP(wiSZQ
z_#k772du0Gpwqaif4MaRT|@x_=PIs41OIe&Ed6qn9B4Pl&kUr=r7xuq_>=v0_vwM9
zen$R^cx*}_@BY&POxxsZ2M7SNe>p%Rj`XqYt6oj{_5yBA^F*)n>InpcR9uQ3AH`qx
zrX3U5h70~sBMczHrW${$Qyf7Olx(XbB%3_mzCeW2ZQ%x9&WVzeVp{u>k(vF+2iY`b
zk*FqHH2wn$<7)%Xs=zsE552(9aKoJ|i?$1EKfB}2lSG~7*Rz?~%)cBFCaa^9&}0F^
z+Jv+R@Ru9)5`<H_Mn>m2UEyL*&pPSq+!uL+b<(P>=`3*ES9)qHMdB3fU+2)!U@7}(
z{9p@b`<fOlpU}ysB=<c0_Q(=#N+?6_gwocB@89E&Hpf(Qq|(>oFM@_^iwd0y$V};{
z6Up1`_V9$FB3>4&-Q}TfAdMk1&^GQT>Yd3MUIRTYQ?V5~#L6gaklR>g=y)Qg`JSn|
z{sWm&vbzd&copC0C>Oj>G62)O^Wm6KX?Wu88bVT1JD!nAqFOmFyoA|ke}!!C9XKCX
z)Qwk6@5iLI#Zyx^PTo#Hc=4rE@SEJ2lKJO8-37BIG#iIc)h0@+*`ZDp6Jd-U^52;x
zRwM&@>|Y<AYIQPIxJ`6?KTXD0#wQ+yFI!Dp=L|97Oh0x|Rs@)Xl1+tmykezHLB(ip
zqq=6k3D!hnJ4*YD5Qf~d!k|8$xGYM1CYA5VN2c~^To0fo-)YL36-pw%rwa0^o}atp
zG8(Ljm8ew*rT1(-;TMyIT6Bdj(q3V59BYY}QIsd^V7VlzHLIzZJuTnqH&arAn$+26
zG&-zgZjTr1+mpekb~SNkVyNry#&9GF`6P_RIqlPB@V?%-jR8kGvUCOG-Y!(DSCxv)
z75dm%)eox0IkA1xVds%EG%L=EcCfC_1tQXG^#;x7*s47p*Ba8QuSsO_F0K_7f3PcT
zc68D`r?vc`lFP^moa}7VGjsntZicEN&5h7bh{jovHNjA;nnNTRGTrTQN1*x297%3P
zM68PcMQgprAI(hjk*UwWliCWx{<6RS^WMOJE0f@Fd2~1b#l9~X`}^)GxuqgRYjlmR
z{$qEvNcb1yf3$U8tSWg?ehhUjk!kYRCj9TeF%-qkm)*q|9>=?743!zH9|ih#Vee=n
zoMCH_ZrO~2UWA--6rfm3noz1k=aZfiulm{e+o@&5MgJ8xAa2RF{uL)swi2I_qf5>>
zs-|nM8~F|Q)b$?DkpH6K(rRCIO~h^w3NiCV?Ry6{m_4wO6P%~UB-E2Mr|XQ;4*SC<
z(YuSoXg*+nr|sK5R@Eisb0N5@l3=AC^pC%VtjZ{?I8Ot=bfE@U3Yx8`^N7=YLK(w7
zrahjJq~Kbu%F|w5qKKp~1P^|qjTaO(-Q(WsUWaZ!ooz70xPt)+In1L3)+~!dWJoDo
zz)OBwy2_q-rT40FI3-4Q@CPcYaS2DjOT0YBW`(zXc)%4EW&F%3r14yBVyc}&IDNXe
z%AVx90qetz=jtfd;R@j3P=@1@9kNbAme2CA;kX)o?80?z%BglrQ$jaMXIw*)w>CmQ
z(bnYv6BU+eafT5>rMPW8%Nt~GjQX~&Af2^(`V&p++dk%>=E)Y;O$A|6CDHui->yIw
zTMK_39|9z-Ri3MGz7D@`PP0Kc9r(<QWq!cUWI&Y%7lspe=Mg<^a!(DXNkBP|Ci*lY
zF`&#uxTFaPM`qHf6Z|?$NI1QVNM~HT_4D#dahN7moWWF2=L&$~<vh=JQ@W!<p6fE{
zCT1vVAbfq6G+=52H#c#?^VeBz;ay<>b}E(190~u%2=Q~&5zdNN5zKLu#4h7~m!DyB
z_VLpE(&-f*Fl1fjFQyZha0D}arvHptc4i5`=k?7e38&2X$X|~pS-R*makM2j_(NV}
z9{L-TP?>Rtxok~Bov~!isBMxkqOh6k>-CQ7?WZdzTz-w3^hiM0+Wknqzr6@K`~ss<
z*2zeG?yAnU8;sZxF*DH_=4{Lj9_=WJlJAi?SNj@q%Ys-QP|#EzF%Kx$qC3Oy4otpy
zRj<2ZuQ1uwNgG#j{7Urt0y7V@`ISv7Db#b1!`W29Jb8LPije|d!t@bGNJh2E!bhRW
zr}u$$&bQ|(%yt7EFCvoe-4F*X=A;Nu!%I*!0a8|-QW}noF_=zRrQM32gSSzA*A@>s
z$$j^5)?+g}U@YKt_$Qj725eBWksXkg{OcW&=X9fyy&G8}%CmO|w2=gjY*1~pBcHnu
zUq946!v<&wS`%kb0f)f1^E$eKc%8$!>#dYm<b-HUdRZ&W)SP5uk2i{1=a=T<UGuE?
zRw&tJjmsV|w$!3;S-6p85qoWG@F1$c$3|BS)-s2CY-qnR6mA5fuh-6!S{a6y3V84g
zZG-ZtA983u`ftwG%zWYH$7tAZXG>#rwWg$s;}tq~E`@)dI|mSx7V1Pz!`3<q3RYV9
zy}I%fNgdbrRiUyJ1J7j*a>18_2fTQv67;aJw@uc&kGd|4H8tpy84u`q>GE0p?!R3q
z`(7dO08NcptMs<7m5ROz3Kl!a6dt{_n5jIfQie_B6bHCs6DcoR8jy}g9lTEnveyUh
z-Okh2DGKI!CFOb$p$-1!rltLI(V($cpS+J7W~KmgvG()Jdx9BAi)gN85L&-ZR+=v{
z;l^QK4__chlhefWnz0&!82)tqZzNbXQ-%boSk;?zH)WqtK@&rI84MTF)h$akV@QMf
z31_q_?FeQZ3fvaapyCKlt{aYq>hIjI*@EJP2DE63?|a!!Z<R}0*Sh)U@>)r&Yn0Rq
zI~^f4f+gAleRTW^UbK>%Z%lq(sU%pe;sMi=ej2-MzG=X4@WoDkQTygOmr{kTvZk4$
zupOpLQTLug-nOBv?)iE#qAWtK+Nux#R==F9Li(F_W>*taUgX)2WR3^kAV4<}uuLr2
zG)IN7_Y1}?jAGeDO`iMVzg?(eS2ua^a0Gd$eE%}=lQ?-0l4D`x_`4((4)!Z@OvkXR
zW>w3#h81KuwFyvFQm!>+m30uUbc8D)p=M^hf6^vF6Q=1%So|!{n`xQosoAZ0V7V#K
zKQk^Tz@HU@WWFw`t7)uy*+UaVSp%}t#-)F|fYewoo^c>rRqA0=F~4}NED6M9=0A}w
zjm0xJ-TGjX$U4jQosEG8ydIkno$kT>sar@lmSlse_y<unaq#UxS2A?%r#o>mR7+~+
z@0{4_0E-Yr)4Y)DrV%#LJ-q_Y4=*849|X6?k{JpHn6HVdm6u67py8D3Br+*vV#;0X
z;LWy=tLd^j7<uy&4FHR>R_lJRIW0|<J4|Zl=?AGQ>XOVa3t^@3mG<S+x5o8l*4CS;
zwh_S{PouWSu38={lIdMFYx|;o@Hfvah$pXn0S&;YY@N^1U%%jE>WZN|GRy<1LEqqh
zEzeWzCJwW)Feyw4Cf<aqwRM84V#u4DbErnG3RiB?;EN~ugu_VJ=!epxO5t%cQt%t0
zpniXOZ$DiNQb0>?LiUqjMqGKXlimk0b5kA>DPUG_8X44ca>mt+8hS5ftiBN>{X887
zXYm}vKfV`nriz40UdTxh!IQ{f+?nn>oc?>KaRbQdhU4d-Orky^nGUsjq%a;;SU5Q3
z&+#>Bit!-%j*CuR-<eBAfzvnu3gfi^9>%qo&*7M;wRUV_e|%?iw&5Go04sYZc7l;@
z=LQH5^`M$;JER5mgc&uKG<3F717r~{#M~gmiH@I&duanNOLyL|qiN?mY`uEWd@|tX
za?pu)kbB}J#5S?LTwa439L`^T8UCR=Nhw1wWaqG8<_4i5IinqSE%?06VD}a+%KE_R
z5~s!^($y*1(ioo(^e+18pcVsATFDB|G+^>ysI3$w4ImVuqN`Unu&bM6(|=cx`fp_r
zT$f4x>m@g;s5SUe;MKIDC}L9l7^$%t=w>@}RdJ|zB~%tKsU0PkfM9tY@ZnMv98_$k
zp<=MAUPD<ESSEsUg`|ayBlcDU48CkHxzJg}U-&d^Y{=Ksz=Zj6to<+f;<%J%XM2wm
zmDYA^s;%-pu+u`MzFnFd#Jl%>&6Mc)Wd*@b$pa`NeS*vH9(+)ykC;@Yabpw|K5lp6
zx!k<to%~d6W-BONvRB1iaXfcM+XZ29m-`@3^_yS2QoFLM8GqKRw1urc$OBjx>Ea&B
zpzidj@Wl`AOE1%FD~c~#wozj(M~Q2(pE-n+1Sz`42S=_#%4NxtOvkI=ESjmtFwL&m
zzVQGa8ec!cpk^^y@zoHK*D7sFS=)46@lVsjUbA&&BGbWO2dXqUT++sdZ`?6YEXXB;
zt3X;*?Ez#wAB06h6m?gdX=iAb^bLRn)%X&suyXV04ur~x>sY|m{fGwv!g|TIjy=E=
zYunAGWk^d4*CN9$Pc$D&r!cz*aK%)82aob_n*BwMMIK4jmN{d^0n!NtshEVV95{AG
zR{kZQT$S8&r53MMCXlZPl{JLxWx1pR8duf%4kN3zqB>0gW|~?W4ntOH+K~W2XVEL9
z*CQ7N8;kAs^L-c%8e7-_%wB-nB4#UQuiXE55~I415I`{VVMl-^q0MZs!wB$4U;jkg
zOhEyvXibAq+Mj5i2p})=+di%jsv5D6Bpn!PTq4K5vX@_m%4G9@*?4(hPOg)JSr;Dy
zJquUD4}*(_a*N32`Yt|wSDVl-`7A8hR`vo%b(&8#cq{XmFnx<rt#0WAfpwP$9BDBU
zU#8ueqwqR~w_QV`<t~mRQFZ~4zr#tDzoCl2`#7mqb^b4SD0Ixn0W9*b9z3I@T!h)7
z;9bPk`JcdK<B~jJD_(~n^D1|>#9LX<9&Ew35vU;Z?b;E<9;?ViC#-H101GZV6(z$q
z7|lmymVu?>GbW))&~Oa^rZK)(0&=~DZ+ijY<nMZAV&X^=!h-j}cLmT}yFTZ6=pCTY
z*KgnrNQ+_AzCNyr@h=qn`XGRG{ZF8qq3bA}I_eAHM9JKwB9-+SP6VKC;OIZ?|6l(8
z_jMuH+abOQOVaO@QsjT-7{%@p7I%M71R3U82ADMu;JQL(`2G*>(G9nuZRzPE0r3Ue
zp(nvSY218%-BT9k0kMzI(PxJ(pE{{LVG36aUKEgAc#@u>R#9}7r&7ajiHymCd=-KV
znl+H^%@5c0Zmu_s<;}TIa09-Qu&yZOYQ^LEMm#~^@D0}tZ(t$0F%VBC2B}JV?^!;g
zk}<PlazsyRd#r-8ZoI_n(~#1TD*fCw%A)x6mA%)7Ihk^K;0;@ncDt+&6wPwR2$L%|
z6|{F{miK7->vY1_*As3(D^<g6_&iPZtDDw`hsZKsVT<S?hiYoQ-GuYF4P_v@`L>ib
z7$c-Ouu<)@Z<WQ={t<w3?PDfiFC86b;)&ia+d7F0u(KQTr!aOMO*bo}0Z>Ujkw(0a
zK%Hi}=$cS8qj>y#m;6S$MMNx}ASZNdyU)WS;9iUy<3ab=PR^))9EOy`LIv+KWOQdH
zM@pUg4eOULh%~pI1=$+4pdo=xa*kT*qvf<=hzI(`n`#yu{YEC=p3sheUe^hJ7?>$3
zTB2EtAB?F)fMvirn5nHFrVdpbtObR7<g&kZ)OlX-#s9EId3*M`?Qn_j1NK&a{2dFW
zX-Z}bGP)a+(y_J0)mr7Uq1zgc&tFtqe4)+Gf1H|0iym&n6xCklEt5lap-gaF+ke&Y
z2)sB5symGKyAV;EJ7Fbfa`pcz2xl@snkOPmtGN&GDKcp9wwWeN)wEKV6d;qGpi~#o
z=OEm?j)|g?Vv9_nxmHth5W@>jGR|TO0G~|VA1`jv6ZEZ)uYDZVt4i44A!x3Fct=U1
zQ$D4EYh$bZ$~#JK94p(SMsz;y=D82tU0GVdhvvo?XrMT{x~lA2n3YsV5icQoQ%1<n
zunb35{Rl@jFK<S!tnIVAV%#|L6<wFl+&m8A_UL0Kj^iv=%mzNaiwDW)j=%RM$pi0<
zggjH?B;78TRm~|tXHQU5uZe!(9q(teLTnzDKmR4W_EGNe^JSu7mdwxJ>4wh6GpnfC
z<2WwCQrz|%1kQATBwL(@|MxJGc2|O#ml7YJ*}b`$)0LZc%-&Y5#J}L<MzZj%_#E1Q
zrHQ}&+<6yBsGi>FNa6Rli2osf>DmdH-q`!#Mk~pXxTe4TV|T2v#F>9z{$#^5%RhQ@
zgc(?1!-0H?QkAza_>2H#GfDC?`{Hew&Ct%t0BEpdo=I&vlAmnSKbqPzzX2E5d^iE#
zsrk`V@A+FI<gFI?3~TFX>ZAVA<%p_tZ<949*Ho8!Psa^V9=_yg#zM_Ue)hw)84ESo
z%n})~7epiat0Z0M!|$#*`)pODL%EsE1kBy9$I`-Fl_qLTL^*9kG`s_wPJ51WAy#4U
z-gpV6EYu}9;Ud-TMU)n412>$Cz_=CmDdvBiB)0x%`*(xzUMOus_F1n9Bj9Voh^GD<
zxv1NW<Y$I|=pm51e_MsKYpe8@ufZoM^WNxUbA|42pUO;YgT2{7$-a!@la@=LEl1M~
zp4+;cwKdID+stMZ>m16s`8QTIRF=?_!Tptd^L9CiNc)@lhYPIk%iMt|R~DnE=yzto
z2X(yCkI7W0%}KvrxVZ0_s&{l8foG|s0j<=W4P3+bVe7iU_&3N7C?&+;fe2oEtueso
z7k{NCKNnOQYmj^@kwGq!;8`Y_t5Mh!o?k-Y&6zyj$ac(&7_W!{pG*LeHpltiCC6Ly
zqinl!xvp-pRDc<F#YZ+9m~8KBwZ$T9+Uwr_Aq34eF78;D>Cs`kaVND*e5f~Qg(v1Q
zytM<z|KrZY@{+aG?Hz6SnUCilhM?#h?^|8@*kRPN1Hu$XdB&{q7*`DQ`6eKIlHBdG
z3hVtw1tehw^Qf{Ebx>udw0ccO7K7vsp2EyFH+f@L(;w<H6RAPYqlL5stIs+zyq6dC
z!=+BJN*2eSWsvoSB`t`+#YVS-ym^cWiczanp5U`2ifH4ba$^>^o!!$j@=vXi8_ivr
zxyL~o_hscQj)*D{_-Y&9+p2WyU{V7d?7Yt;&qo|J8%w+HctabQayQhTRbQxxOjh76
z>teP!6qmHk(ary<`}XKD>2VBmqhW#%ppi9RU8^m|xZ^GCj6QY&6$@Ab!ajty>@BSM
zIw!*yGkrbC>N0Y;_lh+WwYZKqBHu$}sVTKM6py-|%piB~K*bZp?;6`NF={BawEQ_j
zjk!SG2&XMds6LNLP_9RGa9YtL;XA7AV@~tamd+b40Rli7xm6d(XU&Gx!{N^3Co{-_
z?yCJn3Es@?6d(l&QQbm%#k0VuHAiU8%y$A-(UQV-j>{7~e7T(-<sI=Q$v<gWEU-H9
z<#|NAr$xd7=dDHBggEGlr2}%f(-@4atTPVd<(&`ED1KdIIW5b+AFMnvUf~9vTP{I9
zdqxfe(iKW8Ig9W0eN}lCT#sp7R?zvZEzT2Zrw8CG*kS5FHk{;QFJvsXR<Vx?V;0O`
zcdLd_TRmL=*7w!tQ(eENW{f&U@l*Li^I>doZx-TaM7|C8XZvn)OoyUy!<!R@twZhN
zcixHuBR4sKEKGeW%9X~@>#9;7pDShH8_EekE~XfnlAgL$b2?fxB|idnb5{4V=TKF@
zn=}fqZ_ckel4iu_q&2MyT?i}ZH>3(CxtR~E?xOtUX&R6j>tU%bope!)5)wDGS>`JO
zf>!nE4}Bjfvp4m7rd5nCfM&Ix2^c<eQ?>P+sg}E^7N^hLYTvajLv*%dnYd_XuzX8M
z^-)^M1<F*L&(^lOm@toHM<dc9zm{ZFX2|CV$*yY_y})F^R+OixUVi1V$~CN10+3yy
zE>He%D+a$<0w@A@P*3b)Zh7a-*|~LYT)UM4?cCKlR~VDt@THjv#CG8h=nLN+faQCj
ztnD-uq!^@P#}e?eU8OwJbR*(Y2SLSB=ZtsW&o5pD070C50B1x6q4guu!<~jO7PXS*
zL;1O&%5*r0R4eH%>~)BS?Ildip3k#%K|1g>tzoZbYKDz}74;=pw$66nD`JrHlS3;)
z3N15FYEH&nR&q|1RT_m11Z+d9Ysbo(IDIdDxY+&0!W|Ec!Tu<jN`l1b<Moqe&nCQ0
z$+D;^1z*Hd;=Bs(HXW*f7p1yk1*~*J+U5B$SGB`)*SNg-LEL^zoAZuhs;Y5%N>035
z3_u}4Fc*Tm_UN|L`EgZ?wt?;Y^qm>{lSk)(XoR5rlUzXF-{c^{OmrmcN*DHY`78tw
zVIR-~GUCzmQ)@=W#yRZ0l>fmw$sxX!!F(OYhvPwl1+Q=F9%pWt(x>G=w(8^=bf2K1
z58wqHg5=8pV4W{ONwmUq-zprZ^f>5qyU+mH8@{HfB*;D`5I0W{;14-_2c&VpfNJRt
z-4AV}2FDYRZysg`EkgVvjx;=i=~4KL0NWUHh6gARiC@xm0@={NPOY^rqR*t%lb0&?
zP>V72hus5epvHv!x@0zNo`?k4Q~}=?2#p_oyYdobxWI5#faDHu*>um>H}3!V{n6o1
zv{Uhr>zXx75LCU>H=s44rld&hF*7Hg<iCBwb!rd!>zt!mkL%`eaAYTi@}!Gf@$MO-
zbbj|HNq|rMvhGh~X~T4DFV0DIfxBcF;DiR^0T;K0?1fOC0qHc?{e{d^Kn?)7{ntIt
z{Av^fP~6=pt0iOK`wCkIy8Q%Tt*=p#c3#%Ou5Zv8s~ejh!^`xs$1vV-`n1?(<tR)Q
zF}ct<PyUaV4q+_<u+`Py=WbR2-Btv~Gw*-8y4l{!KQ6IZ_3JP~!uX<JWBJc33cPa&
zxh}Z#YkovCWWV?vay0^@q?lk*_>fXHb8-Rpw_Y4}?9J-~dC`==CQEy8^1*Nj(0zsL
zMh73axIi`=e@$IxU<*q3uRi_zpv(U?9oVfIfT-{wN(3-_CM<zYq;K6`7j6K?yh(cA
z^4tE*?>=sD?PZOiYX5Zj3ksOO>#?M~o<qmI`5P#q-;<LFaLa*dPyX{%&H#;x12_2o
z9Lds&>&^#c6a0B#bB9l%w*P#3)*yd-Dg=&Make1<x#w3uka)PEki|dq^k)!n9;RGB
z7Jysuw|Q6(e`j+|w*CEh7Oeicw%7H*^qcObN3>M~!mhBRHJ_GXJdQ;aI`F+Iltc~6
zOF>FUhP|#dVhjUxafx(KPk&Fv%BAxB%tvOlO511N!cO>=!5*6vo=r(niP>+`Y@0F}
zazLE!9KjU%=FR3Ik19vp6~dm`wEHT)=WZWPa7ZSse2sl|^5}FTI|Q$%o)K1*neP#B
zlKj<Go;}dn0hqw|Nm@0!@R*$z>`PqMA32`wi}*j$o^@WG+;cTOCTuu_>G$r67oiR=
z+K<j*2Mft43>U;?o#9n8P~odp#uC!rf0B-k!oKI~d`$MYqg$`@23Ff4mpAuEGTvn*
zAJL#L0x!)rv)+|^ZVAJh<_qOfEzu!cj2>&plwV=*JY176PZ8I*V|vUAn7f1Q*X>LH
zdT#^!W-|eh`LFNf`XX+4yO27eMtPJw5Y2CrXMaw?W>p+=u|pcPF2pFPPkAz{yQq_S
z6~J0gIjJ<X1*t>rogYEYe#iqgegx_r1|PD{eZV^<gDk!}*9~N$3jg}1Qj`I`<FtB-
zQda>q3MtAwL3F0}R+)t8!^%+m>9Gu8h9Pf@i^e^XCZ`AMUrEZ#s?nrsd(rHJiXr1=
z<%0Cv!bL=>i0P{&Hl`gckWqfV%aJ)>6uEe<Gmr1y;#oZ?b0i<L%n&|2*$`RP&ViY2
z_2eKOq^u}R=J69Pps$1g^WpDw_X#5Ga&+NsVSLS&CqXz#a0U$ImXHH2^pRVZwg~S#
z3W@F=U-+);sFG&Km8eWANuDHn>y%lHvK)nT_)Gq3x>{)l_s_yP1Yfz<Fr}~<d}%P>
zq&x%!#Xl}mBNbJ!AGypg42NT?kCd^}0d;FK(vn6~g;TIzC26250h^}r#&e7n4tA~4
zBr<zesU7w>*}!#SVhy#h<F^DL7cndbG#m{yrcYNXRYCB2NntY4&H%~CP>{Uzkbw7X
zqM<G2=}0sNbbO~2Lbk4UJ~-M+xd-j6k=JYXQ%`yBOxN40R%x+IDch16CpS1HM^OF5
zNLMtB=+A`K+n8-eP>}8XurV#p_WE<iZDF#Nm^7E01fX=$>Gw+SF#*{LwVhdc5+ZXr
z+QP1YfQFKA9-oun(zQZBByd><LbR@kUSY-o4u1eNu_NtIZ0TQV*(%_}SAXfDBeGn!
z9ij(h_cnxMM!L78JOt$RhO~~@%0OVgP}zcY_N?&^lK_*wZ03ZE@zbmf3u2%s>DvA4
ztgio;udit7h34$xAXd%sZ3l9}LYB#}VV;TTuv(_zrD3b-;tm!|F{bTL6M&M~4S8<v
z$FU!$2pM~OR@1!Hbtu0SWY+Vc8Usd`iTx?^-Xzaapt9mq-c&+sLV$#@b0E`$tsPmY
zS|*QTI027ejAr)j48(RT1|qQY=1_sXwHthZ!Yc2}Um<fqe-sVj#w&`0xOtL)Zc^-U
zJU}~BEv~4(s8a+wkm!NcD6BN#eJJ9Yt}1vZ+VgyPdZv0zOi^O4EFo`;ei@SaW);4i
ze?(cK<%NS!yyF_TzPP}yD-x#Irr5%;z}*m7vSqA_V8$N1GxgyK;dke}jLWR*e!0`B
zSt}!EN_R(ODO`19{^?yv`;|6Myp%??`EulTc#s{x8usy#Kj%t?hd^mud6w?u-cK8T
zG}}q#`|7wi$mJg*Xcvj<4;#<oM<cc5qNBq+*HZRChcux%j`quFc_$(3Dh(qn_I0D&
z6UFWJ6^3OLzK0oK<GFVpd)T*oIwwzbwmux24w2SoJ+&_g4`Ix)uw7Yx?cREn=jdbX
zMpnt=*NQ$G996?hbaW<A{Gdg2!IM>Rdw5k)+WrkmQ+{4-P&j^aqytg<^H)Z}<-Ps6
zNM!5NmWsZ;CWitYs@8?PdCPzaZKVx(#;|J~(<Cn$>1bJGH)a2*tyZl?1(oOawAdkz
z_yN+A!i8R`k3DH@$>u9IiBuUvk*8zMq4tmq$xxnJDQiGz5}i^<`_fj|{FparJ<Cih
z{M{B4*Z{#~<B`W){vjo_(~iV;=zUk}QLkKRsnlx9E#WJR12Qr7SOMia&8U<0P5W}W
zhkSW1b(3C#Zlhx{8MWQ_O?EZ>8zovhGR#C(oTH+yTodfRz2NjxhjNn8(m)v2Kw@t{
zbdy`e#H(s*`b+y+#?HTA*MZ#E|Dn29(YjfX<0uvxC$B83iB0395gi^&b5;`!im`NR
z!<(y>`06Erazv&N(r&2xb57R<lQu0fLL)cmiDwF8JpH3GAC^7OWi);rTD+~KW|e6Z
zmVMWTbG)JOF<<1~LE=*vpqx}k{|1dtmz)^dVY|GwOMOcfiQujLy%zu%>Le?6m_&hq
zJEEy@aQ||IJ(>j++q0`WGWcaPipMVgP}#uphmTAZjD_0bupod$&21c4O~7_`gy3A*
zQ;%4cctx_b%v)S>U8;{psZaPubzz?^NrfSqbX}a;_Q-Ijx$iT}^Z@h#5hc}oe2&pi
zDiP+5)4(|O*kzKas9A!}6RbUMKEqfkU2ZUF6GNlO%Bqj)@w;MIx9tkgTB3rVH9YND
zDEi@@n&wu1=JHs{+bzbRLGN1~T8N?sl{W_neMut1J`}s&d`fzyDOrlgf&|_sW?pN_
z9Di9j3KC@wba2SbJyCM8Wgp-v%uc8_SkmLroc!`Hg<kqLr9cObN6C-3cQb1>&rNlU
zK7AS}(<Al%lcLh@8F1UaSz?LWE;}<mCI6((z*B+HgGpOVpO9XVf84bM$UpYRv(=?<
zo18Vi{gNv_NMp`<+deJG;Dq&sbG3ID7M?A1uOZ5ZHs{vk4>u1f%1f<KM#2-&dke)p
zaDwCWj6>@mboolI1Y6{`it^OLu{88WIU!Qr*J!H>fVNsp^yQ9a`X#L8bu&W>nfty+
zgv%&9GeTMfi`9??HxU+SkI%-P06c|YT>-ywWVnHnzOVS8Lrh>mvR-*YuCS+5RYLUD
z?4%$A@FNn5m8hGZU_?;z%I~v7F7vykJv55XnWF^{pP~n2xsJW?zkVtTKQ72PaeR8y
z5^wwIf3$bjQEhGO+7D2Qv_L7O#c6R!p+E^PE$$A%p+KQPacO~4q-ct}x3~m?ySuw<
zafcQ!y1zyDk-g77=iDRrj&F=R&L0^-vL;z`&b5*`-}m=EPlc|2&WJdhg^KX}1|PZ&
zy9+*ym2J2%aPw|97HPnO`ifjVtHt=@r93^puQfihT5zL3S89WgD6=+)WHlI_I}y9P
zI-)oBuyFyVZXL^yOFG@AJKw1o@~p__SI-%y=FsIwD<rLmadsoNh@*msbOYN<``A9l
zWp(ifqr(}rV29Vr%A;KOooxxef{#mLXmo;Yi{~VWn|K7h{E=y)3lwfRuLzP~Z|b98
zmo2$fG*`JgUi23j<N-F@4-TBe(7uAXTrN5llCgFBm9W~Vc?(qFZwiV5!)RkEYQ6_X
z5qg5Qi5*XsbSfgCK7#hIbhT6Z7<lqj!@@o?Hs!yIl_IiMlA-T6ie;oHmAbWga9>f9
zP))naNz}$h&gFP3S2aWFNbaiLOn1_LuGNpY>m*CUmaG<SAj5btPwgWvW`S=;n?{VL
zy_ns}mP0jm`!1MFp$;Vn@(u7D?q>5>ieA7*S{q^z(|-knkE&V?_UE%FI!$@5-mp+l
zD#^&`mZT=p(mK6K&!JkCH<#`#i|C29Z(0~H<N*rUi9dF{tjmi^+UZ>fjGoP~rg>6x
z^?JOg@#t!7V?z*wQQCUlD{Z=yu6m+b#6li9)Nyo_jD1EUi}qx>Q{VJSKQ6r&lNB{m
zhD6nv)n|!jLta}B$%)TaWzeJ6e_ti(&1G*z`KIKSdEd5HjkHRpC-5>Au~z?#b;&&T
zy=ab#SE+7NzyF||Zh}YfXqj-gikJ7%CTx52J9oyX6p}8jHTDolq4zE^RJVXiBrxKL
zuxHAaV$fyW?H%!&vo^Y>QhM}64KxFw4&(Rx!>>l-V=A=Te0uF5rgd*1Ryyz_cAf5b
z=eYLjW;31fp{qvOG}uh}0A|b0O;@LNtj46AeNK11%z@nD*;^6`T)9w5s|?mdSsyNb
zt<Kcs2N<s_FXhSPRiy$~1g|c-krO<ae%@nb`{&5hTfOewu@Kx_j4FTJNG_S1Dvt3u
z_(Rzt(L8OMZ{8*YkGA*;SI~p8zUpUcmh|j7IH!ItWqQ{H1b5Yl#<EJ-%(6^wa@<tE
zCE5R--wDbHP2mG$WfpP|nq_9XZrO;lE7-Y$_~J<mmmku<s_Zk;C>cDOdiu?H<~}ZH
zrDzAVCui`wT41jzyTIS^M|U?*QLz9}qtbCza_&yIajyH-AIfkkf)pJ}e?O_gB`I+_
z>Hhxub5995grlm=m0USI<ss&Kg|{<Z9|~3r7l2^N;L2~$pFv+EWB8t!{lPmMVgTRY
zWMx33lphYCAyaQ8B2*qkRkupX%;-*r$cKCIQ2r4A1>~HlKL0t3wvbFc8J!S>%Ro&9
zl#euy9W|7CJx5wx<NtJCe)VEo>5OrM5s;lT+$7PT@}N=wPIRL>$IMEyU1?YZ#UW<F
z;?AFzRm7mh@I&nihoqI1J^-vJ$AM)kVHbf*7*NE(GjoMQZ}&WRyq+8%Sfq{_24aJJ
zr%~=2+$`!*kTt##0>z>5g$VdudK<`2@tw_+II3zsz8BU+sQCuUgD9F>`Eiy0o?4~e
zXyYFI2QqgyXsC!yg52;s^U3LDBIVYkr35pp%gcg-o!~CQ0fVYcil5eG6e~Akj15#+
z2zveD@$f=!O9`1u&EC*xl`TK877piuv&82o#VA)NbTtz`G+T@aOOhkh5{I0M{_zyM
z@&wBjVf5F^C~&Po?26E{Auy8VRJPjJ?mK^2#2glQ{x~~%o@<Ng6=t87BrZjdY<0ZN
zVQo16To-g|N@S%OT{?O{>p7Dmo9JUAh#06$M$x|u1r<3A6q7)o+o<T8bK)woc1=n?
zgHzRvS|?I3s#FFVK_><>+t|8L!*Mtn<e!7vu9Cb@l1HYHRws~n^|Fr5eWC@>*tD2p
z8ftWn6oWbE{CgT0v2Eu33&sPQX#UR&c922IKH>49q802oz&A0zlFTY0V^)eg+_2Y8
zXRU_M_+Sw=>apZGcDR2#o4P-`p#|sdrDyQj%wpedRg9zr(jOtw5wbZOWS-huEHN~)
z<cFVCbRU-mChmf%+5-mFZL%=yUn9^v9yO!L16SYOeD&U0(dS6v-^@pW$Rmm}wCVN^
zZZJU>7D7%3S4C=gmB2YXTp<TpPdh_I7=TUxLV1iPHP6H7szoO{OJFRjDf|(pfZ`E!
z!X#&QQo@c0oMcY8J>sy1$mEzFBlEkYA!z6<H{d$`wDLhCKc8SA@+^WZLj9qzKc}sh
zjW!LI2PEQ7^h?&D?!YHs2M)<{rL`l1qZE{s2gzL2rY*8<Kk9kXW1p9eMG&ocOQ!*z
zyma}FF3sGFKdoLICU`|v$d*32Q8CEd>Y1K!-kH9zHYvDrnEC#6x_i9JNM5<{?%0<3
zO+inAtXFVqZzy?LAF|BKJEMPN@@@MIWVuFb_6wxT=?el$N$|SSK{wHl>~ZZk<E!my
z!EKxq%(LF*bCBd8jo_ip-agQr#<(8kJ>V`FY9k5&5gM>}6a5Yet1fx&1jwY&7>YF&
z2v0m(3@&qgBWpJkVEgqSAi{EX)BY|I7DhvVOZ_P@pjn;c1{K-w@m6*7#=F=J;JudE
zR8ws>hQ+;f%sJ{l(`O&VfU|E`kF#}jwV??`M@z-^s_wjA$Sp5to54lb>2p*{1H46V
z^aIN0^R(K=qxGM@7X_%Q3MHBQ-IY{GC1v%{V9E=ZHOgphrN`tg%jasJAU%i+?<c9;
zO&6)<tAk5U{Y+y&DJnh3Cu!*71!X=Qp9ijqhWh0+i)R7G%53<mMIH^+>zQ6$T$ef5
z@Z@a7>RDKFDuNPUJIsF}wj!kwJgm2wV}~ndlI3TFmk0}icYc4Dn>?QGc;Hl(ilE8!
z1AGu|ABVL+n{-0@BDAJyt3l{m1@ci0%3fJ=n$(<ohTD**G1crZ$l&YlzlGP{-fyMV
z!PivYu$7!@vo}1r<Yb?p=c+g!tnBJAy5D`Wb-j3pA(-{U0<sw-DgsrWkPDvqf@ty7
zUi>~<cyyHnHU>bsqOm9hf&;_SeAG^yMu%hyu|e(2J2qCt)?-E6@qS@6)SL@;cnjY@
zZQw<hP>=(`#*p+>o1A5a7)mF2t%bfxIIBDe>b{-Xo;)n4wOYd}N)3IM_&7tU)X<Ba
z4H%R@U30c3yPxSmSxf1*heChI^Nl0Yud-;c+Z1qx5{B#F61Tj2FK4R0tM5uGULxH-
z4rBpoe`W!zdDX3w44Bi%)RSB_-V5+UX{F<bhygu_8;Hj9kNEn(^Ak%9tbTnxBUci=
zwP);$>AU`RgW*<!lL%BmdP1@=`s6@~5dUro$!cjG?{*t(-=ca}4jd`rlw>ehM{9ev
zJn58aR?bDiHcB;K_F9jeg`m2<?HAB$R(dhsicr@cs4;J%0WrB9q5AO22f>1h@WZ`p
zVNXw>mMtImIS{}96*l?xCrbKKXmx=yIwx$3SkSAOCM@X_TUP-dcfV0gHd4DQzWFl`
zkggwGizFLolI(1XTo|vFr3vK9&WKoN>m`f`O80U&!(tfVh|=3LB6QZvf*%)wmxUv9
zdyJ7LQF8%-{x^cdS*Um-8iB2e$#Qa*Y{81p4w6&rwQ<^*@*U-EimfK9YO^)<*6Ij7
zR+J|u$(EE&Qj?VkUsWeJm*{P&^UO;W7Lc;{Zy3;|aLj%dHN&<NZv%g}aB6F=>J-4y
z2akDl47ZP*L^dq?FjUsCz3#)vSE-9_Pf=Y+K1MzRlQ9#kCwImoO%1yoG0;7Pw45yN
z=S|CsCSaW@Z0hrPEWPq?WqRgQl@w9emnGhP3S~&oD9C3e;LdaoNW4vaKE^lYZveO9
z8zVc^hs4=ZHNzZU-kEWGV|^n5z0j1AcflCEbQ6{EEU<stE<yZ49bVR(MFBHN;~>d$
zWeXC{X%g6$HDTh`V$T-{O|44$L_13M0CZ(Gl_y<d#&-YBO{nDk8d^X!^}X!=2Ww99
zr<_ToklE4HaQOKMC^cKor7<c5jl55S{K(^S@}(ZSJav-_VkhB4`QVsR1dL-NQ{qvW
z2(IsEy%n0w$h4x+q>X;_JD)sJ_j@S3a<0mCjwxkfA;AMSvAoIsmKCdm;~W{Z_R7Iy
zdh-jiJVY<2^I%zittzb}YK%3RCL{8_45>Lfdd`+*Ao?mOQLQBzt&o}8)Gfo6oll>o
zl{5`c6rCSk7$Tdv<Jh?c+H4k{<*{(;M9wIbu&5?FzJddWoDpmc*|e3x_#eBDz}R0?
zgxEs`Ecy`%FBwKp9GPeUAG(VNIbDc!z0~+TGF03b8OR@9Qo8r(ts+YkQB+Oq9>Y6=
z`Qn$_mp2Sj2Go85B`~qsxjbwG$Hok!9lrE=Exsgn7xzV8@#O?wa6_>RGQ_qUmoHpN
z>@hbnbhE+`72Y3x)q_s~I_41juyB2oAb&@>O@}dk0cp^FZL#ypB3@M)6|*y=z>P2M
zab30YunoFW@{uouf_u-xk2EC3%TEAy{U*DuYeSz<-B4c=ug@2pLK;BG%5%H8=TJwG
z_quDVTA(`;a9vGL4U2u$#f@6WC?%#F_vy2_?3j@&CU=+MkDRn9W1W;fyWx(P2$}H2
zkq@$24=O}~a%HFcTpVR&B{U*WCbPwMXqIW*#B9|%7PEW8(}zF2$#`%gmaAo6_%hTE
zhuN`RT6!bQjFr(03oV~-5)Bn@5T>7ocEoWWdy_vxd7TO`B>kZb_mx`y<ja4KZLZt8
zPTX{gWHkC-n6tC0#OPF|gPbTbfj<*-3a6!W+jLdEulwtMv#`KFLzsWeIQ)=AZb$C3
zi<TE(_6j#(usV7LSfFR<@sFm*AKZ?g1^B~?d(%paUHrB-9iLyw$y6pTa|<4~J^Ane
zERdK>%Q69pnm&0lB>oyXESe6GL5cf{q_Lju$9YT>88Sb@WyU7WxQYar*>rVYNm5TI
zO_v2mMh$v}4xeNjS4vhIKHGrsj^zx62#EhY6B!w`;XQipe!Myj4600Dx**3sX{mxo
zX8CP^;@uY!ckF-gOnwF9ZtyzYM|XkhZ$^**i4d)_lH#mRo0-+&tYlHMM@bDCh~XBJ
z{a|mteOLw&r_N2S+rRS;wQ@#ACWK6%E-Slr=(SDxHbi{e#(h2S<H(?UxVDH;@VD@j
zeVR`#g@Ts5vj%g8*PN7dN8p)UCgdOp%>%=HVEKr5^$l+^kl1<pRPQPP1iEpHd2RRC
zzHSH|9~gV=A}#;mdfx)L-a8hz(+cC*PgG;Z3qLR5ql{MUXcI4<F{xmIk~uM!S6Qvl
zmT&5uwu6u%@+N>PZ+r3gAHG18KbWCa5|oA#{qj3L;Q8Tnilu5d8&P+o--`WBDbXoE
z9`V+kh5FHirL@gYcXweYo~VgD+l!9Z34<v@-AOl+t$BcCt34pus(HyxBI~I|@*%K%
zg$(vd#E;d%JU4f5hVBUJm_8Qt$xc=T3WYv=-Np;Z&(f<_Yp6cB@HkYXj%SQ<P=<sU
z!Hyw9rOje?TqL)W%^C~PI6KXj*K!Z-daQs<s)alv*mt4UlU{PHMJn0jD1Sryy}`uO
z^o`61=su20$gw-RaVFY#>qAOD)*Ziz>+71e-PGW%o0;K`Fj?S*e+7%5w^;!^O?erK
zBOsFu9--yqOG_r%^`zto%(}>sH`f-B*nRs**jt=0IWsMy#iQO<YuYJ1x^k&DkRI<P
zYnhtHTv^%tSI=~W6oO>Ptx4Xx`ZaKCp2`MWsMK@5C5^iy@s-@X;LkDY*-?&_YLzxa
z_hzn(-lbYuClT*<wUd%+H_M6dCPfGa!~=((WoO$u#XO@-mkCM1jl4M_c^O6Se3r4T
zmxZI>N28d`yNJtYYS4xY9bGHRZRz?N9l`ZXLC5g9$G24S#gkmDqQo4U{>^^;43C;C
zvNA!u52>i?+L3w2h7uh1rY~;=rwu0pfzcfxFq)_Pw>FFQECpBqk!!9c7@u?pKeOXi
zmV9w5=g}@KpfQ)%u_6o;{4Q`}QaOV}F^GF7s($(Q-ky=}Y)bWHE2@i09NbTPM9W4R
zzFyzv{`u)H!S2x^E@aJKkmP*stybc$+n%L-1yczxLMHN|HleS;{&e|^?g^e$vjGq~
zA`K0-Zua(VZYJzN-QCfCyS2BZ)NcagTP{TH`X1foLhH=$dgy|$6J3xYJ$x2sj6Vo(
zf%Z_xDy>-cna3^)!t9mPNA{G~9E36V#OxqO?u=7UM%Y~*(7<46lcew3Eqc1&z1>o)
zE0faA&(oEj?e3f*44#2;q^2cW1;WoG(5NI1X-xW^Gsii<SieD%NaA5+Q$HGek-KFN
z_(XOjZ`59weSg<a+@^rfIAYdTv%NmM7363s-`}p|6}Mr&k!Se;i`xG|ze2txZ0aTJ
zS6B*(+<bQjf6`nhlCsA?zBO25t$+Mx63X{(w(QGLa+XC?c41A0Tr${g+sM3TEPKs-
zc$P|7f^0MNNh=U(Y52p~>%1YcTsa64)HFuB*pYM60;T~39*8iucp8zzsHdP1rp9#m
z`n8(O=KD(0P4(Qem76SL9XDfJTWlO#cM07_CzT(7l?~s+s56+~j8W{@!w6@0O`-b4
z%)VvT(tjW+@s?(y9S}tls&6Pgb%S2l_J`z|6-`aatu+cpnIkk?fE~YPL9sU>RcbZa
z3Jh<xRnAc2iM^=>DiCbfev4I41qmE=sPRec!g?i`ZL25$$L<6%j;@%DumTR^Al$Wr
z&5^t!oH{-j>Foq({<MK;P0;lPfo^otO)SEGWs0O{WlI_sF|wj2T|517%jYPr8Lh*m
z-UY^&f7U4)nuxELY~&kFkHgFAWJ%mKH5(ZpKd!*?s%zo9MLfWs04rFM-iOC{o|n0M
zzuni>3q9#%E8SPhQL@Xf*(<xKNg_ZzB&9CEpbDEYUkiU)$ynlL$X!s+BO9041-Wnc
ztc|m%zzUhiLX?UTn5;TV`8`xe>LBBb`tj4xoBGK{Ki-l*k<CEwO{m+eKCPO-9x58w
z=n72XcghFxAm54}W(XR0&`b#P!lqwMnl0KCrYX%8Y=56zPgr}4Y6D?fQx8|JI(xC8
z9(i`q4+*asqbaU>`9xf@Yj+fe>*mwCWHVd&;T*Y<t1Pu;?%P3CnzzQ@cIZe3qkAtx
z@V;=STl=aOMLV9qSUTZrP6552-OelQ{tRm;U|(~@Qo1x2Rv{a8SxT3rsE7KJyP()5
zEwS5#r7fU~L!Ip0XUw~ezv#&aKQAGYkPEA=6>r~3vE-GNiFN1?YqISc&CJt=_0b)*
zG^66uP<|tN8e4V5Q%QAg+lG8|r-DHSWrrMbocS)uIa$zL=&aOgD_}?~#j{qh#iWH6
zBE6!mUR`WN+Dxl7l1TEM`x8^Ekt(f(w!VngGo{(DWa?$L$;Qu5+iYcSYSx1O=tOH5
z<zW(oyQ&d*KM`T(`Wl>>QS>1Wa`swI5HWw#gG}>{*8h<nm;t(2=-9yKxT$7})?Fm5
zra0;-9prJmt;!fbB8PuSbyETK=VG!|_}P<F+O{QI!SR^dq|&<l(}ZCJVP1c7zi<Rf
zGM?55*#v#2&Ya%tZ6JK`1w2ka3*KF+WSW#^%F^6+J$EJ^4Ua_nc3%+2cR1aQKQzY)
z%X_Kv?D!4>3+`;FafNs51GKM|_|<zy?hEqekt)^9)ML&zvuT<7M_65CB+KsQdq_!1
zOK;6Rc{xWC;o_N~Ig1#-;y`&^yBuv9gtmL~M+B4^AMta#KdXI1VNHaMl{px^diKz;
zbtN8x>n6O_Y?!POEhN%J86<I4;b<o;@`h@_&d%a6{*o)$aV~Gf_3Po56Vp)9l$Im~
zTSPcFLM<j8!d&@vymVmhFXlT6iM%W0bJ1~>w@ghC-ugM8OReUD-=mV&btxB_Vc7Gz
zOjO{L!L`~xnS>1K6S-k460I5M%RPr7+oA$9yO|T7P5~^$l|@3?sA2L34TcT5ewoAT
z4=?m3!XyaDsLtEIxjn6`?aiG@cyXj8=4$fdUnK&6wgZ!e(?||WE-H!35O|=2*v--^
zJlEpJxd$JZ7<qv9Ed13HIwn%3MA{a!DeV{+zOwGQPg1T=Y4`6RpNz7LEF#{Xg*=)s
z6cE~u{RJdASIZ`0eZTEKleh;{ZXieily@zp(7OXjmFR!HY*@I~OB=e+f%%0gyAfo7
zqVUuD^EYxbzYStz%K~<TC_jaDZk{GU4ZKmV0R*ft#m#&P1#+bsP;xz8upJ2%Z_3e8
z4V3;Mf<#MnqgDDx=_5LH;IEqlZYiKjjjE_mLpRVB!~nJJ41h4Scx08DazW$VkFEiz
z_hdi(k_EsXVAAH0QPoJrTd3o#_MXbQKASh{J@Tc1@Q>6^T0%DXmW!<9)HFmI4l)!k
z3cTqAh>6~l2Dq^qP|?eSTu@YiP%#52eH%C-;gkIaC1LIMBv4NFms@CrrGPG%=y#eV
zG2pf5<wFI}nG9~qUck3bZbEO#a8T>d=$l??VeqWYjd5ZmemtPZBe*RWyqujIwxI+u
z1y_iHUZ58XSE6PC?<~yDrU|Q-eM|gPawx&CWhgJM%}QIF1``Ap0K)EpTz7$CBd*|8
zwNDIFe39BlU<xh~1F>^^%Xr?L`Atkz^X<>LXBptznD8_9spX$?8y(<O7>TTaaq@U(
z<&eG3L-pmO7@V#jVgd&l+hWF`eg?#c(A$dTW2uT~g#F8HwH*}EA=;R`=h#)lK=weK
z)EC0LE*)$ielt?-zqZxl_O<}t)&m=OmGSkLyE92lJpGc{kA@5bO5js~n<JSpZDSUo
zT4nMF;-=+?O`HqNc09uHT=DjUnDjfp0d8&>(SPS5Ki2)@qr|A=l{f^$7T2Fpph@Cv
z{SDydaiX_Yoy(_OHWp`-;9Zqgg;Q6nJ}7Y)v#1>oRuc!ImC1=Qvy)c-z5Uf(Ba4Ng
z_SzHCcW9%d6UAqW4Ve%Yp8LkL-uU{At(CLt6s)Qo3Qi4Asa+Hm>~>(iq^BPETM*HE
z;$jB;0r#!!K&ScIjPEBOZc5-7v1$1C!7MXM+@sEYUS||prG|?I+@Tm_HZgyTbq}Tg
zw#&qzk*RVrg?)MfO;Vtd$uWL`F>{!Q0GrCp{6hoeZ3STn_r1n@c6hdurjoCoOU^5x
zHVE|`%C>7%B26K6Fa;qIr0Qa~1EC5Pzzir94EM7IHo<5Xu(KU+VffIvj5uPcQl)Tr
z-AIm{*utCDRkJtpU8hfZ8C3vLjyx{Lr)a)_0d3jKH2-vEd7k`q%WfPOXI#PZXSnN{
z=1B#!@FFFbJHf|<hnUn|D<@#ayd>e(`)U&06qz;lOl4*-O?jCPWE8f0>>dFLa_73(
z3dNrZ^4hOazCeQ9<hKcOhQCOV%K-`U`P6tn1{_9i@HtN>tg`CfQ?w^8*cm5N6^@j$
zs$6dwtjgIy$#I$R%Ts~gqS1XTd@D9>ZJD#PB-NCMr19maR0ejeEoB;c+CP`tUare!
zZ$7rUSt;n1y{9qNUb?*OWunP2Ci<qN!0N@HtngAf{?8fyH{JPK8k+s^+VGL{gIPtX
zE;|1CUhQm(Nrd`@o^^K%&gJ$qLNS{FhiuHRo16N0ktP~v%hVL6H1j#zS+G^3%yg<$
zYb7}#xgQRj^q$!0o6=dK4qv2NNOoh)c}tLJ)pVLyZn&e77$K#PrVTG*%R8g0auw*$
zc%d<Oi-ajZy=IA+hZ<h&LUIugR?||-nWH~V?9-gIDE`tzhW&B3$h2VstaT!Ha0|mz
z4Y6WxMb-MUh$V<7pW;exQ4H#2Ndw=G;vwAXEsikGkXjgzElBI$4<YsqGJ|lw;y$QB
z$xctABDROSa*lQ71^i*rXK$(*`JIq(0Xb6<y4eyJ)3)=(Mv78No$v-h_=8AKG9N4=
z!t{9mosV#_)_k9{xtVe}yqbd{H8c_X0xeOvQAH%p!Qg{$@!l)b$iVM-sHw^lRP{pO
zHtC?Z@*H~+Wk7`xXd~zjVs>^3jxc2lf0~fJls7T@6472qG#1>a(}dtd*=`e~*>7o~
za6{)6pR7TU$JRWcr<n*#HYsUT2`PcueJd`?;I<c#6Rip8fA;a4N+7;QRE8PLk49j*
zrgQedQJb7cB{yogZb;3HVe6AaWEoCtvX1C!n;Xq)&qHdIe-c%Ix~<R;ptGzq_!WQ&
zBRcTzIv){kk{a$E<vIPFK|MO>_yH7o!9Lxd`tJ79#olA+{a-^r@~mQp>QU)6%b>)t
zHv1LLu{RE@^0_eSQ!%ylv|MH-32DSzo&cMMch^zh4EI@El%uo~sEH6YRPAC<&D+>H
z?KC2e5$Z8_wSSh8d}q~>ks#1ivaziXm|q)C9j?j{qa}xI-My7Qx2ZV+lh?^jWH|b!
zHV&lC{Q)%_9eqNjG|iDfvhbmrnZ4g7&PU%KBh-$#Wo6k^jntES+uE{B2$iORVCfsf
zoWS7pQ6sj}!pN%(m@#)-*VIEs3ufyKj70oR_@M6#rrY4In6)2o>}}KMCd5@YEBAnk
zO+bBX+-f?`Pg-_db>hCHJl35_>>#=@T(zM08S;>p_6iz<SyupF<x(XNO0<vHJ0@hu
zGEzUzvDn<~hZ<y&CuiblFk*FN02NC2UDZ`9?n}0YKR*JmnN-#B*Ri~EIuVatSh4WJ
zVK!>BrLhc^7r-%3^LyS>N33qGz^V|XnLntSxd0UU0_QPd)xGv|I0704Z4D>z5<Wpq
zlS&tfGSX*Mfq&o6?;_%&_yzPGXAHy06ksmv#O``&jMbTIox7+6X)tz}=6qDgmfcs>
z8Cc08E80}W>1|ziE}&!^DEz^c+$~k*>;Uzlr?uCEr&&1Yo(RzwA;vZgY)kheqqJP{
zftWq(jRE9@jla~crt+*5^?fOziVrCM9_blggIOD^ar2NYe3a4H*O4MMCI}JH%3d5~
z{U)xKFw#q~s&3%`W~{ugbR*b&JX2g1>yX~}O3s$Xdm*&WBYW8F-m1n#x9U<YzlEJR
zPDjHfn&XSfsSciuZ%QJMqM_DW(e=rZ%j(#+Q(J~(9|<G7UTH+wt8q5G`Cbddea6|m
zu#%>(buydXtUNiGikO+ro24LfLVzc3;^j@M>3ea^96#*`zx;cv&R<t;Hw&}B^+4C2
ze7_<2Uvll~6+!3lIN@jN;b?xvBP^MT{NHH2-`dGJMJt}MdMeY_nhjz{-c`aB#~yml
z{K~xCWQhvheE1%o)|da576=1k_mNa*;iD|Pv{Qg18Uq52RHNK1h0h|&bS!@$hOn<<
z7VUC-P*Vig9HAlF!6)`o%ZI6Zj4J*RMq>HKZu(W(>;$Sz0C7m2(>-KoV`iHrnJ&7V
zmhipIDuX=l+eOfXo6=h=6-^OG1X_Pz(4|C=RHo_??(_Wohce-GxMu@7$j}*aZ9Xx{
zqErWPB4hK@c%~-XZq_f#VPP_2uQ9mtHw(CF#q7TtHVnj^1e}~Z`++xP;d5dlCmM%|
z*a0Kn*HD`(zh8qy>1e27O0X|kYG3Y+)0G%lQFG^aTQFFaqO6<zx%O9m{8L630ml3F
z(FsY*H0Gq#bd4ci?0&+<hkam!%2gsoCelV(daz-b?;?B<m*S86B>p3Q{2!z4Gchn4
z8qVe?r}T8yHNRcU9nwcE<f@$vX9C&t$syHn^(wm1yh5Bg3#Domqd?J0!t<l>@^(|^
zJb}L2`8T=}MuX$}mo(4Z28wX5>rE-6AthC1GM-)#t_y7DsW#`yJOl<jM&PyX<;03s
z4#SSpFvZ%MHK%o_19<f53$^_G`zVbFwHfJ1#uqA?d0{xS!8Wp@*vsE;LK9e`IK=!Z
z$le2{pz$Q~0VU~rifF2YEHzz#Zq8n5SbUVM5XFqzkn<i;@|hmuKU0Wf>w+2zVABFJ
zGmGVtFc6IM;gjZ<qz*r2W|Xs6tGG9n20k+^Wu#M6gXdp#=x&8;>J&y((-@y@F;OoO
zC9L|+D}UWxz}~QSh}~niRekyW$O2F(fxh=c*UBo3badVRnUU5US`8PSzml_5bG0z`
z1fk}^5b}wPZNk`H#G$%gFS(JtzZ>lds}{SZ5~M>V5?3;>h+B(0+fF@baKu8@uHm=S
z5cHVXvVE_NjD$l;&{nIcomz+4)VuNJ_kdjdGN5KFLt0)tT#Z>w=D97*44Kl7;}mLB
zfY8-s3zqyT)Gcg20n4Kf#=oCjyD}kXe`E8`@T<l9n!AxfT|xQDPfd~rGzBw=)@l%s
zbmKEvPWH`qq3#)b-2xo}d^u~BC6A7AHNa@#W;5-y!^Hw@1Z3a~EAX|}MTVu6E-fw*
zxqPMWUqDv6I-#L6m?g7`@mn0;jviLqN=<2E1nXb~nIW`!{GIG=O3e5aQvLHS)y&n(
z<>A6(?SYc+J6<aF5T_C-DT*U8QNAtFJ3~g^o1ivl(oke|)F<SJv3n=XAJ0^F9f`84
zu~23)V7r>(?`(VPAQsZYf_+|M?_vc>TNV+y6;?Ak<7x@&T#Bpc>_+}wV5-2o%Dj6z
zu%Xz*+W2&~S^l>eJ?p*#jM^$iK=I^~VE2>|z*NhT?%oQ8&^^WFjtJA|f-fdQ&HHmL
z^&NtuoT7s2m`-=u!#%ur<LAgBT=InvhJqpTb-jWVL^tv(#(@czbAeIi;&KMi6+Axi
zVe|+AtZnTBmQ{#OeLmoy6$S2zTP^HmbmghhR_*ql)$Ot7uw~gKS9Af=o;_i#d>_`{
zA(e4Z6*!Dx+L=>s@sA*WOr0-15_0<bn9D29qLdNgMOkWllX*12qPsiU5MTdARag%)
z=M7m0zZX5_{DwP;CRCeYB4BLbC2=1~u+KWrBQ*;lJaFWF$QqMP`2)wgR7;lj?%F&D
z5}kGpPio?P-h4BwNB-C)`V#VqW^KZJ)c2#mtZ`#eMEOwU02jlP@H}PQlzt(tP5Mv$
zf%$e5*Ocs?0R3X3DM!}Ks32bcdYHg=GFnb~Oi(rIRp=3Z;&oX~R*e*hu4)DkFLvb?
ze2aPM-d}nd8Y1S3G^D_WX;J*GRCSiU{ko1a25USk_}tg0B0q+^a;L;=<&mzK{jWzI
zp7L~|A~b)z%li_h&!*y2Sdza>VCKZBwrcccc#XXki>MEXXK4-3zPJZAsSi%#9)Wv1
ziPKRa-Z3XWyjAdCO>8tz)BYseujmbBcNuV5*%L2SWqn_c<Anvqt`NPqR4|`W-nPmt
z-Ks>8M8^lJ%=&ojp&^`*0f02Z9hqUl9xpL#hQ<OBbk8iWjGdg_voJ;;zT)J+K1eK7
zK!uAAa4Y^SZ?Rwd(<o)jbr@<wWmM0U)uKfP#qH0>_mX#MK=JEu_yvtZ??bXH{6&Ze
zi+eMp^&fU2T+0a(qF3PsPHit~)ZoFYhxg#e70-Aa$Cy9Hu7`>)U(Dex%g`ZSk?iwU
z?NIZiDGk=H(_rVDE-n>&iM?-=f6&}tQ7V@)IR%Rf{wNdMbC1Rdnx&S{+=`E)F6Tzh
zYnuhe+zFJe*&OXIia5$0owFLs6z)l!;4~du7Sx*M6lkPBEQs#=;gZSTP@YDUFemi1
zc0TGA*u;^wmnzA5<s{#SN6{i;C~uF%n)9uR(d>lAM{IOg9(DcWyF$fQd#A?oI{nKV
zsqo{w%|B}|e)WGt{cnF~VqBAXM-$hNPgr!xfQr%J%lBjF+9a;&4Z}s+=;+9-m?eI+
z^8@Y<<@Tal?tGgVlEmfH3KknTLMI?P6{XPnhGu++8L5$<(nQZqJRmf3_~p)e8?S~T
ze;4L4exiC*#&Ow^-mB6cs^CigknD6h!A?<pW&_5Mvo%}`DrO1`%6qhJUf!iIFUICZ
zMvV?QP==H=CwSY+GLD=|u<+B{C#YA2g0!ov;Qe#!N#*SyX$EhNs>zUU9N)3!m)`PT
zuqNu${(}9sIw&*QW44z-07Kaw`%5z8@RSF8^?HrCt)^2^liN~FTSE&WjcCZ=!u~l-
zf-}yDL!7JB&+O<q18S?X1c#yIlrCv#;|fW5p#wATH@WC7Moi9(!-sv9zktG-u0Q8$
zZ_%lF^VUk>B3UKQ+CcibF%?4^t;M~hJx>S=iSHz@Pk(lG2K%JV$|^)B*^Z~SY9Zt8
z(#3dfi0p&gJy_7f!iJ*@-*dgCDKFOScXxYztvQfoy&`7)Ush{qQh$xgrugTmY^7u~
zra!udZyQ3|eHhiEyL14_P6A?lmqJPvdBC)!h&X_UKR$lii0q5$s({%0a#FaO)>h)8
z@mztuvtn^w-;Fv@MnNG@YkHp;M>9i@k)T&e@=us%xK^iwuVuj)fN5%|HB{`Nn@m1s
zGke(Uk4_O+mB_r?FJ;ITrj;*U2>uIAQ~o=e#^Il!X-E@Q-naI1-MunT+G%AK<0jt<
zb}oqOtqgk!sb=-rtuN>Dp?GC)V=z4K7TIAd&O?rPK2a63Oda?IA*Qva0>IZw@i>2?
zdNTib9!fg>ZQ&a?qW7gc<FuNku>qU}2cL)Q?NM5L@cmUy@7VLvyC%p7MeUg@Nr}0c
zc_^vU)fq^4cm}Tl_SVloykRXs&B9;E89R{!&=B+NE=0V83U#Ptil2x=f!%!-X{E_d
zS$lE&wimWGkMwwq2GxYhdy>;O;rshjdjuFNQ+?%^)g<kS=}2=WwcWj2En&Xoxos}*
z-iz_eL6sA#O)NQfn&E&{%6jySBC2bI(x>*JW|fmD-B58B`Od1N3~)5yb{>vb&a^`i
z5JCNM7KV$)+q`VrC0BaEzADwB4oIFurk>poPl;cAyl~)#E?VtHaL9-@UOQcd#0^p~
zRZdknzxbpW0h6o|W7FZjMfOp?K$u+_CW<wa!AuWQBmO37ShO6xR#{YePQ+k8=DyY)
zsFUooFJ?mf6DHEauM87G3$=LC0Bu<=*hxmD&E~x5u#;v;r$?i!<J>VJRClP@)A0X}
zk&_BJ#rPSPYy1VI$tpN8rF%XjqHhGz8BPra0(H!;o3owP|0Kd=_a}tME(3$Si;*p!
zk=W~Qga>3eGE!|SHbEi-4XiWYjLWW2&-8j#^C5%1PHEAk`S3~>$JYqPw)~+RaNRqQ
zTOfN{fU&qY>~4u+%EQ8lz_4R*8;sI7jYT>_>Z~|Yd+I^8@`&2UyB|Z%ERZ+D^GS{&
ze^ez0z-l;$XK!PMI%RqOiLHr(Lf;hvq#pt$-84HL6T>|CAHr)mJj~{Du~^aq7ZN|e
z(15inr%;#|OF0M8ozQ}?9bb@-+L57s+%W$zs@SSrKw+*d=Zq$21)RUOzZvlsBPz;t
zTKK!A#SlTS_!wKBb{n^CxW?rZCRZL+X<Z}?QnsU+8aSkz9uPR<6e4Z==<Yh)W40BS
zE8hr)FCDW_6N4xmwl=f@Wn@ASfjPHaRS=Q@Q-#TxjwZjWDCJ0IdTZqzyStt$3iD7c
zT&5Rbau`4=xtM!E)d?A;h{{Q-FICjLL*;ObP7(`SfGe66ksMHxyoWnR7SZwWCUi6U
zYtTqXxnuFA%2ao%@~&N#Zh>JRp$UAK#^{S}XKsgrf8q|<E`oYg+42z;9R1Umoa`GZ
ztiNr(@mF6n`Vp@};lo3&;d5nSSo~fDO(9j6DOeTbGFV~rIl;zOPMRjlJXk2Uk56%g
zxs01XXK?>i!%;s&EJd1bh?&eO#B>!%H3b<fWLOlL=jPMgGJMWw8}j|jEg;kTR;6fN
zVN_nIy0zd3+%3Aq-OQalI{)5n`3H)ibB6_Z9c4gi)+gJlNmoQpXI?^r=MuBYFrVa1
zR|Vg^GF!XE55?PiB^gMBq2YpbZTq}$Rf48f?F)hOB%1{>t7nV@>_#Ei72Dc^k4N>O
zx`HEgTg$7<KVAT0rzx(f^<ie#ZiZ6HU*kefuN2O9pOfp@rYq&WP62}G<UWEW24nea
z!|tU#lZY6lE{w{Y9c#l;X7BG-2VK-_L>wqGs)NjqoQG0u3Rg$pj=`}GLGB7`fK+3@
z(h%0QAKZ6*^AegfVC_^)YL)<bw7%iqUY^hzFP-@dsBG^9)BZ9WmHeTW>y&Y6&8O~~
zPv3CkE(OeUD5pL=#YpE$`vpWHbmhJ-H*_mLxW*Q7CeA?gM)q_=C_`soEGI$sAyDj{
zUvABczxnl?ve^UjBOqvZH#pqy%HmrDx!+lz!)q@nb{cjX2*&m0!RWDezPf57NP_5S
z`Sam7T`0(&Q*Pzm_&M!GxJ0<Xcf21HaysMDf+I_F;wyGxcuxf{0b8ex-<g@*EPSy4
zcSS}8i;e?Ud_=PQ`PBQ=YRVqshguY8aI1at9vgZ5cigp~T$AC@J<rGa)m^*&uexi;
z$A2m{Ow!vA3qk0dVPX-ZmZ+!{ISx!4>YH*4<y(H8`sCk<)N{urV0;7%h%<IzJ5Y%*
z*AoX;)Ejys$`#JT+@!rcE5T92qBCi?tFE4kUhtucIWW~IVsACR8nq#;OA<;j?v|I|
z`7_wGN-ky%PjfSj0gT51^D5q>IVSjKcv8~P0FF~*NW8ep)o>P+IXUg5CiJl|HTi&w
zQrBXmK6kz)pR~4)t)4i6Utz3PZl2Zj^f&9qVKo5KdR@Pe#Wlx);wk2uDFMy-O3D}1
z>y!2^xD0mT(n5TwEtss?XJt-er^~ohB(z9+wPveF$M?9|i0MLePOyCY$1{zwNVQMA
zul8#z=d;xXr?zz;U8I&cT=6hJUn;GE<@mO!v?#0QAZScxtak9Eo5uF$r&<%Z7`7nq
z)kwDqtA#PM{@4#Mr?(RPe==B494&VVncjQugBkc%Ku&_*59R$*S?MmLrEQxtkvv87
zOY_{K8VifM1~e)42z6-cg;$)-wT|fM7Xwuo)|#l8Ymlx+XKV25wf<%-a)}I{Sn-TW
z4-9*a>fjiglDNlDvc_<kN`H&Zyw}ycublsGwA2mC`+!<5xJN$emA~cYbOC{<s>8N5
z)jg(t;)22qX^s>U+bM`!JI>r}drg&yV-Zj!+4l^LTaj715CtA^g9A%${b~A{%{4Mu
z@7jClarIsq&-BIYbdr^tiKBW914}vM-Jj`}D5SmUNHXXSO#C*wYvtX$HmYEvnwgx$
z)!sc$@hM`c---Wf{Bt5Jj4<tRmLsYvW_{_Xj6<qU>J+fAMYIHKTIXE+7+dpu<XVPQ
z)@)kuhBy8!d_}$eTBTSs)A;C#B(gx_#*Ta<Ci45YQG}~`c#XD3byQVUSUDqkc?v)_
z#l3A8bCdE6qe+$0_!t&%X>1-np*Txy;H0vv`aoA};mrD(<q620>BoE1JL(Ak$jv^x
z3O8#v_h*^VVXdSL@07?dJWYtPu(TO|)}XN>eSM#nJORw%x}7}xinn$Uzk?;ZBI;<K
zk}^-~?4$ge8{n+6?>A)Gx+$@A@1q!K{A9Hw4{_-qs&WzM4%<&etmVhrn=<DW7!FsX
zp9|HuPQ1#}cD{$AVf=dzhy<*BuFrD&TrD@>87~P7{7oL)*4Bhh)-JEBnjJF*sJM0T
zM){_}RsCx8S&ZjOVDg4!>Tk=H_8YcBz7^~We3f%Ac$T(J^2W(Y#kmkKLo0XO28gYy
zTkNEuV`d}$c)%@ZwgehsJ!DhUjbWoM>T}K3e*tMlB))>Mdg+W?7)k$wz46pJ;}kDG
z;|-5mIXVL?7jXKVr?}DCc<j#?Bu5Qce6Yi(p}z|m>9rYdUCa{4?Mvh4;|nKH(dt_5
zt>UM^vDXLCBl^y951^FgLoA}&_!L%wDg{#HeY62tKIr{OssJX5ddT#tep|n$YKXVU
zwwU+39WeDsGpY-(1a3NN_{DN$S6pv+5j{1(iUrBZ)>iW({L#CvRXO0>3=6&WGm{p&
zwes0@)Y+PlpY3pKx1vs0&HQ$OYm!YqDfz3w*lvOj^Fg%ubq57Oku_O6IS1+tw@h8%
z<cD{#q2$xo4LHHh^`<uTsKv-vX0(vckre1=3ZI`WRYB^MWguR({$0cZ)$qlE!pcN>
z(ipt2YI9&|DFHdf$rbge#CDB(t2Fq^fPboWG^w+UdxZzYULsmb#q}7v@_kP?Fhz4T
zf3JP!?FXOnM6F`Z&G3e3?~&X)H<8;D1y%bv*|y0d9y)l-XT|Em*sN&U5P=(_hG-n1
zW;A}KW<J>K`|kJGl*L=SUc1S+X>syFq|n;g?IghF#YaA`DoD@Av&^qhedwhl4d!;j
z8&W#wrv8vDGmc0$zfR5g!i^Lwvp~?X)l#XE^E4Hp`4{JjLxoix%Z{LFQ1ZV0HVf^p
zHBr3xFv9Axv2D!ZUt^5gS&SOD&20A6{F@lN%VWwtwam32{@Q`9pYg<V6J(%l=FbG0
z*vo-zT@6{4p8FliH(XYr#akH7lV9V{4Lk)c@78sv`?%_WPn|47U{a#eJ(q8iqh*>d
zH%QQxh3wH^fI5){8INvZ6lkT$jt34=vQwl20yT?|k_5#|(Cb7NK4=t#57_DG(G%>N
zxpvLSOn-H*V=9vF-P~DWWF=kzG`jliTwe8!tu{J8SL8*}HFRszo0sv*_ioO0Os;6i
z3QLk=l->wlQedHST`HCufVwLwkFSF}%Wp)<xqjB4LEHZ9iwG7s=Sx_Ik5@$W`cwQs
zGCEoy#_`jsXa$_RkQFzjXTyuL2z)P<D9fjosHLOjPe*9onX$6QTyLiNs85&Ze`cEF
zF3SFb{VE%4`Zf2MXl!_sY0TWJa5oM889Gz@P3Wwi+JDpNRY0{|i;T9lP*Q(93nz4!
zMf*bs4@L^qJ>TA`I#6Zq)=lW_1Q~?a3E4PcSTgd~*QooJaas`<rEULO5YABWrt_a+
zIvA0jqlmjXL{xyfiTyd9!}fL%ow5yH9EwnD!}|I7zG9;n3Pw>TKbsVj<Pe0{Z5+~h
zP7>|Gz!{WTW5ubsKE<Y<;stqkS(&D)FGd7rCPcxQvIKOCULQ~$HZ&AkU2Z9-c-}(A
zQ#2b@L}z4vSy2*axs&oH@J#v(hM;dkx)wA#iI5!~uZLUDBO<;*3?%l1O@T7ziTRc*
zYiBMZ%`A7uCz9tEkW<0Xa~y0!5D4UeS_n;YLHAl}zjvU^X*g4n`3~v(<w5k4JO7J^
zNmo}N->yFgm%2XPxnd!8q7($g@>P`0VV|-lWo%qLeJHZZbh0zN7@f|-5W9RaHvM?{
zAd?MV*U8O=p3kMyZ@ZEkq$mCg7sCi}R9|j2-at<`!z+t@Lmkl?a2B!unBuTz=Jze)
z;yZY2^|gXh`YTe<wqmKQ#9=O@_{!ZmK6=;4oK4y4DsEEwjmmVAn3BH-${)3NV+sGj
zWMPTl_{&A!;7!2p2g_6JS06eQg2r%1!m*!F)X;Ox+do~>I!Uv$KXg&pd#W>hxt)u?
z!8i)E691ld45lSS0Rg_^3#z#(E-KkyP{RR^$V>b6>J}62$dXD@DpKh=jw;}_cGKXm
zP*j9TX0N|s>H)>L3^MOVzJQ^GD4t7mhPoInTrN@7)Lpk5!G|BK#$4SjY{-_Xo(7Qp
zpgI<+|Be)JcPSgsdT|71yy$b+mCGQ^7T48w2UJGA1XJJLmru<|&2IIyOw;pFZM?D7
zW5U`UHf{n75Tf``lgUoC>6QNu)<X5^Pa?Y#Qwb3X(B{ssW<bSc9wuxA4p&6Q2b5_4
z4y)z;C)z5Dhbr|~VoDDV3g&LL@Vt>Bf=047<@E$nF$UXel`7trs}Ccj=gt>Z{D=x%
z{e=Hr>lXsW0A}#2+z-(EFW>lrtv-%Fa|E`}0A^}jvE_CN3x|IuPn$Uai*cTdDkd(K
z;R^$z(u5#o{K}hN_f-w2Ca%EYI1d;Sxbs~YP`Q`!k?24w9VI4v@=k1Lo4xYS97|=p
z87Cm4(>z=nQeqIM-ObAO90b&D|D8nd|JG}0Vs^Zt2lccY*1v!REa??`ZddOecfE^s
z)?@#kG4Yb)`{y5o?pNjkyQk#b3PH#%{RrN3hSa>Z-$96f3r_qOp69{y7vl0u^}BYN
zYiu~X2h%F+{IR~yiw?wWqvLCdDOzYIbOblfRSN==z~tFYF4vz|0BxdNo_{Vr{*T7d
z|NkF%8^6-cB7A@0fVtFlsIhG9QP{u*4Pe4uy_X2%Sx%sKUzn4J_8)!f)p6_&{W_fK
zeut~M6d``0e>k|&w4gkOLvaQbpc>x)u=73d<39L94qt3H3v(tAV0!NL-=-1%`R?Ni
zwGvhCSNQC*MZuC6ugbG1bC3<-4lT-Eynxf<kFiJFgdP`sNng8b$VNpFyta6f62+G@
z?%6;AoMtZ}aQpsWe1J()yGS7nOf9!4meNx__`gYv3oYUVFN=&?S{wUfNtMPkXtuO=
zJTiVuHlB6)g!w7|^p^gQ+sEU(M3m!^*;FawGNS!C>KR;p+~%hf*C<`?%`L+X>mxS;
zc3f%K4=AF5#a#U7Ym_?28rVf@5aMb`EMr!0*zZ87;U7I(AOmm3-Te{`_(1pZ%Wxma
zyphwhmbiQ(0J(O93SFYPYije~U|C3Cmt2AZjH6V$JLHi5n4xTh1mM914<ALTw
zerUMBF3-)~f1bVm9-I?s^B~6NBvL5s3Lu4WWqy0Ml-~=F3eJuIq5SVS+xgQCbA*;a
z6K0mhT?Th*z#eIVEao@wFSPh_Q~z)?2Y8-zLc7epn&GB}mTt?m;<W_eFx1HN^v~}O
zh+F=7h>#UP2G&98rb%gyUunu^2!PFb*y;b~?KghcZC$Ie>y5OZjm+NSnTecHeg%35
zU=V*kgQ4Gdym}58ba9|n>hK=Lzclb+*8W;d$j!&2M*2%8rSI|b9m?N^nm-7e)0Ql5
zqS}*CX#zi^DqaZuI63in@$pp@(n6176TLQdZIGgio>rnzsQ**xSW|nFq_paYoJHNy
zOz{CPISf`b^kn+rdO_^oGItk!A;?Y+P;G2(9_E|gx|t*NX>#i&XjuvQcr`zTYk|^w
z!`#4-Gk9tNx9)Xqf)i;FlTrw;WtrQq)f}aqyuv>+c}xG9>%VzR{{Ohipx2Mh5=bqh
zvwJbHpL~tj)cx?+uI)Achncg}W)lx9_IK(>=`8|6k7ch$FEI|0X?64<KxIt2yB*5J
z8)4@r2Dn#*{$Ko*{#W|M8W_LgNPF?HwT@!L=4n1%x%Cf>en>%QpY1$%sOJ0+yUYDm
za|phm=jg}FT>7&9M7lB_*USct^N{Z^FVwS^1SEq^M@jCvsXrog*V^uoUb!Yup};AP
zYZ%wzFA*B8x{gH_!`-nTJpD1OTY6D?g?=NoY?JzXsfORmHvH%Jzdkl7cv?#z7*l{;
z54+Up;zBYb;W^pz`NSKGupiM0muT6+3$5-4RHLdth{rsg+EkA9<5CtnYd_yMyR&UU
z`oTzOyrO1Y-!JW;mMc@@YDS~uniry+%STPpe{6g?S*j%VjP*a0@;m*If8;_$$*gks
zmv&#gJ-UAd=#&ilIFg(D7G;X$n{ko2H95i^ncAQEQcA}`+R~&66cNOA5wag5+0v&z
zPIM`(FzreZ%G{MAaT`-i+hCGT7fqT0>^BK9|CzwZe~$h4ii7?$e!%$US-Qevq~H(X
z9Cj-o8=xi6(ZMpR)fUXW7Lh3Nte6;CS{yZi%S=j@4LJQ)D6iv}JHY3~|H*yMzw!<G
z&$O|d=K!S9Efh`%4v?8uA;ffVcC`ybLq>osv@R8QIP_&FH3v%466G0a?iWzCP;BnG
z{<QnzLyu|8!{+-bzAskxRo;bpZE$Jsfv&`aS{8z^fwlAJ(ZO51EF~uw&SL$OdyfX<
z>uw8}3vj7ck3orL89u$lZ#3jFx^~WcdLOer|LNqZvyad)ssj)#d8Vr#a-zRe3Al73
z$j{g7-8RsK{}*=DsQ<_RCpIrKIX=oey4NY9yCNkI;+o_x>#toFGL6rzPPTsx9vga2
z7CAoMU)x1KDm*+$7PaS|lfPXc*dsUf5-VIxF{~h~3UQ=(n=T;TEqqvtrTroy<hZ74
zc^I)-4vQB@JYf?b-aJV3?L5OfTdL;$r@y5BkuS3U9P`&#UH=)E|9j(t_RG}&1EP$6
A9{>OV

diff --git a/modules/ocl/doc/introduction.rst b/modules/ocl/doc/introduction.rst
deleted file mode 100644
index 2c050cb..0000000
--- a/modules/ocl/doc/introduction.rst
+++ /dev/null
@@ -1,73 +0,0 @@
-OpenCL Module Introduction
-==========================
-
-.. highlight:: cpp
-
-General Information
--------------------
-
-The OpenCV OCL module contains a set of classes and functions that implement and accelerate OpenCV functionality on OpenCL compatible devices. OpenCL is a Khronos standard, implemented by a variety of devices (CPUs, GPUs, FPGAs, ARM), abstracting the exact hardware details, while enabling vendors to provide native implementation for maximal acceleration on their hardware. The standard enjoys wide industry support, and the end user of the module will enjoy the data parallelism benefits that the specific platform/hardware may be capable of, in a platform/hardware independent manner.
-
-While in the future we hope to validate (and enable) the OCL module in all OpenCL capable devices, we currently develop and test on GPU devices only. This includes both discrete GPUs (NVidia, AMD), as well as integrated chips (AMD APU and Intel HD devices). Performance of any particular algorithm will depend on the particular platform characteristics and capabilities. However, currently, accuracy and  mathematical correctness has been verified to be identical to that of the pure CPU implementation on all tested GPU devices and platforms (both Windows and Linux).
-
-
-The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advantage of OCL, whereas the high-level functionality (samples) includes some state-of-the-art algorithms (including LK Optical flow, and Face detection) ready to be used by the application developers. The module is also accompanied by an extensive performance and accuracy test suite.
-
-The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. At a minimum level, it can be viewed as a set of accelerators, that can take advantage of the high compute throughput that GPU/APU devices can provide. However, it can also be viewed as a starting point to really integrate the built-in functionality with your own custom OpenCL kernels, with or without modifying the source of OpenCV-OCL. Of course, knowledge of OpenCL will certainly help, however we hope that OpenCV-OCL module, and the kernels it contains in source code, can be very useful as a means of actually learning openCL. Such a knowledge would be necessary to further fine-tune any of the existing OpenCL kernels, or for extending the framework with new kernels. As of OpenCV 2.4.4, we introduce interoperability with OpenCL, enabling easy use of custom OpenCL kernels within the OpenCV framework.
-
-To correctly run the OCL module, you need to have the OpenCL runtime provided by the device vendor, typically the device driver.
-
-To enable OCL support, configure OpenCV using CMake with ``WITH_OPENCL=ON``. When the flag is set and if OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may be not built. If you have AMD'S FFT and BLAS library, you can select it with ``WITH_OPENCLAMDFFT=ON``, ``WITH_OPENCLAMDBLAS=ON``.
-
-The ocl module can be found under the "modules" directory. In "modules/ocl/src" you can find the source code for the cpp class that wrap around the direct kernel invocation. The kernels themselves can be found in "modules/ocl/src/opencl".  Samples can be found under "samples/ocl". Accuracy tests can be found in "modules/ocl/test", and performance tests under "module/ocl/perf".
-
-
-
-Right now, the user can select OpenCL device by specifying the environment variable ``OPENCV_OPENCL_DEVICE``. Variable format:
-
-.. code-block:: cpp
-
-    <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<DeviceName or ID>
-
-**Note:** Device ID range is: 0..9 (only one digit, 10 - it is a part of name)
-
-Samples:
-
-.. code-block:: cpp
-
-    '' = ':' = '::' = ':GPU|CPU:'
-    'AMD:GPU|CPU:'
-    'AMD::Tahiti'
-    ':GPU:1'
-    ':CPU:2'
-
-Also the user can use ``cv::ocl::setDevice`` function (with ``cv::ocl::getOpenCLPlatforms`` and ``cv::ocl::getOpenCLDevices``). This function initializes OpenCL runtime and setup the passed device as computing device.
-
-In the current version, all the thread share the same context and device so the multi-devices are not supported. We will add this feature soon. If a function support 4-channel operator, it should support 3-channel operator as well, because All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in ``oclMat``. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
-
-Developer Notes
--------------------
-
-In a heterogeneous device environment, there may be cost associated with data transfer. This would be the case, for example, when data needs to be moved from host memory (accessible to the CPU), to device memory (accessible to a discrete GPU). in the case of integrated graphics chips, there may be performance issues, relating to memory coherency between access from the GPU "part" of the integrated device, or the CPU "part." For best performance, in either case, it is recommended that you do not introduce data transfers between CPU and the discrete GPU, except in the beginning and the end of the algorithmic pipeline.
-
-Some tidbits:
-
-1. OpenCL version should be larger than 1.1 with FULL PROFILE.
-
-2. Currently there's only one OpenCL context and command queue. We hope to implement multi device and multi queue support in the future.
-
-3. Many kernels use 256 as its workgroup size if possible, so the max work group size of the device must larger than 256. All GPU devices we are aware of indeed support 256 workitems in a workgroup, however non GPU devices may not. This will be improved in the future.
-
-4. If the device does not support double arithmetic, then functions' implementation generates an error.
-
-5. The ``oclMat`` uses buffer object, not image object.
-
-6. All the 3-channel matrices (i.e. RGB image) are represented by 4-channel matrices in ``oclMat``, with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
-
-7. All the matrix in ``oclMat`` is aligned in column (now the alignment factor for ``step`` is 32+ byte). It means, m.cols * m.elemSize() <= m.step.
-
-8. Data transfer between Mat and ``oclMat``: If the CPU matrix is aligned in column, we will use faster API to transfer between Mat and ``oclMat``, otherwise, we will use clEnqueueRead/WriteBufferRect to transfer data to guarantee the alignment. 3-channel matrix is an exception, it's directly transferred to a temp buffer and then padded to 4-channel matrix(also aligned) when uploading and do the reverse operation when downloading.
-
-9. Data transfer between Mat and ``oclMat``: ROI is a feature of OpenCV, which allow users process a sub rectangle of a matrix. When a CPU matrix which has ROI will be transfered to GPU, the whole matrix will be transfered and set ROI as CPU's. In a word, we always transfer the whole matrix despite whether it has ROI or not.
-
-10. All the kernel file should locate in "modules/ocl/src/opencl/" with the extension ".cl". All the kernel files are transformed to pure characters at compilation time in opencl_kernels.cpp, and the file name without extension is the name of the program sources.
diff --git a/modules/ocl/doc/matrix_reductions.rst b/modules/ocl/doc/matrix_reductions.rst
deleted file mode 100644
index 41161d8..0000000
--- a/modules/ocl/doc/matrix_reductions.rst
+++ /dev/null
@@ -1,106 +0,0 @@
-Matrix Reductions
-=============================
-
-.. highlight:: cpp
-
-ocl::absSum
----------------
-Returns the sum of absolute values for matrix elements.
-
-.. ocv:function:: Scalar ocl::absSum(const oclMat &m)
-
-    :param m: The Source image of all depth.
-
-Counts the abs sum of matrix elements for each channel. Supports all data types.
-
-ocl::countNonZero
----------------------
-Returns the number of non-zero elements in src
-
-.. ocv:function:: int ocl::countNonZero(const oclMat &src)
-
-    :param src: Single-channel array
-
-Counts non-zero array elements. Supports all data types.
-
-ocl::min
-------------------
-
-.. ocv:function:: void ocl::min(const oclMat &src1, const oclMat &src2, oclMat &dst)
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-Computes element-wise minima of two arrays. Supports all data types.
-
-ocl::max
-------------------
-
-.. ocv:function:: void ocl::max(const oclMat &src1, const oclMat &src2, oclMat &dst)
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-Computes element-wise maxima of two arrays. Supports all data types.
-
-ocl::minMax
-------------------
-Returns void
-
-.. ocv:function:: void ocl::minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat())
-
-    :param src: Single-channel array
-
-    :param minVal: Pointer to returned minimum value, should not be NULL
-
-    :param maxVal: Pointer to returned maximum value, should not be NULL
-
-    :param mask: The optional mask used to select a sub-array
-
-Finds global minimum and maximum in a whole array or sub-array. Supports all data types.
-
-ocl::minMaxLoc
-------------------
-Returns void
-
-.. ocv:function:: void ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,const oclMat &mask = oclMat())
-
-    :param src: Single-channel array
-
-    :param minVal: Pointer to returned minimum value, should not be NULL
-
-    :param maxVal: Pointer to returned maximum value, should not be NULL
-
-    :param minLoc: Pointer to returned minimum location (in 2D case), should not be NULL
-
-    :param maxLoc: Pointer to returned maximum location (in 2D case) should not be NULL
-
-    :param mask: The optional mask used to select a sub-array
-
-The functions minMaxLoc find minimum and maximum element values and their positions. The extremums are searched across the whole array, or, if mask is not an empty array, in the specified array region. The functions do not work with multi-channel arrays.
-
-ocl::sqrSum
-------------------
-Returns the squared sum of matrix elements for each channel
-
-.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m)
-
-    :param m: The Source image of all depth.
-
-Counts the squared sum of matrix elements for each channel. Supports all data types.
-
-ocl::sum
-------------------
-Returns the sum of matrix elements for each channel
-
-.. ocv:function:: Scalar ocl::sum(const oclMat &m)
-
-    :param m: The Source image of all depth.
-
-Counts the sum of matrix elements for each channel.
diff --git a/modules/ocl/doc/ml_machine_learning.rst b/modules/ocl/doc/ml_machine_learning.rst
deleted file mode 100644
index ad0e303..0000000
--- a/modules/ocl/doc/ml_machine_learning.rst
+++ /dev/null
@@ -1,106 +0,0 @@
-ml.Machine Learning
-=============================
-
-.. highlight:: cpp
-
-ocl::KNearestNeighbour
---------------------------
-.. ocv:class:: ocl::KNearestNeighbour : public ocl::CvKNearest
-
-The class implements K-Nearest Neighbors model as described in the beginning of this section.
-
-ocl::KNearestNeighbour
---------------------------
-Computes the weighted sum of two arrays. ::
-
-    class CV_EXPORTS KNearestNeighbour: public CvKNearest
-    {
-    public:
-        KNearestNeighbour();
-        ~KNearestNeighbour();
-
-        bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
-            bool isRegression = false, int max_k = 32, bool updateBase = false);
-
-        void clear();
-
-        void find_nearest(const oclMat& samples, int k, oclMat& lables);
-
-    private:
-        /* hidden */
-    };
-
-ocl::KNearestNeighbour::train
----------------------------------
-Trains the model.
-
-.. ocv:function:: bool ocl::KNearestNeighbour::train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)), bool isRegression = false, int max_k = 32, bool updateBase = false)
-
-    :param isRegression: Type of the problem: ``true`` for regression and ``false`` for classification.
-
-    :param maxK: Number of maximum neighbors that may be passed to the method :ocv:func:`CvKNearest::find_nearest`.
-
-    :param updateBase: Specifies whether the model is trained from scratch (``update_base=false``), or it is updated using the new training data (``update_base=true``). In the latter case, the parameter ``maxK`` must not be larger than the original value.
-
-The method trains the K-Nearest model. It follows the conventions of the generic :ocv:func:`CvStatModel::train` approach with the following limitations:
-
-* Only ``CV_ROW_SAMPLE`` data layout is supported.
-* Input variables are all ordered.
-* Output variables can be either categorical ( ``is_regression=false`` ) or ordered ( ``is_regression=true`` ).
-* Variable subsets (``var_idx``) and missing measurements are not supported.
-
-ocl::KNearestNeighbour::find_nearest
-----------------------------------------
-Finds the neighbors and predicts responses for input vectors.
-
-.. ocv:function:: void ocl::KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lables )
-
-    :param samples: Input samples stored by rows. It is a single-precision floating-point matrix of :math:`number\_of\_samples \times number\_of\_features` size.
-
-    :param k: Number of used nearest neighbors. It must satisfy constraint: :math:`k \le` :ocv:func:`CvKNearest::get_max_k`.
-
-    :param labels: Vector with results of prediction (regression or classification) for each input sample. It is a single-precision floating-point vector with ``number_of_samples`` elements.
-
-ocl::kmeans
----------------
-Finds centers of clusters and groups input samples around the clusters.
-
-.. ocv:function:: double ocl::kmeans(const oclMat &src, int K, oclMat &bestLabels, TermCriteria criteria, int attemps, int flags, oclMat &centers)
-
-    :param src: Floating-point matrix of input samples, one row per sample.
-
-    :param K: Number of clusters to split the set by.
-
-    :param bestLabels: Input/output integer array that stores the cluster indices for every sample.
-
-    :param criteria: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy. The accuracy is specified as ``criteria.epsilon``. As soon as each of the cluster centers moves by less than ``criteria.epsilon`` on some iteration, the algorithm stops.
-
-    :param attempts: Flag to specify the number of times the algorithm is executed using different initial labellings. The algorithm returns the labels that yield the best compactness (see the last function parameter).
-
-    :param flags: Flag that can take the following values:
-
-            * **KMEANS_RANDOM_CENTERS** Select random initial centers in each attempt.
-
-            * **KMEANS_PP_CENTERS** Use ``kmeans++`` center initialization by Arthur and Vassilvitskii [Arthur2007].
-
-            * **KMEANS_USE_INITIAL_LABELS** During the first (and possibly the only) attempt, use the user-supplied labels instead of computing them from the initial centers. For the second and further attempts, use the random or semi-random centers. Use one of  ``KMEANS_*_CENTERS``  flag to specify the exact method.
-
-    :param centers: Output matrix of the cluster centers, one row per each cluster center.
-
-ocl::distanceToCenters
-----------------------
-For each samples in ``source``, find its closest neighour in ``centers``.
-
-.. ocv:function:: void ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR)
-
-    :param src: Floating-point matrix of input samples. One row per sample.
-
-    :param centers: Floating-point matrix of center candidates. One row per center.
-
-    :param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``.
-
-    :param dists: The output distances calculated from each sample to the best matched center.
-
-    :param labels: The output index of best matched center for each row of sample.
-
-The method is a utility function which maybe used for multiple clustering algorithms such as K-means.
diff --git a/modules/ocl/doc/object_detection.rst b/modules/ocl/doc/object_detection.rst
deleted file mode 100644
index 0539e77..0000000
--- a/modules/ocl/doc/object_detection.rst
+++ /dev/null
@@ -1,95 +0,0 @@
-Object Detection
-=============================
-
-.. highlight:: cpp
-
-ocl::OclCascadeClassifier
------------------------------
-.. ocv:class:: ocl::OclCascadeClassifier : public CascadeClassifier
-
-Cascade classifier class used for object detection. Supports HAAR cascade classifier  in the form of cross link ::
-
-    class CV_EXPORTS OclCascadeClassifier : public CascadeClassifier
-    {
-    public:
-            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
-                                              double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
-                                              Size minSize = Size(), Size maxSize = Size());
-    };
-
-.. note::
-
-   (Ocl) A face detection example using cascade classifiers can be found at opencv_source_code/samples/ocl/facedetect.cpp
-
-ocl::OclCascadeClassifier::detectMultiScale
-------------------------------------------------------
-Detects objects of different sizes in the input image.
-
-.. ocv:function:: void ocl::OclCascadeClassifier::detectMultiScale(oclMat &image, std::vector<cv::Rect>& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, Size minSize = Size(), Size maxSize = Size())
-
-    :param faces: Vector of rectangles where each rectangle contains the detected object.
-
-    :param image:  Matrix of type CV_8U containing an image where objects should be detected.
-
-    :param scaleFactor: Parameter specifying how much the image size is reduced at each image scale.
-
-    :param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it.
-
-    :param flags: Parameter with the same meaning for an old cascade as in the function ``cvHaarDetectObjects``. It is not used for a new cascade.
-
-    :param minSize: Minimum possible object size. Objects smaller than that are ignored.
-
-    :param maxSize: Maximum possible object size. Objects larger than that are ignored.
-
-The function provides a very similar interface with that in CascadeClassifier class, except using oclMat as input image.
-
-ocl::MatchTemplateBuf
--------------------------
-.. ocv:struct:: ocl::MatchTemplateBuf
-
-Class providing memory buffers for :ocv:func:`ocl::matchTemplate` function, plus it allows to adjust some specific parameters. ::
-
-    struct CV_EXPORTS MatchTemplateBuf
-    {
-        Size user_block_size;
-        oclMat imagef, templf;
-        std::vector<oclMat> images;
-        std::vector<oclMat> image_sums;
-        std::vector<oclMat> image_sqsums;
-    };
-
-You can use field `user_block_size` to set specific block size for :ocv:func:`ocl::matchTemplate` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
-
-ocl::matchTemplate
-----------------------
-Computes a proximity map for a raster template and an image where the template is searched for.
-
-.. ocv:function:: void ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
-
-.. ocv:function:: void ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf &buf)
-
-    :param image: Source image.  ``CV_32F`` and  ``CV_8U`` depth images (1..4 channels) are supported for now.
-
-    :param templ: Template image with the size and type the same as  ``image`` .
-
-    :param result: Map containing comparison results ( ``CV_32FC1`` ). If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
-
-    :param method: Specifies the way to compare the template with the image.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::MatchTemplateBuf`.
-
-    The following methods are supported for the ``CV_8U`` depth images for now:
-
-    * ``CV_TM_SQDIFF``
-    * ``CV_TM_SQDIFF_NORMED``
-    * ``CV_TM_CCORR``
-    * ``CV_TM_CCORR_NORMED``
-    * ``CV_TM_CCOEFF``
-    * ``CV_TM_CCOEFF_NORMED``
-
-    The following methods are supported for the ``CV_32F`` images for now:
-
-    * ``CV_TM_SQDIFF``
-    * ``CV_TM_CCORR``
-
-.. seealso:: :ocv:func:`matchTemplate`
diff --git a/modules/ocl/doc/ocl.rst b/modules/ocl/doc/ocl.rst
deleted file mode 100644
index 76c1f88..0000000
--- a/modules/ocl/doc/ocl.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-***************************************
-ocl. OpenCL-accelerated Computer Vision
-***************************************
-
-.. toctree::
-    :maxdepth: 1
-
-    introduction
-    structures_and_utility_functions
-    data_structures
-    operations_on_matrices
-    matrix_reductions
-    image_filtering
-    image_processing
-    ml_machine_learning
-    object_detection
-    feature_detection_and_description
-    video_analysis
-    camera_calibration_and_3D_reconstruction
-..    camera_calibration_and_3d_reconstruction
-..    video
diff --git a/modules/ocl/doc/operations_on_matrices.rst b/modules/ocl/doc/operations_on_matrices.rst
deleted file mode 100644
index 1763d33..0000000
--- a/modules/ocl/doc/operations_on_matrices.rst
+++ /dev/null
@@ -1,602 +0,0 @@
-Operations on Matrics
-=============================
-
-.. highlight:: cpp
-
-ocl::abs
-------------------
-Returns void
-
-.. ocv:function:: void ocl::abs(const oclMat& src, oclMat& dst)
-
-    :param src: input array.
-
-    :param dst: destination array, it will have the same size and same type as ``src``.
-
-Computes per-element absolute values of the input array. Supports all data types.
-
-ocl::absdiff
-------------------
-Returns void
-
-.. ocv:function:: void ocl::absdiff(const oclMat& src1, const oclMat& src2, oclMat& dst)
-
-.. ocv:function:: void ocl::absdiff(const oclMat& src1, const Scalar& s, oclMat& dst)
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param s: scalar, the second input parameter.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-Computes per-element absolute difference between two arrays or between array and a scalar. Supports all data types.
-
-ocl::add
-------------------
-Returns void
-
-.. ocv:function:: void ocl::add(const oclMat & src1, const oclMat & src2, oclMat & dst, const oclMat & mask = oclMat())
-
-.. ocv:function:: void ocl::add(const oclMat & src1, const Scalar & s, oclMat & dst, const oclMat & mask = oclMat())
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param s: scalar, the second input parameter
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
-
-Computes per-element additon between two arrays or between array and a scalar. Supports all data types.
-
-ocl::addWeighted
---------------------
-Computes the weighted sum of two arrays.
-
-.. ocv:function:: void ocl::addWeighted(const oclMat& src1, double  alpha, const oclMat& src2, double beta, double gama, oclMat& dst)
-
-    :param src1: First source array.
-
-    :param alpha: Weight for the first array elements.
-
-    :param src2: Second source array of the same size and channel number as  ``src1`` .
-
-    :param beta: Weight for the second array elements.
-
-    :param dst: Destination array that has the same size and number of channels as the input arrays.
-
-    :param gamma: Scalar added to each sum.
-
-The function ``addWeighted`` calculates the weighted sum of two arrays as follows:
-
-.. math::
-
-    \texttt{c} (I)= \texttt{saturate} ( \texttt{a} (I)* \texttt{alpha} +  \texttt{b} (I)* \texttt{beta} +  \texttt{gamma} )
-
-where ``I`` is a multi-dimensional index of array elements. In case of multi-channel arrays, each channel is processed independently.
-
-.. seealso:: :ocv:func:`addWeighted`
-
-ocl::bitwise_and
-------------------
-Returns void
-
-.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param s: scalar, the second input parameter.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
-
-Computes per-element bitwise_and between two arrays or between array and a scalar. Supports all data types.
-
-ocl::bitwise_not
-------------------
-Returns void
-
-.. ocv:function:: void ocl::bitwise_not(const oclMat &src, oclMat &dst)
-
-    :param src: the input array.
-
-    :param dst: the destination array, it will have the same size and same type as ``src``.
-
-The functions bitwise not compute per-element bit-wise inversion of the source array. Supports all data types.
-
-ocl::bitwise_or
-------------------
-Returns void
-
-.. ocv:function:: void ocl::bitwise_or(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::bitwise_or(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param s: scalar, the second input parameter.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
-
-Computes per-element bitwise_or between two arrays or between array and a scalar. Supports all data types.
-
-ocl::bitwise_xor
-------------------
-Returns void
-
-.. ocv:function:: void ocl::bitwise_xor(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::bitwise_xor(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param sc: scalar, the second input parameter.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
-
-Computes per-element bitwise_xor between two arrays or between array and a scalar. Supports all data types.
-
-ocl::cartToPolar
-------------------
-Returns void
-
-.. ocv:function:: void ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false)
-
-    :param x: the array of x-coordinates; must be single-precision or double-precision floating-point array.
-
-    :param y: the array of y-coordinates; it must have the same size and same type as ``x``.
-
-    :param magnitude: the destination array of magnitudes of the same size and same type as ``x``.
-
-    :param angle: the destination array of angles of the same size and same type as ``x``. The angles are measured in radians (0 to 2pi) or in degrees (0 to 360 degrees).
-
-    :param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees.
-
-Calculates the magnitude and angle of 2D vectors. Supports only ``CV_32F`` and ``CV_64F`` data types.
-
-ocl::compare
-------------------
-Returns void
-
-.. ocv:function:: void ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop)
-
-    :param src1: the first source array.
-
-    :param src2: the second source array; must have the same size and same type as ``src1``.
-
-    :param dst: the destination array; will have the same size as ``src1`` and type ``CV_8UC1``.
-
-    :param cmpop: the flag specifying the relation between the elements to be checked.
-
-Performs per-element comparison of two arrays or an array and scalar value. Supports all data types.
-
-ocl::dft
-------------
-Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
-
-.. ocv:function:: void ocl::dft(const oclMat& src, oclMat& dst, Size dft_size = Size(), int flags = 0)
-
-    :param src: source matrix (real or complex).
-
-    :param dst: destination matrix (real or complex).
-
-    :param dft_size: size of original input, which is used for transformation from complex to real.
-
-    :param flags: optional flags:
-
-        * **DFT_ROWS** transforms each individual row of the source matrix.
-
-        * **DFT_COMPLEX_OUTPUT** performs a forward transformation of 1D or 2D real array. The result, though being a complex array, has complex-conjugate symmetry (*CCS*, see the function description below for details). Such an array can be packed into a real array of the same size as input, which is the fastest option and which is what the function does by default. However, you may wish to get a full complex array (for simpler spectrum analysis, and so on). Pass the flag to enable the function to produce a full-size complex output array.
-
-        * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
-
-        * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
-
-Use to handle real matrices (``CV_32FC1``) and complex matrices in the interleaved format (``CV_32FC2``).
-
-The ``dft_size`` must be powers of ``2``, ``3`` and ``5``. Real to complex dft output is not the same with cpu version. Real to complex and complex to real does not support ``DFT_ROWS``.
-
-.. seealso:: :ocv:func:`dft`
-
-ocl::divide
-------------------
-Returns void
-
-.. ocv:function:: void ocl::divide(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1)
-
-.. ocv:function:: void ocl::divide(double scale, const oclMat& src1, oclMat& dst)
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param scale: scalar factor.
-
-Computes per-element divide between two arrays or between array and a scalar. Supports all data types.
-
-ocl::exp
-------------------
-Returns void
-
-.. ocv:function:: void ocl::exp(const oclMat &src, oclMat &dst)
-
-    :param src: the first source array.
-
-    :param dst: the dst array; must have the same size and same type as ``src``.
-
-The function exp calculates the exponent of every element of the input array. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
-
-ocl::flip
-------------------
-Returns void
-
-.. ocv:function:: void ocl::flip(const oclMat& src, oclMat& dst, int flipCode)
-
-    :param src: source image.
-
-    :param dst: destination image.
-
-    :param flipCode: specifies how to flip the array: 0 means flipping around the x-axis, positive (e.g., 1) means flipping around y-axis, and negative (e.g., -1) means flipping around both axes.
-
-The function flip flips the array in one of three different ways (row and column indices are 0-based). Supports all data types.
-
-ocl::gemm
-------------------
-Performs generalized matrix multiplication.
-
-.. ocv:function:: void ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, const oclMat& src3, double beta, oclMat& dst, int flags = 0)
-
-    :param src1: first multiplied input matrix that should be ``CV_32FC1`` type.
-
-    :param src2: second multiplied input matrix of the same type as ``src1``.
-
-    :param alpha: weight of the matrix product.
-
-    :param src3: third optional delta matrix added to the matrix product. It should have the same type as ``src1`` and ``src2``.
-
-    :param beta: weight of ``src3``.
-
-    :param dst: destination matrix. It has the proper size and the same type as input matrices.
-
-    :param flags: operation flags:
-
-            * **GEMM_1_T** transpose ``src1``.
-            * **GEMM_2_T** transpose ``src2``.
-
-.. seealso:: :ocv:func:`gemm`
-
-ocl::log
-------------------
-Returns void
-
-.. ocv:function:: void ocl::log(const oclMat &src, oclMat &dst)
-
-    :param src: the first source array.
-
-    :param dst: the dst array; must have the same size and same type as ``src``.
-
-The function log calculates the log of every element of the input array. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
-
-ocl::LUT
-------------------
-Returns void
-
-.. ocv:function:: void ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
-
-    :param src: source array of 8-bit elements.
-
-    :param lut: look-up table of 256 elements. In the case of multi-channel source array, the table should either have a single channel (in this case the same table is used for all channels) or the same number of channels as in the source array.
-
-    :param dst: destination array; will have the same size and the same number of channels as ``src``, and the same depth as ``lut``.
-
-Performs a look-up table transform of an array.
-
-ocl::magnitude
-------------------
-Returns void
-
-.. ocv:function:: void ocl::magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude)
-
-    :param x: the floating-point array of x-coordinates of the vectors.
-
-    :param y: the floating-point array of y-coordinates of the vectors; must have the same size as ``x``.
-
-    :param magnitude: the destination array; will have the same size and same type as ``x``.
-
-The function magnitude calculates magnitude of 2D vectors formed from the corresponding elements of ``x`` and ``y`` arrays. Supports only ``CV_32F`` and ``CV_64F`` data types.
-
-ocl::meanStdDev
-------------------
-Returns void
-
-.. ocv:function:: void ocl::meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev)
-
-    :param mtx: source image.
-
-    :param mean: the output parameter: computed mean value.
-
-    :param stddev: the output parameter: computed standard deviation.
-
-The functions meanStdDev compute the mean and the standard deviation M of array elements, independently for each channel, and return it via the output parameters. Supports all data types.
-
-ocl::merge
-------------------
-Returns void
-
-.. ocv:function:: void ocl::merge(const vector<oclMat> &src, oclMat &dst)
-
-    :param src: The source array or vector of the single-channel matrices to be merged. All the matrices in src must have the same size and the same type.
-
-    :param dst: The destination array; will have the same size and the same depth as src, the number of channels will match the number of source matrices.
-
-Composes a multi-channel array from several single-channel arrays. Supports all data types.
-
-ocl::multiply
-------------------
-Returns void
-
-.. ocv:function:: void ocl::multiply(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1)
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param scale: optional scale factor.
-
-Computes per-element multiply between two arrays or between array and a scalar. Supports all data types.
-
-ocl::norm
-------------------
-Returns the calculated norm
-
-.. ocv:function:: double ocl::norm(const oclMat &src1, int normType = NORM_L2)
-
-.. ocv:function:: double ocl::norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2)
-
-    :param src1: the first source array.
-
-    :param src2: the second source array of the same size and the same type as ``src1``.
-
-    :param normType: type of the norm.
-
-The functions ``norm`` calculate an absolute norm of ``src1`` (when there is no ``src2`` ):
-
-.. math::
-
-    norm =  \forkthree{\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  $\texttt{normType} = \texttt{NORM\_INF}$ }
-    { \| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  $\texttt{normType} = \texttt{NORM\_L1}$ }
-    { \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  $\texttt{normType} = \texttt{NORM\_L2}$ }
-
-or an absolute or relative difference norm if ``src2`` is there:
-
-.. math::
-
-    norm =  \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  $\texttt{normType} = \texttt{NORM\_INF}$ }
-    { \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  $\texttt{normType} = \texttt{NORM\_L1}$ }
-    { \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  $\texttt{normType} = \texttt{NORM\_L2}$ }
-
-or
-
-.. math::
-
-    norm =  \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  $\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}$ }
-    { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  $\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}$ }
-    { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  $\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}$ }
-
-The functions ``norm`` return the calculated norm.
-
-A multi-channel input arrays are treated as a single-channel, that is, the results for all channels are combined.
-
-ocl::oclMat::convertTo
---------------------------
-Returns void
-
-.. ocv:function:: void ocl::oclMat::convertTo(oclMat &m, int rtype, double alpha = 1, double beta = 0) const
-
-    :param m: the destination matrix. If it does not have a proper size or type before the operation, it will be reallocated.
-
-    :param rtype: the desired destination matrix type, or rather, the depth (since the number of channels will be the same with the source one). If rtype is negative, the destination matrix will have the same type as the source.
-
-    :param alpha: optional scale factor.
-
-    :param beta: optional delta added to the scaled values.
-
-The method converts source pixel values to the target datatype. Saturate cast is applied in the end to avoid possible overflows. Supports all data types.
-
-ocl::oclMat::copyTo
------------------------
-Returns void
-
-.. ocv:function:: void ocl::oclMat::copyTo(oclMat &m, const oclMat &mask = oclMat()) const
-
-    :param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated.
-
-    :param mask: The operation mask. Its non-zero elements indicate, which matrix elements need to be copied.
-
-Copies the matrix to another one. Supports all data types.
-
-ocl::oclMat::setTo
-----------------------
-Returns oclMat
-
-.. ocv:function:: oclMat& ocl::oclMat::setTo(const Scalar &s, const oclMat &mask = oclMat())
-
-    :param s: Assigned scalar, which is converted to the actual array type.
-
-    :param mask: The operation mask of the same size as ``*this`` and type ``CV_8UC1``.
-
-Sets all or some of the array elements to the specified value. This is the advanced variant of Mat::operator=(const Scalar s) operator. Supports all data types.
-
-ocl::phase
-------------------
-Returns void
-
-.. ocv:function:: void ocl::phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false)
-
-    :param x: the source floating-point array of x-coordinates of 2D vectors
-
-    :param y: the source array of y-coordinates of 2D vectors; must have the same size and the same type as ``x``.
-
-    :param angle: the destination array of vector angles; it will have the same size and same type as ``x``.
-
-    :param angleInDegrees: when it is true, the function will compute angle in degrees, otherwise they will be measured in radians.
-
-The function phase computes the rotation angle of each 2D vector that is formed from the corresponding elements of ``x`` and ``y``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data type.
-
-ocl::polarToCart
-------------------
-Returns void
-
-.. ocv:function:: void ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false)
-
-    :param magnitude: the source floating-point array of magnitudes of 2D vectors. It can be an empty matrix (=Mat()) - in this case the function assumes that all the magnitudes are = 1. If it's not empty, it must have the same size and same type as ``angle``.
-
-    :param angle: the source floating-point array of angles of the 2D vectors.
-
-    :param x: the destination array of x-coordinates of 2D vectors; will have the same size and the same type as ``angle``.
-
-    :param y: the destination array of y-coordinates of 2D vectors; will have the same size and the same type as ``angle``.
-
-    :param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees.
-
-The function polarToCart computes the cartesian coordinates of each 2D vector represented by the corresponding elements of magnitude and angle. Supports only ``CV_32F`` and ``CV_64F`` data types.
-
-ocl::pow
-------------------
-Returns void
-
-.. ocv:function:: void ocl::pow(const oclMat &x, double p, oclMat &y)
-
-    :param x: the source array.
-
-    :param p: the exponent of power; the source floating-point array of angles of the 2D vectors.
-
-    :param y: the destination array, should be the same type as the source.
-
-The function pow raises every element of the input array to ``p``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data types.
-
-ocl::setIdentity
-------------------
-Returns void
-
-.. ocv:function:: void ocl::setIdentity(oclMat& src, const Scalar & val = Scalar(1))
-
-    :param src: matrix to initialize (not necessarily square).
-
-    :param val: value to assign to diagonal elements.
-
-The function initializes a scaled identity matrix.
-
-ocl::sortByKey
-------------------
-Returns void
-
-.. ocv:function:: void ocl::sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false)
-
-    :param keys: the keys to be used as sorting indices.
-
-    :param values: the array of values.
-
-    :param isGreaterThan: determine sorting order.
-
-    :param method: supported sorting methods:
-
-            * **SORT_BITONIC**   bitonic sort, only support power-of-2 buffer size.
-            * **SORT_SELECTION** selection sort, currently cannot sort duplicate keys.
-            * **SORT_MERGE**     merge sort.
-            * **SORT_RADIX**     radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``).
-
-Returns the sorted result of all the elements in values based on equivalent keys.
-
-The element unit in the values to be sorted is determined from the data type, i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless its matrix dimension.
-
-Both keys and values will be sorted inplace.
-
-Keys needs to be a **single** channel ``oclMat``.
-
-Example::
-
-    input -
-    keys   = {2,    3,   1}   (CV_8UC1)
-    values = {10,5, 4,3, 6,2} (CV_8UC2)
-    sortByKey(keys, values, SORT_SELECTION, false);
-    output -
-    keys   = {1,    2,   3}   (CV_8UC1)
-    values = {6,2, 10,5, 4,3} (CV_8UC2)
-
-ocl::split
-------------------
-Returns void
-
-.. ocv:function:: void ocl::split(const oclMat &src, vector<oclMat> &dst)
-
-    :param src: The source multi-channel array
-
-    :param dst: The destination array or vector of arrays; The number of arrays must match src.channels(). The arrays themselves will be reallocated if needed.
-
-The functions split split multi-channel array into separate single-channel arrays. Supports all data types.
-
-ocl::sqrt
-------------------
-Returns void
-
-.. ocv:function:: void ocl::sqrt(const oclMat &src, oclMat &dst)
-
-    :param src: the first source array.
-
-    :param dst: the dst array; must have the same size and same type as ``src``.
-
-The function ``sqrt`` calculates the square root of each input array element. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
-
-ocl::subtract
-------------------
-Returns void
-
-.. ocv:function:: void ocl::subtract(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
-
-.. ocv:function:: void ocl::subtract(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
-
-    :param src1: the first input array.
-
-    :param src2: the second input array, must be the same size and same type as ``src1``.
-
-    :param s: scalar, the second input parameter.
-
-    :param dst: the destination array, it will have the same size and same type as ``src1``.
-
-    :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
-
-Computes per-element subtract between two arrays or between array and a scalar. Supports all data types.
-
-ocl::transpose
-------------------
-Returns void
-
-.. ocv:function:: void ocl::transpose(const oclMat &src, oclMat &dst)
-
-    :param src: the source array.
-
-    :param dst: the destination array of the same type as ``src``.
-
-Transposes a matrix (in case when ``src`` == ``dst`` and matrix is square the operation are performed inplace).
diff --git a/modules/ocl/doc/structures_and_utility_functions.rst b/modules/ocl/doc/structures_and_utility_functions.rst
deleted file mode 100644
index 1d1265c..0000000
--- a/modules/ocl/doc/structures_and_utility_functions.rst
+++ /dev/null
@@ -1,56 +0,0 @@
-Data Structures and Utility Functions
-========================================
-
-.. highlight:: cpp
-
-ocl::getOpenCLPlatforms
------------------------
-Returns the list of OpenCL platforms
-
-.. ocv:function:: int ocl::getOpenCLPlatforms( PlatformsInfo& platforms )
-
-    :param platforms: Output variable
-
-ocl::getOpenCLDevices
----------------------
-Returns the list of devices
-
-.. ocv:function:: int ocl::getOpenCLDevices( DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, const PlatformInfo* platform = NULL )
-
-    :param devices: Output variable
-
-    :param deviceType: Bitmask of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``.
-
-    :param platform: Specifies preferrable platform
-
-ocl::setDevice
---------------
-Initialize OpenCL computation context
-
-.. ocv:function:: void ocl::setDevice( const DeviceInfo* info )
-
-    :param info: device info
-
-ocl::initializeContext
---------------------------------
-Alternative way to initialize OpenCL computation context.
-
-.. ocv:function:: void ocl::initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
-
-    :param pClPlatform: selected ``platform_id`` (via pointer, parameter type is ``cl_platform_id*``)
-
-    :param pClContext: selected ``cl_context`` (via pointer, parameter type is ``cl_context*``)
-
-    :param pClDevice: selected ``cl_device_id`` (via pointer, parameter type is ``cl_device_id*``)
-
-This function can be used for context initialization with D3D/OpenGL interoperability.
-
-ocl::setBinaryPath
-------------------
-Returns void
-
-.. ocv:function:: void ocl::setBinaryPath(const char *path)
-
-    :param path: the path of OpenCL kernel binaries
-
-If you call this function and set a valid path, the OCL module will save the compiled kernel to the address in the first time and reload the binary since that. It can save compilation time at the runtime.
diff --git a/modules/ocl/doc/video_analysis.rst b/modules/ocl/doc/video_analysis.rst
deleted file mode 100644
index 83ba200..0000000
--- a/modules/ocl/doc/video_analysis.rst
+++ /dev/null
@@ -1,561 +0,0 @@
-Video Analysis
-=============================
-
-.. highlight:: cpp
-
-ocl::GoodFeaturesToTrackDetector_OCL
-----------------------------------------
-.. ocv:class:: ocl::GoodFeaturesToTrackDetector_OCL
-
-Class used for strong corners detection on an image. ::
-
-    class GoodFeaturesToTrackDetector_OCL
-    {
-    public:
-        explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
-            int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
-
-        //! return 1 rows matrix with CV_32FC2 type
-        void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
-        //! download points of type Point2f to a vector. the vector's content will be erased
-        void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
-
-        int maxCorners;
-        double qualityLevel;
-        double minDistance;
-
-        int blockSize;
-        bool useHarrisDetector;
-        double harrisK;
-        void releaseMemory()
-        {
-            Dx_.release();
-            Dy_.release();
-            eig_.release();
-            minMaxbuf_.release();
-            tmpCorners_.release();
-        }
-    };
-
-The class finds the most prominent corners in the image.
-
-.. seealso:: :ocv:func:`goodFeaturesToTrack()`
-
-ocl::GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL
--------------------------------------------------------------------------
-Constructor.
-
-.. ocv:function:: ocl::GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
-
-    :param maxCorners: Maximum number of corners to return. If more corners are found than ``maxCorners``, the strongest of them are returned.
-
-    :param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see  :ocv:func:`ocl::cornerMinEigenVal` ) or the Harris function response (see  :ocv:func:`ocl::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the  ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
-
-    :param minDistance: Minimum possible Euclidean distance between the returned corners.
-
-    :param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See  :ocv:func:`cornerEigenValsAndVecs` .
-
-    :param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`ocl::cornerHarris`) or :ocv:func:`ocl::cornerMinEigenVal`.
-
-    :param harrisK: Free parameter of the Harris detector.
-
-ocl::GoodFeaturesToTrackDetector_OCL::operator ()
--------------------------------------------------------
-Finds the most prominent corners in the image.
-
-.. ocv:function:: void ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat())
-
-    :param image: Input 8-bit, single-channel image.
-
-    :param corners: Output vector of detected corners (it will be one row matrix with CV_32FC2 type).
-
-    :param mask: Optional region of interest. If the mask is not empty (it needs to have the type  ``CV_8UC1``  and the same size as  ``image`` ), it  specifies the region in which the corners are detected.
-
-.. seealso:: :ocv:func:`goodFeaturesToTrack`
-
-ocl::GoodFeaturesToTrackDetector_OCL::releaseMemory
---------------------------------------------------------
-Releases inner buffers memory.
-
-.. ocv:function:: void ocl::GoodFeaturesToTrackDetector_OCL::releaseMemory()
-
-ocl::FarnebackOpticalFlow
--------------------------------
-.. ocv:class:: ocl::FarnebackOpticalFlow
-
-Class computing a dense optical flow using Gunnar Farneback's algorithm. ::
-
-    class CV_EXPORTS FarnebackOpticalFlow
-    {
-    public:
-        FarnebackOpticalFlow();
-
-        int numLevels;
-        double pyrScale;
-        bool fastPyramids;
-        int winSize;
-        int numIters;
-        int polyN;
-        double polySigma;
-        int flags;
-
-        void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
-
-        void releaseMemory();
-
-    private:
-        /* hidden */
-    };
-
-ocl::FarnebackOpticalFlow::operator ()
-------------------------------------------
-Computes a dense optical flow using Gunnar Farneback's algorithm.
-
-.. ocv:function:: void ocl::FarnebackOpticalFlow::operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
-
-    :param frame0: First 8-bit gray-scale input image
-    :param frame1: Second 8-bit gray-scale input image
-    :param flowx: Flow horizontal component
-    :param flowy: Flow vertical component
-
-.. seealso:: :ocv:func:`calcOpticalFlowFarneback`
-
-ocl::FarnebackOpticalFlow::releaseMemory
---------------------------------------------
-Releases unused auxiliary memory buffers.
-
-.. ocv:function:: void ocl::FarnebackOpticalFlow::releaseMemory()
-
-
-ocl::PyrLKOpticalFlow
--------------------------
-.. ocv:class:: ocl::PyrLKOpticalFlow
-
-Class used for calculating an optical flow. ::
-
-    class PyrLKOpticalFlow
-    {
-    public:
-        PyrLKOpticalFlow();
-
-        void sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts,
-            oclMat& status, oclMat* err = 0);
-
-        void dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0);
-
-        Size winSize;
-        int maxLevel;
-        int iters;
-        double derivLambda;
-        bool useInitialFlow;
-        float minEigThreshold;
-        bool getMinEigenVals;
-
-        void releaseMemory();
-
-    private:
-        /* hidden */
-    };
-
-The class can calculate an optical flow for a sparse feature set or dense optical flow using the iterative Lucas-Kanade method with pyramids.
-
-.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
-
-ocl::PyrLKOpticalFlow::sparse
----------------------------------
-Calculate an optical flow for a sparse feature set.
-
-.. ocv:function:: void ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err = 0)
-
-    :param prevImg: First 8-bit input image (supports both grayscale and color images).
-
-    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
-
-    :param prevPts: Vector of 2D points for which the flow needs to be found. It must be one row matrix with CV_32FC2 type.
-
-    :param nextPts: Output vector of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. When ``useInitialFlow`` is true, the vector must have the same size as in the input.
-
-    :param status: Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the flow for the corresponding features has been found. Otherwise, it is set to 0.
-
-    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
-
-.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
-
-
-ocl::PyrLKOpticalFlow::dense
----------------------------------
-Calculate dense optical flow.
-
-.. ocv:function:: void ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0)
-
-    :param prevImg: First 8-bit grayscale input image.
-
-    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
-
-    :param u: Horizontal component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
-
-    :param v: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
-
-    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
-
-
-ocl::PyrLKOpticalFlow::releaseMemory
-----------------------------------------
-Releases inner buffers memory.
-
-.. ocv:function:: void ocl::PyrLKOpticalFlow::releaseMemory()
-
-ocl::interpolateFrames
---------------------------
-Interpolates frames (images) using provided optical flow (displacement field).
-
-.. ocv:function:: void ocl::interpolateFrames(const oclMat& frame0, const oclMat& frame1, const oclMat& fu, const oclMat& fv, const oclMat& bu, const oclMat& bv, float pos, oclMat& newFrame, oclMat& buf)
-
-    :param frame0: First frame (32-bit floating point images, single channel).
-
-    :param frame1: Second frame. Must have the same type and size as ``frame0`` .
-
-    :param fu: Forward horizontal displacement.
-
-    :param fv: Forward vertical displacement.
-
-    :param bu: Backward horizontal displacement.
-
-    :param bv: Backward vertical displacement.
-
-    :param pos: New frame position.
-
-    :param newFrame: Output image.
-
-    :param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
-
-ocl::KalmanFilter
---------------------
-.. ocv:class:: ocl::KalmanFilter
-
-Kalman filter class. ::
-
-    class CV_EXPORTS KalmanFilter
-    {
-    public:
-        KalmanFilter();
-        //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
-        KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
-        //! re-initializes Kalman filter. The previous content is destroyed.
-        void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
-
-        const oclMat& predict(const oclMat& control=oclMat());
-        const oclMat& correct(const oclMat& measurement);
-
-        oclMat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
-        oclMat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
-        oclMat transitionMatrix; //!< state transition matrix (A)
-        oclMat controlMatrix; //!< control matrix (B) (not used if there is no control)
-        oclMat measurementMatrix; //!< measurement matrix (H)
-        oclMat processNoiseCov; //!< process noise covariance matrix (Q)
-        oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
-        oclMat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
-        oclMat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
-        oclMat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
-    private:
-        /* hidden */
-    };
-
-ocl::KalmanFilter::KalmanFilter
-----------------------------------
-The constructors.
-
-.. ocv:function:: ocl::KalmanFilter::KalmanFilter()
-
-.. ocv:function:: ocl::KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F)
-
-    The full constructor.
-
-    :param dynamParams: Dimensionality of the state.
-
-    :param measureParams: Dimensionality of the measurement.
-
-    :param controlParams: Dimensionality of the control vector.
-
-    :param type: Type of the created matrices that should be ``CV_32F`` or ``CV_64F``.
-
-
-ocl::KalmanFilter::init
----------------------------
-Re-initializes Kalman filter. The previous content is destroyed.
-
-.. ocv:function:: void ocl::KalmanFilter::init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F)
-
-    :param dynamParams: Dimensionality of the state.
-
-    :param measureParams: Dimensionality of the measurement.
-
-    :param controlParams: Dimensionality of the control vector.
-
-    :param type: Type of the created matrices that should be ``CV_32F`` or ``CV_64F``.
-
-
-ocl::KalmanFilter::predict
-------------------------------
-Computes a predicted state.
-
-.. ocv:function:: const oclMat& ocl::KalmanFilter::predict(const oclMat& control=oclMat())
-
-    :param control: The optional input control
-
-
-ocl::KalmanFilter::correct
------------------------------
-Updates the predicted state from the measurement.
-
-.. ocv:function:: const oclMat& ocl::KalmanFilter::correct(const oclMat& measurement)
-
-    :param measurement: The measured system parameters
-
-
-ocl::BackgroundSubtractor
-----------------------------
-.. ocv:class:: ocl::BackgroundSubtractor
-
-Base class for background/foreground segmentation. ::
-
-    class CV_EXPORTS BackgroundSubtractor
-    {
-    public:
-        //! the virtual destructor
-        virtual ~BackgroundSubtractor();
-        //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
-        virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
-
-        //! computes a background image
-        virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
-    };
-
-
-The class is only used to define the common interface for the whole family of background/foreground segmentation algorithms.
-
-
-ocl::BackgroundSubtractor::operator()
------------------------------------------
-Computes a foreground mask.
-
-.. ocv:function:: void ocl::BackgroundSubtractor::operator()(const oclMat& image, oclMat& fgmask, float learningRate)
-
-    :param image: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-
-ocl::BackgroundSubtractor::getBackgroundImage
--------------------------------------------------
-Computes a background image.
-
-.. ocv:function:: void ocl::BackgroundSubtractor::getBackgroundImage(oclMat& backgroundImage) const
-
-    :param backgroundImage: The output background image.
-
-.. note:: Sometimes the background image can be very blurry, as it contains the average background statistics.
-
-ocl::MOG
-------------
-.. ocv:class:: ocl::MOG : public ocl::BackgroundSubtractor
-
-Gaussian Mixture-based Background/Foreground Segmentation Algorithm. ::
-
-    class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
-    {
-    public:
-        //! the default constructor
-        MOG(int nmixtures = -1);
-
-        //! re-initialization method
-        void initialize(Size frameSize, int frameType);
-
-        //! the update operator
-        void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
-
-        //! computes a background image which is the mean of all background gaussians
-        void getBackgroundImage(oclMat& backgroundImage) const;
-
-        //! releases all inner buffers
-        void release();
-
-        int history;
-        float varThreshold;
-        float backgroundRatio;
-        float noiseSigma;
-
-    private:
-        /* hidden */
-    };
-
-The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2001]_.
-
-.. seealso:: :ocv:class:`BackgroundSubtractorMOG`
-
-
-ocl::MOG::MOG
----------------------
-The constructor.
-
-.. ocv:function:: ocl::MOG::MOG(int nmixtures = -1)
-
-    :param nmixtures: Number of Gaussian mixtures.
-
-Default constructor sets all parameters to default values.
-
-
-ocl::MOG::operator()
-------------------------
-Updates the background model and returns the foreground mask.
-
-.. ocv:function:: void ocl::MOG::operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f)
-
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-
-ocl::MOG::getBackgroundImage
---------------------------------
-Computes a background image.
-
-.. ocv:function:: void ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
-
-    :param backgroundImage: The output background image.
-
-
-ocl::MOG::release
----------------------
-Releases the memory of all inner buffers.
-
-.. ocv:function:: void ocl::MOG::release()
-
-
-ocl::MOG2
--------------
-.. ocv:class:: ocl::MOG2 : public ocl::BackgroundSubtractor
-
-  Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
-
-  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_. ::
-
-    class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
-    {
-    public:
-        //! the default constructor
-        MOG2(int nmixtures = -1);
-
-        //! re-initialization method
-        void initialize(Size frameSize, int frameType);
-
-        //! the update operator
-        void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
-
-        //! computes a background image which is the mean of all background gaussians
-        void getBackgroundImage(oclMat& backgroundImage) const;
-
-        //! releases all inner buffers
-        void release();
-
-        int history;
-
-        float varThreshold;
-
-        float backgroundRatio;
-
-        float varThresholdGen;
-
-        float fVarInit;
-        float fVarMin;
-        float fVarMax;
-
-        float fCT;
-
-        bool bShadowDetection;
-        unsigned char nShadowDetection;
-        float fTau;
-
-    private:
-        /* hidden */
-    };
-
-  .. ocv:member:: float backgroundRatio
-
-      Threshold defining whether the component is significant enough to be included into the background model. ``cf=0.1 => TB=0.9`` is default. For ``alpha=0.001``, it means that the mode should exist for approximately 105 frames before it is considered foreground.
-
-  .. ocv:member:: float varThreshold
-
-      Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the existing components (corresponds to ``Tg``). If it is not close to any component, a new component is generated. ``3 sigma => Tg=3*3=9`` is default. A smaller ``Tg`` value generates more components. A higher ``Tg`` value may result in a small number of components but they can grow too large.
-
-  .. ocv:member:: float fVarInit
-
-      Initial variance for the newly generated components. It affects the speed of adaptation. The parameter value is based on your estimate of the typical standard deviation from the images. OpenCV uses 15 as a reasonable value.
-
-  .. ocv:member:: float fVarMin
-
-      Parameter used to further control the variance.
-
-  .. ocv:member:: float fVarMax
-
-      Parameter used to further control the variance.
-
-  .. ocv:member:: float fCT
-
-      Complexity reduction parameter. This parameter defines the number of samples needed to accept that a component exists. ``CT=0.05`` is a default value for all the samples. By setting ``CT=0`` you get an algorithm very similar to the standard Stauffer&Grimson algorithm.
-
-  .. ocv:member:: uchar nShadowDetection
-
-      The value for marking shadow pixels in the output foreground mask. Default value is 127.
-
-  .. ocv:member:: float fTau
-
-      Shadow threshold. The shadow is detected if the pixel is a darker version of the background. ``Tau`` is a threshold defining how much darker the shadow can be. ``Tau= 0.5`` means that if a pixel is more than twice as dark, it is not a shadow. See [ShadowDetect2003]_.
-
-  .. ocv:member:: bool bShadowDetection
-
-      Parameter defining whether shadow detection should be enabled.
-
-
-.. seealso:: :ocv:class:`BackgroundSubtractorMOG2`
-
-
-ocl::MOG2::MOG2
------------------------
-The constructor.
-
-.. ocv:function:: ocl::MOG2::MOG2(int nmixtures = -1)
-
-    :param nmixtures: Number of Gaussian mixtures.
-
-Default constructor sets all parameters to default values.
-
-
-ocl::MOG2::operator()
--------------------------
-Updates the background model and returns the foreground mask.
-
-.. ocv:function:: void ocl::MOG2::operator()( const oclMat& frame, oclMat& fgmask, float learningRate=-1.0f)
-
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-
-ocl::MOG2::getBackgroundImage
----------------------------------
-Computes a background image.
-
-.. ocv:function:: void ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
-
-    :param backgroundImage: The output background image.
-
-
-ocl::MOG2::release
-----------------------
-Releases the memory of all inner buffers.
-
-.. ocv:function:: void ocl::MOG2::release()
-
-
-.. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiara. *Detecting Moving Shadows...*. IEEE PAMI, 2003
diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp
deleted file mode 100644
index 190b0a3..0000000
--- a/modules/ocl/include/opencv2/ocl.hpp
+++ /dev/null
@@ -1,2230 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-#ifndef __OPENCV_OCL_HPP__
-#define __OPENCV_OCL_HPP__
-
-#include <memory>
-#include <vector>
-
-#include "opencv2/core.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/objdetect.hpp"
-#include "opencv2/ml.hpp"
-
-namespace cv
-{
-    namespace ocl
-    {
-        enum DeviceType
-        {
-            CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
-            CVCL_DEVICE_TYPE_CPU         = (1 << 1),
-            CVCL_DEVICE_TYPE_GPU         = (1 << 2),
-            CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
-            //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
-            CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
-        };
-
-        enum DevMemRW
-        {
-            DEVICE_MEM_R_W = 0,
-            DEVICE_MEM_R_ONLY,
-            DEVICE_MEM_W_ONLY
-        };
-
-        enum DevMemType
-        {
-            DEVICE_MEM_DEFAULT = 0,
-            DEVICE_MEM_AHP,         //alloc host pointer
-            DEVICE_MEM_UHP,         //use host pointer
-            DEVICE_MEM_CHP,         //copy host pointer
-            DEVICE_MEM_PM           //persistent memory
-        };
-
-        // these classes contain OpenCL runtime information
-
-        struct PlatformInfo;
-
-        struct DeviceInfo
-        {
-        public:
-            int _id; // reserved, don't use it
-
-            DeviceType deviceType;
-            std::string deviceProfile;
-            std::string deviceVersion;
-            std::string deviceName;
-            std::string deviceVendor;
-            int deviceVendorId;
-            std::string deviceDriverVersion;
-            std::string deviceExtensions;
-
-            size_t maxWorkGroupSize;
-            std::vector<size_t> maxWorkItemSizes;
-            int maxComputeUnits;
-            size_t localMemorySize;
-            size_t maxMemAllocSize;
-
-            int deviceVersionMajor;
-            int deviceVersionMinor;
-
-            bool haveDoubleSupport;
-            bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
-            bool isIntelDevice;
-
-            std::string compilationExtraOptions;
-
-            const PlatformInfo* platform;
-
-            DeviceInfo();
-            ~DeviceInfo();
-        };
-
-        struct PlatformInfo
-        {
-            int _id; // reserved, don't use it
-
-            std::string platformProfile;
-            std::string platformVersion;
-            std::string platformName;
-            std::string platformVendor;
-            std::string platformExtensons;
-
-            int platformVersionMajor;
-            int platformVersionMinor;
-
-            std::vector<const DeviceInfo*> devices;
-
-            PlatformInfo();
-            ~PlatformInfo();
-        };
-
-        //////////////////////////////// Initialization & Info ////////////////////////
-        typedef std::vector<const PlatformInfo*> PlatformsInfo;
-
-        CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
-
-        typedef std::vector<const DeviceInfo*> DevicesInfo;
-
-        CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
-                const PlatformInfo* platform = NULL);
-
-        // set device you want to use
-        CV_EXPORTS void setDevice(const DeviceInfo* info);
-
-        // Initialize from OpenCL handles directly.
-        // Argument types are (pointers): cl_platform_id*, cl_context*, cl_device_id*
-        CV_EXPORTS void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice);
-
-        enum FEATURE_TYPE
-        {
-            FEATURE_CL_DOUBLE = 1,
-            FEATURE_CL_UNIFIED_MEM,
-            FEATURE_CL_VER_1_2,
-            FEATURE_CL_INTEL_DEVICE
-        };
-
-        // Represents OpenCL context, interface
-        class CV_EXPORTS Context
-        {
-        protected:
-            Context() { }
-            ~Context() { }
-        public:
-            static Context *getContext();
-
-            bool supportsFeature(FEATURE_TYPE featureType) const;
-            const DeviceInfo& getDeviceInfo() const;
-
-            const void* getOpenCLContextPtr() const;
-            const void* getOpenCLCommandQueuePtr() const;
-            const void* getOpenCLDeviceIDPtr() const;
-        };
-
-        inline const void *getClContextPtr()
-        {
-            return Context::getContext()->getOpenCLContextPtr();
-        }
-
-        inline const void *getClCommandQueuePtr()
-        {
-            return Context::getContext()->getOpenCLCommandQueuePtr();
-        }
-
-        CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType);
-
-        CV_EXPORTS void finish();
-
-        enum BINARY_CACHE_MODE
-        {
-            CACHE_NONE    = 0,        // do not cache OpenCL binary
-            CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode
-            CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode
-            CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE // cache opencl binary
-        };
-        //! Enable or disable OpenCL program binary caching onto local disk
-        // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
-        // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
-        // binary file, which will be reused when the OpenCV executable is started again.
-        //
-        // This feature is enabled by default.
-        CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
-
-        //! sets the path where the binary cache is saved
-        CV_EXPORTS void setBinaryPath(const char *path);
-
-        struct ProgramSource
-        {
-            const char* name;
-            const char* programStr;
-            const char* programHash;
-
-            // Cache in memory by name (should be unique). Caching on disk disabled.
-            inline ProgramSource(const char* _name, const char* _programStr)
-                : name(_name), programStr(_programStr), programHash(NULL)
-            {
-            }
-
-            // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
-            inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
-                : name(_name), programStr(_programStr), programHash(_programHash)
-            {
-            }
-        };
-
-        //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
-        //! Deprecated, will be replaced
-        CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
-                const cv::ocl::ProgramSource& source, String kernelName,
-                size_t globalThreads[3], size_t localThreads[3],
-                std::vector< std::pair<size_t, const void *> > &args,
-                int channels, int depth, const char *build_options);
-
-        class CV_EXPORTS oclMatExpr;
-        //////////////////////////////// oclMat ////////////////////////////////
-        class CV_EXPORTS oclMat
-        {
-        public:
-            //! default constructor
-            oclMat();
-            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
-            oclMat(int rows, int cols, int type);
-            oclMat(Size size, int type);
-            //! constructs oclMatrix and fills it with the specified value _s.
-            oclMat(int rows, int cols, int type, const Scalar &s);
-            oclMat(Size size, int type, const Scalar &s);
-            //! copy constructor
-            oclMat(const oclMat &m);
-
-            //! constructor for oclMatrix headers pointing to user-allocated data
-            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
-            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
-
-            //! creates a matrix header for a part of the bigger matrix
-            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
-            oclMat(const oclMat &m, const Rect &roi);
-
-            //! builds oclMat from Mat. Performs a blocking upload to device.
-            explicit oclMat (const Mat &m);
-
-            //! destructor - calls release()
-            ~oclMat();
-
-            //! assignment operators
-            oclMat &operator = (const oclMat &m);
-            //! assignment operator. Performs a blocking upload to device.
-            oclMat &operator = (const Mat &m);
-            oclMat &operator = (const oclMatExpr& expr);
-
-            //! performs a blocking upload of data to oclMat.
-            void upload(const cv::Mat &m);
-
-
-            //! downloads data from device to host memory. Blocking calls.
-            operator Mat() const;
-            void download(cv::Mat &m) const;
-
-            //! convert to _InputArray
-            operator _InputArray();
-
-            //! convert to _OutputArray
-            operator _OutputArray();
-
-            //! returns a new oclMatrix header for the specified row
-            oclMat row(int y) const;
-            //! returns a new oclMatrix header for the specified column
-            oclMat col(int x) const;
-            //! ... for the specified row span
-            oclMat rowRange(int startrow, int endrow) const;
-            oclMat rowRange(const Range &r) const;
-            //! ... for the specified column span
-            oclMat colRange(int startcol, int endcol) const;
-            oclMat colRange(const Range &r) const;
-
-            //! returns deep copy of the oclMatrix, i.e. the data is copied
-            oclMat clone() const;
-
-            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
-            // It calls m.create(this->size(), this->type()).
-            // It supports any data type
-            void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
-
-            //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
-            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
-
-            void assignTo( oclMat &m, int type = -1 ) const;
-
-            //! sets every oclMatrix element to s
-            oclMat& operator = (const Scalar &s);
-            //! sets some of the oclMatrix elements to s, according to the mask
-            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
-            //! creates alternative oclMatrix header for the same data, with different
-            // number of channels and/or different number of rows. see cvReshape.
-            oclMat reshape(int cn, int rows = 0) const;
-
-            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
-            // previous data is unreferenced if needed.
-            void create(int rows, int cols, int type);
-            void create(Size size, int type);
-
-            //! allocates new oclMatrix with specified device memory type.
-            void createEx(int rows, int cols, int type,
-                          DevMemRW rw_type, DevMemType mem_type);
-            void createEx(Size size, int type, DevMemRW rw_type,
-                          DevMemType mem_type);
-
-            //! decreases reference counter;
-            // deallocate the data when reference counter reaches 0.
-            void release();
-
-            //! swaps with other smart pointer
-            void swap(oclMat &mat);
-
-            //! locates oclMatrix header within a parent oclMatrix. See below
-            void locateROI( Size &wholeSize, Point &ofs ) const;
-            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
-            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
-            //! extracts a rectangular sub-oclMatrix
-            // (this is a generalized form of row, rowRange etc.)
-            oclMat operator()( Range rowRange, Range colRange ) const;
-            oclMat operator()( const Rect &roi ) const;
-
-            oclMat& operator+=( const oclMat& m );
-            oclMat& operator-=( const oclMat& m );
-            oclMat& operator*=( const oclMat& m );
-            oclMat& operator/=( const oclMat& m );
-
-            //! returns true if the oclMatrix data is continuous
-            // (i.e. when there are no gaps between successive rows).
-            // similar to CV_IS_oclMat_CONT(cvoclMat->type)
-            bool isContinuous() const;
-            //! returns element size in bytes,
-            // similar to CV_ELEM_SIZE(cvMat->type)
-            size_t elemSize() const;
-            //! returns the size of element channel in bytes.
-            size_t elemSize1() const;
-            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
-            int type() const;
-            //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
-            //! 3 channels element actually use 4 channel space
-            int ocltype() const;
-            //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
-            int depth() const;
-            //! returns element type, similar to CV_MAT_CN(cvMat->type)
-            int channels() const;
-            //! returns element type, return 4 for 3 channels element,
-            //!becuase 3 channels element actually use 4 channel space
-            int oclchannels() const;
-            //! returns step/elemSize1()
-            size_t step1() const;
-            //! returns oclMatrix size:
-            // width == number of columns, height == number of rows
-            Size size() const;
-            //! returns true if oclMatrix data is NULL
-            bool empty() const;
-
-            //! matrix transposition
-            oclMat t() const;
-
-            /*! includes several bit-fields:
-              - the magic signature
-              - continuity flag
-              - depth
-              - number of channels
-              */
-            int flags;
-            //! the number of rows and columns
-            int rows, cols;
-            //! a distance between successive rows in bytes; includes the gap if any
-            size_t step;
-            //! pointer to the data(OCL memory object)
-            uchar *data;
-
-            //! pointer to the reference counter;
-            // when oclMatrix points to user-allocated data, the pointer is NULL
-            int *refcount;
-
-            //! helper fields used in locateROI and adjustROI
-            //datastart and dataend are not used in current version
-            uchar *datastart;
-            uchar *dataend;
-
-            //! OpenCL context associated with the oclMat object.
-            Context *clCxt; // TODO clCtx
-            //add offset for handle ROI, calculated in byte
-            int offset;
-            //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
-            int wholerows;
-            int wholecols;
-        };
-
-        // convert InputArray/OutputArray to oclMat references
-        CV_EXPORTS oclMat& getOclMatRef(InputArray src);
-        CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
-
-        ///////////////////// mat split and merge /////////////////////////////////
-        //! Compose a multi-channel array from several single-channel arrays
-        // Support all types
-        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
-        CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
-
-        //! Divides multi-channel array into several single-channel arrays
-        // Support all types
-        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
-        CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
-
-        ////////////////////////////// Arithmetics ///////////////////////////////////
-
-        //! adds one matrix to another with scale (dst = src1 * alpha + src2 * beta + gama)
-        // supports all data types
-        CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
-
-        //! adds one matrix to another (dst = src1 + src2)
-        // supports all data types
-        CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
-        //! adds scalar to a matrix (dst = src1 + s)
-        // supports all data types
-        CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
-        //! subtracts one matrix from another (dst = src1 - src2)
-        // supports all data types
-        CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
-        //! subtracts scalar from a matrix (dst = src1 - s)
-        // supports all data types
-        CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
-        //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
-        // supports all data types
-        CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
-        //! multiplies matrix to a number (dst = scalar * src)
-        // supports all data types
-        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
-
-        //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
-        // supports all data types
-        CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
-        //! computes element-wise quotient of the two arrays (dst = scale / src)
-        // supports all data types
-        CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
-
-        //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
-        // supports all data types
-        CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);
-
-        //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
-        // supports all data types
-        CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);
-
-        //! compares elements of two arrays (dst = src1 <cmpop> src2)
-        // supports all data types
-        CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
-
-        //! transposes the matrix
-        // supports all data types
-        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
-
-        //! computes element-wise absolute values of an array (dst = abs(src))
-        // supports all data types
-        CV_EXPORTS void abs(const oclMat &src, oclMat &dst);
-
-        //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
-        // supports all data types
-        CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
-        //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
-        // supports all data types
-        CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
-
-        //! computes mean value and standard deviation of all or selected array elements
-        // supports all data types
-        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
-
-        //! computes norm of array
-        // supports NORM_INF, NORM_L1, NORM_L2
-        // supports all data types
-        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
-
-        //! computes norm of the difference between two arrays
-        // supports NORM_INF, NORM_L1, NORM_L2
-        // supports all data types
-        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
-
-        //! reverses the order of the rows, columns or both in a matrix
-        // supports all types
-        CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
-
-        //! computes sum of array elements
-        // support all types
-        CV_EXPORTS Scalar sum(const oclMat &m);
-        CV_EXPORTS Scalar absSum(const oclMat &m);
-        CV_EXPORTS Scalar sqrSum(const oclMat &m);
-
-        //! finds global minimum and maximum array elements and returns their values
-        // support all C1 types
-        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
-
-        //! finds global minimum and maximum array elements and returns their values with locations
-        // support all C1 types
-        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
-                                  const oclMat &mask = oclMat());
-
-        //! counts non-zero array elements
-        // support all types
-        CV_EXPORTS int countNonZero(const oclMat &src);
-
-        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
-        // destination array will have the depth type as lut and the same channels number as source
-        //It supports 8UC1 8UC4 only
-        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
-
-        //! only 8UC1 and 256 bins is supported now
-        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
-        //! only 8UC1 and 256 bins is supported now
-        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
-
-        //! only 8UC1 is supported now
-        CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
-
-        //! bilateralFilter
-        // supports 8UC1 8UC4
-        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
-
-        //! Applies an adaptive bilateral filter to the input image
-        //  Unlike the usual bilateral filter that uses fixed value for sigmaColor,
-        //  the adaptive version calculates the local variance in he ksize neighborhood
-        //  and use this as sigmaColor, for the value filtering. However, the local standard deviation is
-        //  clamped to the maxSigmaColor.
-        //  supports 8UC1, 8UC3
-        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
-
-        //! computes exponent of each matrix element (dst = e**src)
-        // supports only CV_32FC1, CV_64FC1 type
-        CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
-
-        //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
-        // supports only CV_32FC1, CV_64FC1 type
-        CV_EXPORTS void log(const oclMat &src, oclMat &dst);
-
-        //! computes square root of each matrix element
-        // supports only CV_32FC1, CV_64FC1 type
-        CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst);
-
-        //! computes magnitude of each (x(i), y(i)) vector
-        // supports only CV_32F, CV_64F type
-        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
-
-        //! computes angle (angle(i)) of each (x(i), y(i)) vector
-        // supports only CV_32F, CV_64F type
-        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
-
-        //! the function raises every element of tne input array to p
-        // support only CV_32F, CV_64F type
-        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
-
-        //! converts Cartesian coordinates to polar
-        // supports only CV_32F CV_64F type
-        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
-
-        //! converts polar coordinates to Cartesian
-        // supports only CV_32F CV_64F type
-        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
-
-        //! perfroms per-elements bit-wise inversion
-        // supports all types
-        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
-
-        //! calculates per-element bit-wise disjunction of two arrays
-        // supports all types
-        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
-        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
-        //! calculates per-element bit-wise conjunction of two arrays
-        // supports all types
-        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
-        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
-        //! calculates per-element bit-wise "exclusive or" operation
-        // supports all types
-        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
-        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
-        //! Logical operators
-        CV_EXPORTS oclMat operator ~ (const oclMat &);
-        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
-        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
-        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
-
-
-        //! Mathematics operators
-        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
-        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
-        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
-        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
-
-        struct CV_EXPORTS ConvolveBuf
-        {
-            Size result_size;
-            Size block_size;
-            Size user_block_size;
-            Size dft_size;
-
-            oclMat image_spect, templ_spect, result_spect;
-            oclMat image_block, templ_block, result_data;
-
-            void create(Size image_size, Size templ_size);
-            static Size estimateBlockSize(Size result_size, Size templ_size);
-        };
-
-        //! computes convolution of two images, may use discrete Fourier transform
-        // support only CV_32FC1 type
-        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
-        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
-
-        //! Performs a per-element multiplication of two Fourier spectrums.
-        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
-        //! support only CV_32FC2 type
-        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
-
-        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);
-
-        //! initializes a scaled identity matrix
-        CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
-
-        //! fills the output array with repeated copies of the input array
-        CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst);
-
-        //////////////////////////////// Filter Engine ////////////////////////////////
-
-        /*!
-          The Base Class for 1D or Row-wise Filters
-
-          This is the base class for linear or non-linear filters that process 1D data.
-          In particular, such filters are used for the "horizontal" filtering parts in separable filters.
-          */
-        class CV_EXPORTS BaseRowFilter_GPU
-        {
-        public:
-            BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
-            virtual ~BaseRowFilter_GPU() {}
-            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-            int ksize, anchor, bordertype;
-        };
-
-        /*!
-          The Base Class for Column-wise Filters
-
-          This is the base class for linear or non-linear filters that process columns of 2D arrays.
-          Such filters are used for the "vertical" filtering parts in separable filters.
-          */
-        class CV_EXPORTS BaseColumnFilter_GPU
-        {
-        public:
-            BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
-            virtual ~BaseColumnFilter_GPU() {}
-            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-            int ksize, anchor, bordertype;
-        };
-
-        /*!
-          The Base Class for Non-Separable 2D Filters.
-
-          This is the base class for linear or non-linear 2D filters.
-          */
-        class CV_EXPORTS BaseFilter_GPU
-        {
-        public:
-            BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
-                : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
-            virtual ~BaseFilter_GPU() {}
-            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
-            Size ksize;
-            Point anchor;
-            int borderType;
-        };
-
-        /*!
-          The Base Class for Filter Engine.
-
-          The class can be used to apply an arbitrary filtering operation to an image.
-          It contains all the necessary intermediate buffers.
-          */
-        class CV_EXPORTS FilterEngine_GPU
-        {
-        public:
-            virtual ~FilterEngine_GPU() {}
-
-            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
-        };
-
-        //! returns the non-separable filter engine with the specified filter
-        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
-
-        //! returns the primitive row filter with the specified kernel
-        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
-                int anchor = -1, int bordertype = BORDER_DEFAULT);
-
-        //! returns the primitive column filter with the specified kernel
-        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
-                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
-
-        //! returns the separable linear filter engine
-        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
-                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
-
-        //! returns the separable filter engine with the specified filters
-        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
-                const Ptr<BaseColumnFilter_GPU> &columnFilter);
-
-        //! returns the Gaussian filter engine
-        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
-
-        //! returns filter engine for the generalized Sobel operator
-        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
-
-        //! applies Laplacian operator to the image
-        // supports only ksize = 1 and ksize = 3
-        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1,
-                double delta=0, int borderType=BORDER_DEFAULT);
-
-        //! returns 2D box filter
-        // dst type must be the same as source type
-        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
-                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-        //! returns box filter engine
-        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
-                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-        //! returns 2D filter with the specified kernel
-        // supports: dst type must be the same as source type
-        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
-                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-        //! returns the non-separable linear filter engine
-        // supports: dst type must be the same as source type
-        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
-                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-        //! smooths the image using the normalized box filter
-        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
-                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-        //! returns 2D morphological filter
-        //! only MORPH_ERODE and MORPH_DILATE are supported
-        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
-        // kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
-        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
-                Point anchor = Point(-1, -1));
-
-        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
-        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
-                const Point &anchor = Point(-1, -1), int iterations = 1);
-
-        //! a synonym for normalized box filter
-        static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
-                                int borderType = BORDER_CONSTANT)
-        {
-            boxFilter(src, dst, -1, ksize, anchor, borderType);
-        }
-
-        //! applies non-separable 2D linear filter to the image
-        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
-                                 Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT);
-
-        //! applies separable 2D linear filter to the image
-        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
-                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
-        //! applies generalized Sobel operator to the image
-        // dst.type must equalize src.type
-        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
-        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
-        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
-        //! applies the vertical or horizontal Scharr operator to the image
-        // dst.type must equalize src.type
-        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
-        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
-        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
-        //! smooths the image using Gaussian filter.
-        // dst.type must equalize src.type
-        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
-        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
-        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
-
-        //! erodes the image (applies the local minimum operator)
-        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
-        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
-                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
-        //! dilates the image (applies the local maximum operator)
-        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
-        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
-                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
-        //! applies an advanced morphological operation to the image
-        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
-                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
-        ////////////////////////////// Image processing //////////////////////////////
-        //! Does mean shift filtering on GPU.
-        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
-                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
-        //! Does mean shift procedure on GPU.
-        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
-                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
-        //! Does mean shift segmentation with elimiation of small regions.
-        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
-                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
-        //! applies fixed threshold to the image.
-        // supports CV_8UC1 and CV_32FC1 data type
-        // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
-        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
-
-        //! resizes the image
-        // Supports INTER_NEAREST, INTER_LINEAR
-        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
-        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
-
-        //! Applies a generic geometrical transformation to an image.
-
-        // Supports INTER_NEAREST, INTER_LINEAR.
-        // Map1 supports CV_16SC2, CV_32FC2  types.
-        // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
-        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
-
-        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
-        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
-        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
-
-        //! Smoothes image using median filter
-        // The source 1- or 4-channel image. m should be 3 or 5, the image depth should be CV_8U or CV_32F.
-        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
-
-        //! warps the image using affine transformation
-        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
-        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
-
-        //! warps the image using perspective transformation
-        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
-        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
-
-        //! computes the integral image and integral for the squared image
-        // sum will support CV_32S, CV_32F, sqsum - support CV32F, CV_64F
-        // supports only CV_8UC1 source type
-        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
-        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
-        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
-        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
-            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
-        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
-        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
-            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
-
-
-        /////////////////////////////////// ML ///////////////////////////////////////////
-
-        //! Compute closest centers for each lines in source and lable it after center's index
-        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
-        // supports NORM_L1 and NORM_L2 distType
-        // if indices is provided, only the indexed rows will be calculated and their results are in the same
-        // order of indices
-        CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
-
-        //!Does k-means procedure on GPU
-        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
-        CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
-                                     TermCriteria criteria, int attemps, int flags, oclMat &centers);
-
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
-        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
-        {
-        public:
-            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
-                double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
-                Size minSize = Size(), Size maxSize = Size());
-        };
-
-        /////////////////////////////// Pyramid /////////////////////////////////////
-        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
-
-        //! upsamples the source image and then smoothes it
-        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
-
-        //! performs linear blending of two images
-        //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
-        // supports only CV_8UC1 source type
-        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
-
-        //! computes vertical sum, supports only CV_32FC1 images
-        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
-
-        ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
-        struct CV_EXPORTS MatchTemplateBuf
-        {
-            Size user_block_size;
-            oclMat imagef, templf;
-            std::vector<oclMat> images;
-            std::vector<oclMat> image_sums;
-            std::vector<oclMat> image_sqsums;
-        };
-
-        //! computes the proximity map for the raster template and the image where the template is searched for
-        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
-        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
-        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
-
-        //! computes the proximity map for the raster template and the image where the template is searched for
-        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
-        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
-        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
-
-
-
-        ///////////////////////////////////////////// Canny /////////////////////////////////////////////
-        struct CV_EXPORTS CannyBuf;
-
-        //! compute edges of the input image using Canny operator
-        // Support CV_8UC1 only
-        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
-        struct CV_EXPORTS CannyBuf
-        {
-            CannyBuf() : counter(1, 1, CV_32S) { }
-            ~CannyBuf()
-            {
-                release();
-            }
-            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
-            {
-                create(image_size, apperture_size);
-            }
-            CannyBuf(const oclMat &dx_, const oclMat &dy_);
-            void create(const Size &image_size, int apperture_size = 3);
-            void release();
-
-            oclMat dx, dy;
-            oclMat dx_buf, dy_buf;
-            oclMat magBuf, mapBuf;
-            oclMat trackBuf1, trackBuf2;
-            oclMat counter;
-            Ptr<FilterEngine_GPU> filterDX, filterDY;
-        };
-
-        ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
-        //! HoughCircles
-        struct HoughCirclesBuf
-        {
-            oclMat edges;
-            oclMat accum;
-            oclMat srcPoints;
-            oclMat centers;
-            CannyBuf cannyBuf;
-        };
-
-        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-        CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
-
-
-        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
-        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
-        //! Param dft_size is the size of DFT transform.
-        //!
-        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
-        // support src type of CV32FC1, CV32FC2
-        // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
-        // dft_size is the size of original input, which is used for transformation from complex to real.
-        // dft_size must be powers of 2, 3 and 5
-        // real to complex dft requires at least v1.8 clAmdFft
-        // real to complex dft output is not the same with cpu version
-        // real to complex and complex to real does not support DFT_ROWS
-        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
-
-        //! implements generalized matrix product algorithm GEMM from BLAS
-        // The functionality requires clAmdBlas library
-        // only support type CV_32FC1
-        // flag GEMM_3_T is not supported
-        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
-                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
-
-        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-
-        struct CV_EXPORTS HOGDescriptor
-
-        {
-
-            enum { DEFAULT_WIN_SIGMA = -1 };
-
-            enum { DEFAULT_NLEVELS = 64 };
-
-            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-
-
-            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
-
-                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
-
-                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
-
-                          double threshold_L2hys = 0.2, bool gamma_correction = true,
-
-                          int nlevels = DEFAULT_NLEVELS);
-
-
-
-            size_t getDescriptorSize() const;
-
-            size_t getBlockHistogramSize() const;
-
-
-
-            void setSVMDetector(const std::vector<float> &detector);
-
-
-
-            static std::vector<float> getDefaultPeopleDetector();
-
-            static std::vector<float> getPeopleDetector48x96();
-
-            static std::vector<float> getPeopleDetector64x128();
-
-
-
-            void detect(const oclMat &img, std::vector<Point> &found_locations,
-
-                        double hit_threshold = 0, Size win_stride = Size(),
-
-                        Size padding = Size());
-
-
-
-            void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
-
-                                  double hit_threshold = 0, Size win_stride = Size(),
-
-                                  Size padding = Size(), double scale0 = 1.05,
-
-                                  int group_threshold = 2);
-
-
-
-            void getDescriptors(const oclMat &img, Size win_stride,
-
-                                oclMat &descriptors,
-
-                                int descr_format = DESCR_FORMAT_COL_BY_COL);
-
-
-
-            Size win_size;
-
-            Size block_size;
-
-            Size block_stride;
-
-            Size cell_size;
-
-            int nbins;
-
-            double win_sigma;
-
-            double threshold_L2hys;
-
-            bool gamma_correction;
-
-            int nlevels;
-
-
-
-        protected:
-
-            // initialize buffers; only need to do once in case of multiscale detection
-
-            void init_buffer(const oclMat &img, Size win_stride);
-
-
-
-            void computeBlockHistograms(const oclMat &img);
-
-            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
-
-
-
-            double getWinSigma() const;
-
-            bool checkDetectorSize() const;
-
-
-
-            static int numPartsWithin(int size, int part_size, int stride);
-
-            static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-
-
-            // Coefficients of the separating plane
-
-            float free_coef;
-
-            oclMat detector;
-
-
-
-            // Results of the last classification step
-
-            oclMat labels;
-
-            Mat labels_host;
-
-
-
-            // Results of the last histogram evaluation step
-
-            oclMat block_hists;
-
-
-
-            // Gradients conputation results
-
-            oclMat grad, qangle;
-
-
-
-            // scaled image
-
-            oclMat image_scale;
-
-
-
-            // effect size of input image (might be different from original size after scaling)
-
-            Size effect_size;
-
-        };
-
-
-        ////////////////////////feature2d_ocl/////////////////
-        /****************************************************************************************\
-        *                                      Distance                                          *
-        \****************************************************************************************/
-        template<typename T>
-        struct CV_EXPORTS Accumulator
-        {
-            typedef T Type;
-        };
-        template<> struct Accumulator<unsigned char>
-        {
-            typedef float Type;
-        };
-        template<> struct Accumulator<unsigned short>
-        {
-            typedef float Type;
-        };
-        template<> struct Accumulator<char>
-        {
-            typedef float Type;
-        };
-        template<> struct Accumulator<short>
-        {
-            typedef float Type;
-        };
-
-        /*
-         * Manhattan distance (city block distance) functor
-         */
-        template<class T>
-        struct CV_EXPORTS L1
-        {
-            enum { normType = NORM_L1 };
-            typedef T ValueType;
-            typedef typename Accumulator<T>::Type ResultType;
-
-            ResultType operator()( const T *a, const T *b, int size ) const
-            {
-                return normL1<ValueType, ResultType>(a, b, size);
-            }
-        };
-
-        /*
-         * Euclidean distance functor
-         */
-        template<class T>
-        struct CV_EXPORTS L2
-        {
-            enum { normType = NORM_L2 };
-            typedef T ValueType;
-            typedef typename Accumulator<T>::Type ResultType;
-
-            ResultType operator()( const T *a, const T *b, int size ) const
-            {
-                return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
-            }
-        };
-
-        /*
-         * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
-         * bit count of A exclusive XOR'ed with B
-         */
-        struct CV_EXPORTS Hamming
-        {
-            enum { normType = NORM_HAMMING };
-            typedef unsigned char ValueType;
-            typedef int ResultType;
-
-            /** this will count the bits in a ^ b
-             */
-            ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
-            {
-                return normHamming(a, b, size);
-            }
-        };
-
-        ////////////////////////////////// BruteForceMatcher //////////////////////////////////
-
-        class CV_EXPORTS BruteForceMatcher_OCL_base
-        {
-        public:
-            enum DistType {L1Dist = 0, L2Dist, HammingDist};
-            explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
-
-            // Add descriptors to train descriptor collection
-            void add(const std::vector<oclMat> &descCollection);
-
-            // Get train descriptors collection
-            const std::vector<oclMat> &getTrainDescriptors() const;
-
-            // Clear train descriptors collection
-            void clear();
-
-            // Return true if there are not train descriptors in collection
-            bool empty() const;
-
-            // Return true if the matcher supports mask in match methods
-            bool isMaskSupported() const;
-
-            // Find one best match for each query descriptor
-            void matchSingle(const oclMat &query, const oclMat &train,
-                             oclMat &trainIdx, oclMat &distance,
-                             const oclMat &mask = oclMat());
-
-            // Download trainIdx and distance and convert it to CPU vector with DMatch
-            static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
-            // Convert trainIdx and distance to vector with DMatch
-            static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
-
-            // Find one best match for each query descriptor
-            void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
-
-            // Make gpu collection of trains and masks in suitable format for matchCollection function
-            void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-            // Find one best match from train collection for each query descriptor
-            void matchCollection(const oclMat &query, const oclMat &trainCollection,
-                                 oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-                                 const oclMat &masks = oclMat());
-
-            // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-            static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
-            // Convert trainIdx, imgIdx and distance to vector with DMatch
-            static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
-
-            // Find one best match from train collection for each query descriptor.
-            void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-            // Find k best matches for each query descriptor (in increasing order of distances)
-            void knnMatchSingle(const oclMat &query, const oclMat &train,
-                                oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
-                                const oclMat &mask = oclMat());
-
-            // Download trainIdx and distance and convert it to vector with DMatch
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
-                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-            // Convert trainIdx and distance to vector with DMatch
-            static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
-                                        std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-            // Find k best matches for each query descriptor (in increasing order of distances).
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            void knnMatch(const oclMat &query, const oclMat &train,
-                          std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
-                          bool compactResult = false);
-
-            // Find k best matches from train collection for each query descriptor (in increasing order of distances)
-            void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
-                                     oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-                                     const oclMat &maskCollection = oclMat());
-
-            // Download trainIdx and distance and convert it to vector with DMatch
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
-                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-            // Convert trainIdx and distance to vector with DMatch
-            static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
-                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-            // Find k best matches  for each query descriptor (in increasing order of distances).
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
-                          const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
-            // Find best matches for each query descriptor which have distance less than maxDistance.
-            // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-            // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
-            // because it didn't have enough memory.
-            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-            // Matches doesn't sorted.
-            void radiusMatchSingle(const oclMat &query, const oclMat &train,
-                                   oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-                                   const oclMat &mask = oclMat());
-
-            // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-            // matches will be sorted in increasing order of distances.
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
-                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-            // Convert trainIdx, nMatches and distance to vector with DMatch.
-            static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
-                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-            // Find best matches for each query descriptor which have distance less than maxDistance
-            // in increasing order of distances).
-            void radiusMatch(const oclMat &query, const oclMat &train,
-                             std::vector< std::vector<DMatch> > &matches, float maxDistance,
-                             const oclMat &mask = oclMat(), bool compactResult = false);
-
-            // Find best matches for each query descriptor which have distance less than maxDistance.
-            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-            // Matches doesn't sorted.
-            void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-                                       const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-            // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-            // matches will be sorted in increasing order of distances.
-            // compactResult is used when mask is not empty. If compactResult is false matches
-            // vector will have the same size as queryDescriptors rows. If compactResult is true
-            // matches vector will not contain matches for fully masked out query descriptors.
-            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
-                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-            // Convert trainIdx, nMatches and distance to vector with DMatch.
-            static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
-                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-            // Find best matches from train collection for each query descriptor which have distance less than
-            // maxDistance (in increasing order of distances).
-            void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
-                             const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
-            DistType distType;
-
-        private:
-            std::vector<oclMat> trainDescCollection;
-        };
-
-        template <class Distance>
-        class CV_EXPORTS BruteForceMatcher_OCL;
-
-        template <typename T>
-        class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
-        {
-        public:
-            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
-            explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
-        };
-        template <typename T>
-        class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
-        {
-        public:
-            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
-            explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
-        };
-        template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
-        {
-        public:
-            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
-            explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
-        };
-
-        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
-        {
-        public:
-            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
-        };
-
-        class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
-        {
-        public:
-            explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
-                int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
-
-            //! return 1 rows matrix with CV_32FC2 type
-            void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
-            //! download points of type Point2f to a vector. the vector's content will be erased
-            void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
-
-            int maxCorners;
-            double qualityLevel;
-            double minDistance;
-
-            int blockSize;
-            bool useHarrisDetector;
-            double harrisK;
-            void releaseMemory()
-            {
-                Dx_.release();
-                Dy_.release();
-                eig_.release();
-                minMaxbuf_.release();
-                tmpCorners_.release();
-            }
-        private:
-            oclMat Dx_;
-            oclMat Dy_;
-            oclMat eig_;
-            oclMat eig_minmax_;
-            oclMat minMaxbuf_;
-            oclMat tmpCorners_;
-            oclMat counter_;
-        };
-
-        inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
-            int blockSize_, bool useHarrisDetector_, double harrisK_)
-        {
-            maxCorners = maxCorners_;
-            qualityLevel = qualityLevel_;
-            minDistance = minDistance_;
-            blockSize = blockSize_;
-            useHarrisDetector = useHarrisDetector_;
-            harrisK = harrisK_;
-        }
-
-        ////////////////////////////////// FAST Feature Detector //////////////////////////////////
-        class CV_EXPORTS FAST_OCL
-        {
-        public:
-            enum
-            {
-                X_ROW = 0,
-                Y_ROW,
-                RESPONSE_ROW,
-                ROWS_COUNT
-            };
-
-            // all features have same size
-            static const int FEATURE_SIZE = 7;
-
-            explicit FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
-
-            //! finds the keypoints using FAST detector
-            //! supports only CV_8UC1 images
-            void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
-            void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
-
-            //! download keypoints from device to host memory
-            static void downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-            //! convert keypoints to KeyPoint vector
-            static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
-
-            //! release temporary buffer's memory
-            void release();
-
-            bool nonmaxSupression;
-
-            int threshold;
-
-            //! max keypoints = keypointsRatio * img.size().area()
-            double keypointsRatio;
-
-            //! find keypoints and compute it's response if nonmaxSupression is true
-            //! return count of detected keypoints
-            int calcKeyPointsLocation(const oclMat& image, const oclMat& mask);
-
-            //! get final array of keypoints
-            //! performs nonmax supression if needed
-            //! return final count of keypoints
-            int getKeyPoints(oclMat& keypoints);
-
-        private:
-            oclMat kpLoc_;
-            int count_;
-
-            oclMat score_;
-
-            oclMat d_keypoints_;
-
-            int calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints);
-            int nonmaxSupressionOCL(oclMat& keypoints);
-        };
-
-        ////////////////////////////////// BRIEF Feature Descriptor //////////////////////////////////
-
-        class CV_EXPORTS BRIEF_OCL
-        {
-        public:
-            static const int PATCH_SIZE = 48;
-            static const int KERNEL_SIZE = 9;
-
-            explicit BRIEF_OCL(int _bytes = 32);
-
-            //!computes the brief descriptor for a set of given keypoints
-            //! supports only CV_8UC1 images
-            void compute(const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors) const;
-
-            static int getBorderSize();
-        protected:
-
-            int bytes;
-        };
-
-        ////////////////////////////////// ORB Descriptor Extractor //////////////////////////////////
-        class CV_EXPORTS ORB_OCL
-        {
-        public:
-            enum
-            {
-                X_ROW = 0,
-                Y_ROW,
-                RESPONSE_ROW,
-                ANGLE_ROW,
-                OCTAVE_ROW,
-                SIZE_ROW,
-                ROWS_COUNT
-            };
-
-            enum
-            {
-                DEFAULT_FAST_THRESHOLD = 20
-            };
-
-            //! Constructor
-            explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
-                             int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
-
-            //! Compute the ORB features on an image
-            //! image - the image to compute the features (supports only CV_8UC1 images)
-            //! mask - the mask to apply
-            //! keypoints - the resulting keypoints
-            void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
-            void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
-
-            //! Compute the ORB features and descriptors on an image
-            //! image - the image to compute the features (supports only CV_8UC1 images)
-            //! mask - the mask to apply
-            //! keypoints - the resulting keypoints
-            //! descriptors - descriptors array
-            void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors);
-            void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors);
-
-            //! download keypoints from device to host memory
-            static void downloadKeyPoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-            //! convert keypoints to KeyPoint vector
-            static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-            //! returns the descriptor size in bytes
-            inline int descriptorSize() const { return kBytes; }
-            inline int descriptorType() const { return CV_8U; }
-            inline int defaultNorm() const { return NORM_HAMMING; }
-
-            inline void setFastParams(int threshold, bool nonmaxSupression = true)
-            {
-                fastDetector_.threshold = threshold;
-                fastDetector_.nonmaxSupression = nonmaxSupression;
-            }
-
-            //! release temporary buffer's memory
-            void release();
-
-            //! if true, image will be blurred before descriptors calculation
-            bool blurForDescriptor;
-
-        private:
-            enum { kBytes = 32 };
-
-            void buildScalePyramids(const oclMat& image, const oclMat& mask);
-
-            void computeKeyPointsPyramid();
-
-            void computeDescriptors(oclMat& descriptors);
-
-            void mergeKeyPoints(oclMat& keypoints);
-
-            int nFeatures_;
-            float scaleFactor_;
-            int nLevels_;
-            int edgeThreshold_;
-            int firstLevel_;
-            int WTA_K_;
-            int scoreType_;
-            int patchSize_;
-
-            // The number of desired features per scale
-            std::vector<size_t> n_features_per_level_;
-
-            // Points to compute BRIEF descriptors from
-            oclMat pattern_;
-
-            std::vector<oclMat> imagePyr_;
-            std::vector<oclMat> maskPyr_;
-
-            oclMat buf_;
-
-            std::vector<oclMat> keyPointsPyr_;
-            std::vector<int> keyPointsCount_;
-
-            FAST_OCL fastDetector_;
-
-            Ptr<ocl::FilterEngine_GPU> blurFilter;
-
-            oclMat d_keypoints_;
-
-            oclMat uMax_;
-        };
-
-        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
-
-        class CV_EXPORTS PyrLKOpticalFlow
-        {
-        public:
-            PyrLKOpticalFlow()
-            {
-                winSize = Size(21, 21);
-                maxLevel = 3;
-                iters = 30;
-                derivLambda = 0.5;
-                useInitialFlow = false;
-                minEigThreshold = 1e-4f;
-                getMinEigenVals = false;
-                isDeviceArch11_ = false;
-            }
-
-            void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
-                        oclMat &status, oclMat *err = 0);
-
-            void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
-
-            Size winSize;
-            int maxLevel;
-            int iters;
-            double derivLambda;
-            bool useInitialFlow;
-            float minEigThreshold;
-            bool getMinEigenVals;
-
-            void releaseMemory()
-            {
-                dx_calcBuf_.release();
-                dy_calcBuf_.release();
-
-                prevPyr_.clear();
-                nextPyr_.clear();
-
-                dx_buf_.release();
-                dy_buf_.release();
-            }
-
-        private:
-            void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
-
-            void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
-
-            oclMat dx_calcBuf_;
-            oclMat dy_calcBuf_;
-
-            std::vector<oclMat> prevPyr_;
-            std::vector<oclMat> nextPyr_;
-
-            oclMat dx_buf_;
-            oclMat dy_buf_;
-
-            oclMat uPyr_[2];
-            oclMat vPyr_[2];
-
-            bool isDeviceArch11_;
-        };
-
-        class CV_EXPORTS FarnebackOpticalFlow
-        {
-        public:
-            FarnebackOpticalFlow();
-
-            int numLevels;
-            double pyrScale;
-            bool fastPyramids;
-            int winSize;
-            int numIters;
-            int polyN;
-            double polySigma;
-            int flags;
-
-            void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
-
-            void releaseMemory();
-
-        private:
-            void prepareGaussian(
-                int n, double sigma, float *g, float *xg, float *xxg,
-                double &ig11, double &ig03, double &ig33, double &ig55);
-
-            void setPolynomialExpansionConsts(int n, double sigma);
-
-            void updateFlow_boxFilter(
-                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
-                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
-
-            void updateFlow_gaussianBlur(
-                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
-                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
-
-            oclMat frames_[2];
-            oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
-            std::vector<oclMat> pyramid0_, pyramid1_;
-        };
-
-        //////////////// build warping maps ////////////////////
-        //! builds plane warping maps
-        CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
-        //! builds cylindrical warping maps
-        CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
-        //! builds spherical warping maps
-        CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
-        //! builds Affine warping maps
-        CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
-
-        //! builds Perspective warping maps
-        CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
-
-        ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
-        //! Interpolate frames (images) using provided optical flow (displacement field).
-        //! frame0   - frame 0 (32-bit floating point images, single channel)
-        //! frame1   - frame 1 (the same type and size)
-        //! fu       - forward horizontal displacement
-        //! fv       - forward vertical displacement
-        //! bu       - backward horizontal displacement
-        //! bv       - backward vertical displacement
-        //! pos      - new frame position
-        //! newFrame - new frame
-        //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
-        //!            occlusion masks            0, occlusion masks            1,
-        //!            interpolated forward flow  0, interpolated forward flow  1,
-        //!            interpolated backward flow 0, interpolated backward flow 1
-        //!
-        CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
-                                          const oclMat &fu, const oclMat &fv,
-                                          const oclMat &bu, const oclMat &bv,
-                                          float pos, oclMat &newFrame, oclMat &buf);
-
-        //! computes moments of the rasterized shape or a vector of points
-        //! _array should be a vector a points standing for the contour
-        CV_EXPORTS Moments ocl_moments(InputArray contour);
-        //! src should be a general image uploaded to the GPU.
-        //! the supported oclMat type are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1
-        //! to use type of CV_64FC1, the GPU should support CV_64FC1
-        CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary);
-
-        class CV_EXPORTS StereoBM_OCL
-        {
-        public:
-            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
-
-            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
-
-            //! the default constructor
-            StereoBM_OCL();
-            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
-            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
-
-            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
-            //! Output disparity has CV_8U type.
-            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
-
-            //! Some heuristics that tries to estmate
-            // if current GPU will be faster then CPU in this algorithm.
-            // It queries current active device.
-            static bool checkIfGpuCallReasonable();
-
-            int preset;
-            int ndisp;
-            int winSize;
-
-            // If avergeTexThreshold  == 0 => post procesing is disabled
-            // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
-            // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
-            // i.e. input left image is low textured.
-            float avergeTexThreshold;
-        private:
-            oclMat minSSD, leBuf, riBuf;
-        };
-
-        class CV_EXPORTS StereoBeliefPropagation
-        {
-        public:
-            enum { DEFAULT_NDISP  = 64 };
-            enum { DEFAULT_ITERS  = 5  };
-            enum { DEFAULT_LEVELS = 5  };
-            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
-            explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
-                                             int iters  = DEFAULT_ITERS,
-                                             int levels = DEFAULT_LEVELS,
-                                             int msg_type = CV_16S);
-            StereoBeliefPropagation(int ndisp, int iters, int levels,
-                                    float max_data_term, float data_weight,
-                                    float max_disc_term, float disc_single_jump,
-                                    int msg_type = CV_32F);
-            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
-            void operator()(const oclMat &data, oclMat &disparity);
-            int ndisp;
-            int iters;
-            int levels;
-            float max_data_term;
-            float data_weight;
-            float max_disc_term;
-            float disc_single_jump;
-            int msg_type;
-        private:
-            oclMat u, d, l, r, u2, d2, l2, r2;
-            std::vector<oclMat> datas;
-            oclMat out;
-        };
-
-        class CV_EXPORTS StereoConstantSpaceBP
-        {
-        public:
-            enum { DEFAULT_NDISP    = 128 };
-            enum { DEFAULT_ITERS    = 8   };
-            enum { DEFAULT_LEVELS   = 4   };
-            enum { DEFAULT_NR_PLANE = 4   };
-            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
-            explicit StereoConstantSpaceBP(
-                int ndisp    = DEFAULT_NDISP,
-                int iters    = DEFAULT_ITERS,
-                int levels   = DEFAULT_LEVELS,
-                int nr_plane = DEFAULT_NR_PLANE,
-                int msg_type = CV_32F);
-            StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
-                float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
-                int min_disp_th = 0,
-                int msg_type = CV_32F);
-            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
-            int ndisp;
-            int iters;
-            int levels;
-            int nr_plane;
-            float max_data_term;
-            float data_weight;
-            float max_disc_term;
-            float disc_single_jump;
-            int min_disp_th;
-            int msg_type;
-            bool use_local_init_data_cost;
-        private:
-            oclMat u[2], d[2], l[2], r[2];
-            oclMat disp_selected_pyr[2];
-            oclMat data_cost;
-            oclMat data_cost_selected;
-            oclMat temp;
-            oclMat out;
-        };
-
-        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
-        //
-        // see reference:
-        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
-        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
-        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
-        {
-        public:
-            OpticalFlowDual_TVL1_OCL();
-
-            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
-
-            void collectGarbage();
-
-            /**
-            * Time step of the numerical scheme.
-            */
-            double tau;
-
-            /**
-            * Weight parameter for the data term, attachment parameter.
-            * This is the most relevant parameter, which determines the smoothness of the output.
-            * The smaller this parameter is, the smoother the solutions we obtain.
-            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
-            */
-            double lambda;
-
-            /**
-            * Weight parameter for (u - v)^2, tightness parameter.
-            * It serves as a link between the attachment and the regularization terms.
-            * In theory, it should have a small value in order to maintain both parts in correspondence.
-            * The method is stable for a large range of values of this parameter.
-            */
-            double theta;
-
-            /**
-            * Number of scales used to create the pyramid of images.
-            */
-            int nscales;
-
-            /**
-            * Number of warpings per scale.
-            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
-            * This is a parameter that assures the stability of the method.
-            * It also affects the running time, so it is a compromise between speed and accuracy.
-            */
-            int warps;
-
-            /**
-            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
-            * A small value will yield more accurate solutions at the expense of a slower convergence.
-            */
-            double epsilon;
-
-            /**
-            * Stopping criterion iterations number used in the numerical scheme.
-            */
-            int iterations;
-
-            bool useInitialFlow;
-
-        private:
-            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
-
-            std::vector<oclMat> I0s;
-            std::vector<oclMat> I1s;
-            std::vector<oclMat> u1s;
-            std::vector<oclMat> u2s;
-
-            oclMat I1x_buf;
-            oclMat I1y_buf;
-
-            oclMat I1w_buf;
-            oclMat I1wx_buf;
-            oclMat I1wy_buf;
-
-            oclMat grad_buf;
-            oclMat rho_c_buf;
-
-            oclMat p11_buf;
-            oclMat p12_buf;
-            oclMat p21_buf;
-            oclMat p22_buf;
-
-            oclMat diff_buf;
-            oclMat norm_buf;
-        };
-        // current supported sorting methods
-        enum
-        {
-            SORT_BITONIC,   // only support power-of-2 buffer size
-            SORT_SELECTION, // cannot sort duplicate keys
-            SORT_MERGE,
-            SORT_RADIX      // only support signed int/float keys(CV_32S/CV_32F)
-        };
-        //! Returns the sorted result of all the elements in input based on equivalent keys.
-        //
-        //  The element unit in the values to be sorted is determined from the data type,
-        //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless its
-        //  matrix dimension.
-        //  both keys and values will be sorted inplace
-        //  Key needs to be single channel oclMat.
-        //
-        //  Example:
-        //  input -
-        //    keys   = {2,    3,   1}   (CV_8UC1)
-        //    values = {10,5, 4,3, 6,2} (CV_8UC2)
-        //  sortByKey(keys, values, SORT_SELECTION, false);
-        //  output -
-        //    keys   = {1,    2,   3}   (CV_8UC1)
-        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
-        CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
-        /*!Base class for MOG and MOG2!*/
-        class CV_EXPORTS BackgroundSubtractor
-        {
-        public:
-            //! the virtual destructor
-            virtual ~BackgroundSubtractor();
-            //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
-            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
-
-            //! computes a background image
-            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
-        };
-                /*!
-        Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
-
-        The class implements the following algorithm:
-        "An improved adaptive background mixture model for real-time tracking with shadow detection"
-        P. KadewTraKuPong and R. Bowden,
-        Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
-        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
-        */
-        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
-        {
-        public:
-            //! the default constructor
-            MOG(int nmixtures = -1);
-
-            //! re-initiaization method
-            void initialize(Size frameSize, int frameType);
-
-            //! the update operator
-            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
-
-            //! computes a background image which are the mean of all background gaussians
-            void getBackgroundImage(oclMat& backgroundImage) const;
-
-            //! releases all inner buffers
-            void release();
-
-            int history;
-            float varThreshold;
-            float backgroundRatio;
-            float noiseSigma;
-
-        private:
-            int nmixtures_;
-
-            Size frameSize_;
-            int frameType_;
-            int nframes_;
-
-            oclMat weight_;
-            oclMat sortKey_;
-            oclMat mean_;
-            oclMat var_;
-        };
-
-        /*!
-        The class implements the following algorithm:
-        "Improved adaptive Gausian mixture model for background subtraction"
-        Z.Zivkovic
-        International Conference Pattern Recognition, UK, August, 2004.
-        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
-        */
-        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
-        {
-        public:
-            //! the default constructor
-            MOG2(int nmixtures = -1);
-
-            //! re-initiaization method
-            void initialize(Size frameSize, int frameType);
-
-            //! the update operator
-            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
-
-            //! computes a background image which are the mean of all background gaussians
-            void getBackgroundImage(oclMat& backgroundImage) const;
-
-            //! releases all inner buffers
-            void release();
-
-            // parameters
-            // you should call initialize after parameters changes
-
-            int history;
-
-            //! here it is the maximum allowed number of mixture components.
-            //! Actual number is determined dynamically per pixel
-            float varThreshold;
-            // threshold on the squared Mahalanobis distance to decide if it is well described
-            // by the background model or not. Related to Cthr from the paper.
-            // This does not influence the update of the background. A typical value could be 4 sigma
-            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
-
-            /////////////////////////
-            // less important parameters - things you might change but be carefull
-            ////////////////////////
-
-            float backgroundRatio;
-            // corresponds to fTB=1-cf from the paper
-            // TB - threshold when the component becomes significant enough to be included into
-            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
-            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
-            // it is considered foreground
-            // float noiseSigma;
-            float varThresholdGen;
-
-            //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
-            //when a sample is close to the existing components. If it is not close
-            //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
-            //Smaller Tg leads to more generated components and higher Tg might make
-            //lead to small number of components but they can grow too large
-            float fVarInit;
-            float fVarMin;
-            float fVarMax;
-
-            //initial variance  for the newly generated components.
-            //It will will influence the speed of adaptation. A good guess should be made.
-            //A simple way is to estimate the typical standard deviation from the images.
-            //I used here 10 as a reasonable value
-            // min and max can be used to further control the variance
-            float fCT; //CT - complexity reduction prior
-            //this is related to the number of samples needed to accept that a component
-            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
-            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
-
-            //shadow detection parameters
-            bool bShadowDetection; //default 1 - do shadow detection
-            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
-            float fTau;
-            // Tau - shadow threshold. The shadow is detected if the pixel is darker
-            //version of the background. Tau is a threshold on how much darker the shadow can be.
-            //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
-            //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
-
-        private:
-            int nmixtures_;
-
-            Size frameSize_;
-            int frameType_;
-            int nframes_;
-
-            oclMat weight_;
-            oclMat variance_;
-            oclMat mean_;
-
-            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
-        };
-
-        /*!***************Kalman Filter*************!*/
-        class CV_EXPORTS KalmanFilter
-        {
-        public:
-            KalmanFilter();
-            //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
-            KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
-            //! re-initializes Kalman filter. The previous content is destroyed.
-            void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
-
-            const oclMat& predict(const oclMat& control=oclMat());
-            const oclMat& correct(const oclMat& measurement);
-
-            oclMat statePre;           //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
-            oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
-            oclMat transitionMatrix;   //!< state transition matrix (A)
-            oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
-            oclMat measurementMatrix;  //!< measurement matrix (H)
-            oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
-            oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
-            oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q)*/
-            oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
-            oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
-        private:
-            oclMat temp1;
-            oclMat temp2;
-            oclMat temp3;
-            oclMat temp4;
-            oclMat temp5;
-        };
-
-        /*!***************K Nearest Neighbour*************!*/
-        class CV_EXPORTS KNearestNeighbour: public CvKNearest
-        {
-        public:
-            KNearestNeighbour();
-            ~KNearestNeighbour();
-
-            bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
-                bool isRegression = false, int max_k = 32, bool updateBase = false);
-
-            void clear();
-
-            void find_nearest(const oclMat& samples, int k, oclMat& lables);
-
-        private:
-            oclMat samples_ocl;
-        };
-
-        /*!***************  SVM  *************!*/
-        class CV_EXPORTS CvSVM_OCL : public CvSVM
-        {
-        public:
-            CvSVM_OCL();
-
-            CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
-                      const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
-                      CvSVMParams params=CvSVMParams());
-            CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const;
-            CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const;
-            CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
-            float predict( const CvMat* samples, CV_OUT CvMat* results ) const;
-
-        protected:
-            float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const;
-            void create_kernel();
-            void create_solver();
-        };
-
-        /*!***************  END  *************!*/
-    }
-}
-#if defined _MSC_VER && _MSC_VER >= 1200
-#  pragma warning( push)
-#  pragma warning( disable: 4267)
-#endif
-#include "opencv2/ocl/matrix_operations.hpp"
-#if defined _MSC_VER && _MSC_VER >= 1200
-#  pragma warning( pop)
-#endif
-
-#endif /* __OPENCV_OCL_HPP__ */
diff --git a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp
deleted file mode 100644
index 410adbd..0000000
--- a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp
+++ /dev/null
@@ -1,490 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
-#define __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
-
-#include "opencv2/ocl.hpp"
-
-namespace cv
-{
-
-    namespace ocl
-    {
-
-        enum
-        {
-            MAT_ADD = 1,
-            MAT_SUB,
-            MAT_MUL,
-            MAT_DIV,
-            MAT_NOT,
-            MAT_AND,
-            MAT_OR,
-            MAT_XOR
-        };
-
-        class CV_EXPORTS oclMatExpr
-        {
-            public:
-                oclMatExpr() : a(oclMat()), b(oclMat()), op(0) {}
-                oclMatExpr(const oclMat& _a, const oclMat& _b, int _op)
-                    : a(_a), b(_b), op(_op) {}
-                operator oclMat() const;
-                void assign(oclMat& m) const;
-
-            protected:
-                oclMat a, b;
-                int op;
-        };
-        ////////////////////////////////////////////////////////////////////////
-        //////////////////////////////// oclMat ////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////
-
-        inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {}
-
-        inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            if( _rows > 0 && _cols > 0 )
-                create( _rows, _cols, _type );
-        }
-
-        inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            if( _size.height > 0 && _size.width > 0 )
-                create( _size.height, _size.width, _type );
-        }
-
-        inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
-            : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            if(_rows > 0 && _cols > 0)
-            {
-                create(_rows, _cols, _type);
-                *this = _s;
-            }
-        }
-
-        inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
-            : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            if( _size.height > 0 && _size.width > 0 )
-            {
-                create( _size.height, _size.width, _type );
-                *this = _s;
-            }
-        }
-
-        inline oclMat::oclMat(const oclMat &m)
-            : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
-              refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
-        {
-            if( refcount )
-                CV_XADD(refcount, 1);
-        }
-
-        inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
-            : flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
-              datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            cv::Mat m(_rows, _cols, _type, _data, _step);
-            upload(m);
-            //size_t minstep = cols * elemSize();
-            //if( step == Mat::AUTO_STEP )
-            //{
-            //    step = minstep;
-            //    flags |= Mat::CONTINUOUS_FLAG;
-            //}
-            //else
-            //{
-            //    if( rows == 1 ) step = minstep;
-            //    CV_DbgAssert( step >= minstep );
-            //    flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
-            //}
-            //dataend += step * (rows - 1) + minstep;
-        }
-
-        inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
-            : flags(0), rows(0), cols(0),
-              step(0), data(0), refcount(0),
-              datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
-        {
-            cv::Mat m(_size, _type, _data, _step);
-            upload(m);
-            //size_t minstep = cols * elemSize();
-            //if( step == Mat::AUTO_STEP )
-            //{
-            //    step = minstep;
-            //    flags |= Mat::CONTINUOUS_FLAG;
-            //}
-            //else
-            //{
-            //    if( rows == 1 ) step = minstep;
-            //    CV_DbgAssert( step >= minstep );
-            //    flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
-            //}
-            //dataend += step * (rows - 1) + minstep;
-        }
-
-
-        inline oclMat::oclMat(const oclMat &m, const Range &rRange, const Range &cRange)
-        {
-            flags = m.flags;
-            step = m.step;
-            refcount = m.refcount;
-            data = m.data;
-            datastart = m.datastart;
-            dataend = m.dataend;
-            clCxt = m.clCxt;
-            wholerows = m.wholerows;
-            wholecols = m.wholecols;
-            offset = m.offset;
-            if( rRange == Range::all() )
-                rows = m.rows;
-            else
-            {
-                CV_Assert( 0 <= rRange.start && rRange.start <= rRange.end && rRange.end <= m.rows );
-                rows = rRange.size();
-                offset += step * rRange.start;
-            }
-
-            if( cRange == Range::all() )
-                cols = m.cols;
-            else
-            {
-                CV_Assert( 0 <= cRange.start && cRange.start <= cRange.end && cRange.end <= m.cols );
-                cols = cRange.size();
-                offset += cRange.start * elemSize();
-                flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
-            }
-
-            if( rows == 1 )
-                flags |= Mat::CONTINUOUS_FLAG;
-
-            if( refcount )
-                CV_XADD(refcount, 1);
-            if( rows <= 0 || cols <= 0 )
-                rows = cols = 0;
-        }
-
-        inline oclMat::oclMat(const oclMat &m, const Rect &roi)
-            : flags(m.flags), rows(roi.height), cols(roi.width),
-              step(m.step), data(m.data), refcount(m.refcount),
-              datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
-        {
-            flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
-            offset += roi.y * step + roi.x * elemSize();
-            CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.wholecols &&
-                       0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.wholerows );
-            if( refcount )
-                CV_XADD(refcount, 1);
-            if( rows <= 0 || cols <= 0 )
-                rows = cols = 0;
-        }
-
-        inline oclMat::oclMat(const Mat &m)
-            : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0)
-        {
-            //clCxt = Context::getContext();
-            upload(m);
-        }
-
-        inline oclMat::~oclMat()
-        {
-            release();
-        }
-
-        inline oclMat &oclMat::operator = (const oclMat &m)
-        {
-            if( this != &m )
-            {
-                if( m.refcount )
-                    CV_XADD(m.refcount, 1);
-                release();
-                clCxt = m.clCxt;
-                flags = m.flags;
-                rows = m.rows;
-                cols = m.cols;
-                step = m.step;
-                data = m.data;
-                datastart = m.datastart;
-                dataend = m.dataend;
-                offset = m.offset;
-                wholerows = m.wholerows;
-                wholecols = m.wholecols;
-                refcount = m.refcount;
-            }
-            return *this;
-        }
-
-        inline oclMat &oclMat::operator = (const Mat &m)
-        {
-            //clCxt = Context::getContext();
-            upload(m);
-            return *this;
-        }
-
-        inline oclMat& oclMat::operator = (const oclMatExpr& expr)
-        {
-            expr.assign(*this);
-            return *this;
-        }
-
-        /* Fixme! To be supported in OpenCL later. */
-#if 0
-        template <class T> inline oclMat::operator DevMem2D_<T>() const
-        {
-            return DevMem2D_<T>(rows, cols, (T *)data, step);
-        }
-        template <class T> inline oclMat::operator PtrStep_<T>() const
-        {
-            return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
-        }
-#endif
-
-        //CPP: void oclMat::upload(const Mat& m);
-
-        inline oclMat::operator Mat() const
-        {
-            Mat m;
-            download(m);
-            return m;
-        }
-
-        //CPP void oclMat::download(cv::Mat& m) const;
-
-        inline oclMat oclMat::row(int y) const
-        {
-            return oclMat(*this, Range(y, y + 1), Range::all());
-        }
-        inline oclMat oclMat::col(int x) const
-        {
-            return oclMat(*this, Range::all(), Range(x, x + 1));
-        }
-        inline oclMat oclMat::rowRange(int startrow, int endrow) const
-        {
-            return oclMat(*this, Range(startrow, endrow), Range::all());
-        }
-        inline oclMat oclMat::rowRange(const Range &r) const
-        {
-            return oclMat(*this, r, Range::all());
-        }
-        inline oclMat oclMat::colRange(int startcol, int endcol) const
-        {
-            return oclMat(*this, Range::all(), Range(startcol, endcol));
-        }
-        inline oclMat oclMat::colRange(const Range &r) const
-        {
-            return oclMat(*this, Range::all(), r);
-        }
-
-        inline oclMat oclMat::clone() const
-        {
-            oclMat m;
-            copyTo(m);
-            return m;
-        }
-
-        //CPP void oclMat::copyTo( oclMat& m ) const;
-        //CPP void oclMat::copyTo( oclMat& m, const oclMat& mask  ) const;
-        //CPP void oclMat::convertTo( oclMat& m, int rtype, double alpha=1, double beta=0 ) const;
-
-        inline void oclMat::assignTo( oclMat &m, int mtype ) const
-        {
-            if( mtype < 0 )
-                m = *this;
-            else
-                convertTo(m, mtype);
-        }
-
-        //CPP oclMat& oclMat::operator = (const Scalar& s);
-        //CPP oclMat& oclMat::setTo(const Scalar& s, const oclMat& mask=oclMat());
-        //CPP oclMat oclMat::reshape(int _cn, int _rows=0) const;
-        inline void oclMat::create(Size _size, int _type)
-        {
-            create(_size.height, _size.width, _type);
-        }
-        //CPP void oclMat::create(int _rows, int _cols, int _type);
-        //CPP void oclMat::release();
-
-        inline void oclMat::swap(oclMat &b)
-        {
-            std::swap( flags, b.flags );
-            std::swap( rows, b.rows );
-            std::swap( cols, b.cols );
-            std::swap( step, b.step );
-            std::swap( data, b.data );
-            std::swap( datastart, b.datastart );
-            std::swap( dataend, b.dataend );
-            std::swap( refcount, b.refcount );
-            std::swap( offset, b.offset );
-            std::swap( clCxt,  b.clCxt );
-            std::swap( wholerows, b.wholerows );
-            std::swap( wholecols, b.wholecols );
-        }
-
-        inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
-        {
-            size_t esz = elemSize();//, minstep;
-            //ptrdiff_t delta1 = offset;//, delta2 = dataend - datastart;
-            CV_DbgAssert( step > 0 );
-            if( offset == 0 )
-                ofs.x = ofs.y = 0;
-            else
-            {
-                ofs.y = (int)(offset / step);
-                ofs.x = (int)((offset - step * ofs.y) / esz);
-                //CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz );
-            }
-            //minstep = (ofs.x + cols)*esz;
-            //wholeSize.height = (int)((delta2 - minstep)/step + 1);
-            //wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
-            //wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
-            //wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
-            wholeSize.height = wholerows;
-            wholeSize.width = wholecols;
-        }
-
-        inline oclMat &oclMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
-        {
-            Size wholeSize;
-            Point ofs;
-            size_t esz = elemSize();
-            locateROI( wholeSize, ofs );
-            int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
-            int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
-            offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
-            rows = row2 - row1;
-            cols = col2 - col1;
-            if( esz * cols == step || rows == 1 )
-                flags |= Mat::CONTINUOUS_FLAG;
-            else
-                flags &= ~Mat::CONTINUOUS_FLAG;
-            return *this;
-        }
-
-        inline oclMat oclMat::operator()( Range rRange, Range cRange ) const
-        {
-            return oclMat(*this, rRange, cRange);
-        }
-        inline oclMat oclMat::operator()( const Rect &roi ) const
-        {
-            return oclMat(*this, roi);
-        }
-
-        inline bool oclMat::isContinuous() const
-        {
-            return (flags & Mat::CONTINUOUS_FLAG) != 0;
-        }
-        inline size_t oclMat::elemSize() const
-        {
-            return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels())));
-        }
-        inline size_t oclMat::elemSize1() const
-        {
-            return CV_ELEM_SIZE1(flags);
-        }
-        inline int oclMat::type() const
-        {
-            return CV_MAT_TYPE(flags);
-        }
-        inline int oclMat::ocltype() const
-        {
-            return CV_MAKE_TYPE(depth(), oclchannels());
-        }
-        inline int oclMat::depth() const
-        {
-            return CV_MAT_DEPTH(flags);
-        }
-        inline int oclMat::channels() const
-        {
-            return CV_MAT_CN(flags);
-        }
-        inline int oclMat::oclchannels() const
-        {
-            return (CV_MAT_CN(flags)) == 3 ? 4 : (CV_MAT_CN(flags));
-        }
-        inline size_t oclMat::step1() const
-        {
-            return step / elemSize1();
-        }
-        inline Size oclMat::size() const
-        {
-            return Size(cols, rows);
-        }
-        inline bool oclMat::empty() const
-        {
-            return data == 0;
-        }
-
-        inline oclMat oclMat::t() const
-        {
-            oclMat tmp;
-            transpose(*this, tmp);
-            return tmp;
-        }
-
-        static inline void swap( oclMat &a, oclMat &b )
-        {
-            a.swap(b);
-        }
-
-        inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat &m)
-        {
-            if (m.type() == type && m.rows >= rows && m.cols >= cols)
-                m = m(Rect(0, 0, cols, rows));
-            else
-                m.create(rows, cols, type);
-        }
-
-        inline void ensureSizeIsEnough(Size size, int type, oclMat &m)
-        {
-            ensureSizeIsEnough(size.height, size.width, type, m);
-        }
-
-
-    } /* end of namespace ocl */
-
-} /* end of namespace cv */
-
-#endif /* __OPENCV_OCL_MATRIX_OPERATIONS_HPP__ */
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
deleted file mode 100644
index 3dd4654..0000000
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ /dev/null
@@ -1,48 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __OPENCV_BUILD
-#error this is a compatibility header which should not be used inside the OpenCV library
-#endif
-
-#include "opencv2/ocl.hpp"
diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
deleted file mode 100644
index ee0f703..0000000
--- a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
+++ /dev/null
@@ -1,154 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined(DUMP_MESSAGE_STDOUT) && !defined(DUMP_PROPERTY_XML)
-#error Invalid usage
-#endif
-
-#if !defined(DUMP_PROPERTY_XML)
-#define DUMP_PROPERTY_XML(...)
-#endif
-
-#if !defined(DUMP_MESSAGE_STDOUT)
-#define DUMP_MESSAGE_STDOUT(...)
-#endif
-
-#include <sstream>
-
-static std::string bytesToStringRepr(size_t value)
-{
-    size_t b = value % 1024;
-    value /= 1024;
-
-    size_t kb = value % 1024;
-    value /= 1024;
-
-    size_t mb = value % 1024;
-    value /= 1024;
-
-    size_t gb = value;
-
-    std::ostringstream stream;
-
-    if (gb > 0)
-        stream << gb << " GB ";
-    if (mb > 0)
-        stream << mb << " MB ";
-    if (kb > 0)
-        stream << kb << " kB ";
-    if (b > 0)
-        stream << b << " B";
-
-    return stream.str();
-}
-
-static void dumpOpenCLDevice()
-{
-    using namespace cv::ocl;
-    try
-    {
-        cv::ocl::PlatformsInfo platforms;
-        cv::ocl::getOpenCLPlatforms(platforms);
-        DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
-        const char* deviceTypeStr;
-        for(unsigned int i=0; i < platforms.size(); i++)
-        {
-            DUMP_MESSAGE_STDOUT("    " << platforms.at(i)->platformName);
-            const cv::ocl::DevicesInfo& devices = platforms.at(i)->devices;
-            for(unsigned int j=0; j < devices.size(); j++)
-            {
-                const cv::ocl::DeviceInfo& current_device = *devices.at(j);
-                deviceTypeStr = current_device.deviceType == CVCL_DEVICE_TYPE_CPU
-                            ? ("CPU") : (current_device.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
-                DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << " : " << current_device.deviceName << " : " << current_device.deviceVersion );
-                DUMP_PROPERTY_XML("cv_ocl_platform_"<< i<<"_device_"<<j, "(Platform=" << current_device.platform->platformName << ")(Type="
-                    << deviceTypeStr <<")(Name="<< current_device.deviceName << ")(Version="<< current_device.deviceVersion<<")");
-            }
-        }
-        DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
-
-        const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo();
-
-        DUMP_MESSAGE_STDOUT("    Platform = "<< deviceInfo.platform->platformName);
-        DUMP_PROPERTY_XML("cv_ocl_current_platformName", deviceInfo.platform->platformName);
-
-        deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU
-                        ? "CPU" : (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
-        DUMP_MESSAGE_STDOUT("    Type = "<< deviceTypeStr);
-        DUMP_PROPERTY_XML("cv_ocl_current_deviceType", deviceTypeStr);
-
-        DUMP_MESSAGE_STDOUT("    Name = "<< deviceInfo.deviceName);
-        DUMP_PROPERTY_XML("cv_ocl_current_deviceName", deviceInfo.deviceName);
-
-        DUMP_MESSAGE_STDOUT("    Version = " << deviceInfo.deviceVersion);
-        DUMP_PROPERTY_XML("cv_ocl_current_deviceVersion", deviceInfo.deviceVersion);
-
-        DUMP_MESSAGE_STDOUT("    Compute units = "<< deviceInfo.maxComputeUnits);
-        DUMP_PROPERTY_XML("cv_ocl_current_maxComputeUnits", deviceInfo.maxComputeUnits);
-
-        DUMP_MESSAGE_STDOUT("    Max work group size = "<< deviceInfo.maxWorkGroupSize);
-        DUMP_PROPERTY_XML("cv_ocl_current_maxWorkGroupSize", deviceInfo.maxWorkGroupSize);
-
-        std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize);
-        DUMP_MESSAGE_STDOUT("    Local memory size = "<< localMemorySizeStr.c_str());
-        DUMP_PROPERTY_XML("cv_ocl_current_localMemorySize", deviceInfo.localMemorySize);
-
-        std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize);
-        DUMP_MESSAGE_STDOUT("    Max memory allocation size = "<< maxMemAllocSizeStr.c_str());
-        DUMP_PROPERTY_XML("cv_ocl_current_maxMemAllocSize", deviceInfo.maxMemAllocSize);
-
-        const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No";
-        DUMP_MESSAGE_STDOUT("    Double support = "<< doubleSupportStr);
-        DUMP_PROPERTY_XML("cv_ocl_current_haveDoubleSupport", deviceInfo.haveDoubleSupport);
-
-        const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No";
-        DUMP_MESSAGE_STDOUT("    Unified memory = "<< isUnifiedMemoryStr);
-        DUMP_PROPERTY_XML("cv_ocl_current_isUnifiedMemory", deviceInfo.isUnifiedMemory);
-    }
-    catch (...)
-    {
-        DUMP_MESSAGE_STDOUT("OpenCL device not available");
-        DUMP_PROPERTY_XML("cv_ocl", "not available");
-    }
-}
-
-#undef DUMP_MESSAGE_STDOUT
-#undef DUMP_PROPERTY_XML
diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp
deleted file mode 100644
index 6d74eb1..0000000
--- a/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__
-#define __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__
-
-#include "opencv2/core/opencl/runtime/opencl_core.hpp"
-#include <vector>
-#include <string>
-
-namespace cl_utils {
-
-inline cl_int getPlatforms(std::vector<cl_platform_id>& platforms)
-{
-    cl_uint n = 0;
-
-    cl_int err = ::clGetPlatformIDs(0, NULL, &n);
-    if (err != CL_SUCCESS)
-        return err;
-
-    platforms.clear(); platforms.resize(n);
-    err = ::clGetPlatformIDs(n, &platforms[0], NULL);
-    if (err != CL_SUCCESS)
-        return err;
-
-    return CL_SUCCESS;
-}
-
-inline cl_int getDevices(cl_platform_id platform, cl_device_type type, std::vector<cl_device_id>& devices)
-{
-    cl_uint n = 0;
-
-    cl_int err = ::clGetDeviceIDs(platform, type, 0, NULL, &n);
-    if (err != CL_SUCCESS)
-        return err;
-
-    devices.clear(); devices.resize(n);
-    err = ::clGetDeviceIDs(platform, type, n, &devices[0], NULL);
-    if (err != CL_SUCCESS)
-        return err;
-
-    return CL_SUCCESS;
-}
-
-
-
-
-template <typename Functor, typename ObjectType, typename T>
-inline cl_int getScalarInfo(Functor f, ObjectType obj, cl_uint name, T& param)
-{
-    return f(obj, name, sizeof(T), &param, NULL);
-}
-
-template <typename Functor, typename ObjectType>
-inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
-{
-    ::size_t required;
-    cl_int err = f(obj, name, 0, NULL, &required);
-    if (err != CL_SUCCESS)
-        return err;
-
-    param.clear();
-    if (required > 0)
-    {
-        std::vector<char> buf(required + 1, char(0));
-        err = f(obj, name, required, &buf[0], NULL);
-        if (err != CL_SUCCESS)
-            return err;
-        param = &buf[0];
-    }
-
-    return CL_SUCCESS;
-}
-
-} // namespace cl_utils
-
-#endif // __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__
diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp
deleted file mode 100644
index b1ceacd..0000000
--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OCL_PRIVATE_UTIL__
-#define __OPENCV_OCL_PRIVATE_UTIL__
-
-#include "opencv2/core/opencl/runtime/opencl_core.hpp"
-#include "opencv2/core/ocl_genbase.hpp"
-
-#include "opencv2/ocl.hpp"
-
-namespace cv
-{
-namespace ocl
-{
-
-inline cl_device_id getClDeviceID(const Context *ctx)
-{
-    return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
-}
-
-inline cl_context getClContext(const Context *ctx)
-{
-    return *(cl_context*)(ctx->getOpenCLContextPtr());
-}
-
-inline cl_command_queue getClCommandQueue(const Context *ctx)
-{
-    return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
-}
-
-CV_EXPORTS cv::Mutex& getInitializationMutex();
-
-enum openCLMemcpyKind
-{
-    clMemcpyHostToDevice = 0,
-    clMemcpyDeviceToHost,
-    clMemcpyDeviceToDevice
-};
-///////////////////////////OpenCL call wrappers////////////////////////////
-CV_EXPORTS void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
-        size_t widthInBytes, size_t height);
-CV_EXPORTS void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
-        size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
-CV_EXPORTS void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
-        const void *src, size_t spitch,
-        size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
-CV_EXPORTS void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
-        const void *src, size_t spitch,
-        size_t width, size_t height, int src_offset);
-CV_EXPORTS void openCLFree(void *devPtr);
-CV_EXPORTS cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
-CV_EXPORTS void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
-CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
-        const cv::ocl::ProgramEntry* source, String kernelName);
-CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
-        const cv::ocl::ProgramEntry* source, String kernelName, const char *build_options);
-CV_EXPORTS cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source,
-        String kernelName, int channels, int depth, const char *build_options);
-CV_EXPORTS void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
-CV_EXPORTS void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
-                          size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args);
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, String kernelName, std::vector< std::pair<size_t, const void *> > &args,
-        int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
-CV_EXPORTS void openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
-        size_t globalThreads[3], size_t localThreads[3],
-        std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-        int depth, const char *build_options);
-
-CV_EXPORTS cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
-        const size_t size);
-
-CV_EXPORTS cl_mem openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
-
-enum FLUSH_MODE
-{
-    CLFINISH = 0,
-    CLFLUSH,
-    DISABLE
-};
-
-CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
-CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-        int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
-
-// bind oclMat to OpenCL image textures
-// note:
-//   1. there is no memory management. User need to explicitly release the resource
-//   2. for faster clamping, there is no buffer padding for the constructed texture
-CV_EXPORTS cl_mem bindTexture(const oclMat &mat);
-CV_EXPORTS void releaseTexture(cl_mem& texture);
-
-//Represents an image texture object
-class CV_EXPORTS TextureCL
-{
-public:
-    TextureCL(cl_mem tex, int r, int c, int t)
-        : tex_(tex), rows(r), cols(c), type(t) {}
-    ~TextureCL()
-    {
-        openCLFree(tex_);
-    }
-    operator cl_mem()
-    {
-        return tex_;
-    }
-    cl_mem const tex_;
-    const int rows;
-    const int cols;
-    const int type;
-private:
-    //disable assignment
-    void operator=(const TextureCL&);
-};
-// bind oclMat to OpenCL image textures and retunrs an TextureCL object
-// note:
-//   for faster clamping, there is no buffer padding for the constructed texture
-CV_EXPORTS Ptr<TextureCL> bindTexturePtr(const oclMat &mat);
-
-CV_EXPORTS bool isCpuDevice();
-
-CV_EXPORTS size_t queryWaveFrontSize(cl_kernel kernel);
-
-
-inline size_t divUp(size_t total, size_t grain)
-{
-    return (total + grain - 1) / grain;
-}
-
-inline size_t roundUp(size_t sz, size_t n)
-{
-    // we don't assume that n is a power of 2 (see alignSize)
-    // equal to divUp(sz, n) * n
-    size_t t = sz + n - 1;
-    size_t rem = t % n;
-    size_t result = t - rem;
-    return result;
-}
-
-}//namespace ocl
-}//namespace cv
-
-#endif //__OPENCV_OCL_PRIVATE_UTIL__
diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp
deleted file mode 100644
index b537ec1..0000000
--- a/modules/ocl/perf/main.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-#define DUMP_PROPERTY_XML(propertyName, propertyValue) \
-    do { \
-        std::stringstream ssName, ssValue;\
-        ssName << propertyName;\
-        ssValue << propertyValue; \
-        ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
-    } while (false)
-
-#define DUMP_MESSAGE_STDOUT(msg) \
-    do { \
-        std::cout << msg << std::endl; \
-    } while (false)
-
-
-#include "opencv2/ocl/private/opencl_dumpinfo.hpp"
-
-static const char * impls[] =
-{
-    IMPL_OCL,
-    IMPL_PLAIN,
-#ifdef HAVE_OPENCV_GPU
-    IMPL_GPU
-#endif
-};
-
-
-int main(int argc, char ** argv)
-{
-    ::perf::TestBase::setModulePerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE);
-
-    CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, ::dumpOpenCLDevice())
-}
diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp
deleted file mode 100644
index 592c652..0000000
--- a/modules/ocl/perf/perf_arithm.cpp
+++ /dev/null
@@ -1,1127 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::get;
-using std::tr1::tuple;
-
-///////////// Lut ////////////////////////
-
-typedef Size_MatType LUTFixture;
-
-PERF_TEST_P(LUTFixture, LUT,
-          ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                             OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
-{
-    // getting params
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    // creating src data
-    Mat src(srcSize, type), lut(1, 256, CV_8UC1);
-    int dstType = CV_MAKETYPE(lut.depth(), src.channels());
-    Mat dst(srcSize, dstType);
-
-    randu(lut, 0, 2);
-    declare.in(src, WARMUP_RNG).in(lut).out(dst);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclLut(lut), oclDst(srcSize, dstType);
-
-        OCL_TEST_CYCLE() cv::ocl::LUT(oclSrc, oclLut, oclDst);
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::LUT(src, lut, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Exp ////////////////////////
-
-typedef TestBaseWithParam<Size> ExpFixture;
-
-PERF_TEST_P(ExpFixture, Exp, OCL_TYPICAL_MAT_SIZES)
-{
-    // getting params
-    const Size srcSize = GetParam();
-    const double eps = 1e-6;
-
-    // creating src data
-    Mat src(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
-    declare.in(src).out(dst);
-    randu(src, 5, 16);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
-
-        OCL_TEST_CYCLE() cv::ocl::exp(oclSrc, oclDst);
-
-        oclDst.download(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::exp(src, dst);
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
-}
-
-///////////// LOG ////////////////////////
-
-typedef TestBaseWithParam<Size> LogFixture;
-
-PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES)
-{
-    // getting params
-    const Size srcSize = GetParam();
-    const double eps = 1e-6;
-
-    // creating src data
-    Mat src(srcSize, CV_32F), dst(srcSize, src.type());
-    randu(src, 1, 10);
-    declare.in(src).out(dst);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(3.6);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
-
-        OCL_TEST_CYCLE() cv::ocl::log(oclSrc, oclDst);
-
-        oclDst.download(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::log(src, dst);
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
-}
-
-///////////// SQRT ///////////////////////
-
-typedef TestBaseWithParam<Size> SqrtFixture;
-
-PERF_TEST_P(SqrtFixture, Sqrt, OCL_TYPICAL_MAT_SIZES)
-{
-    // getting params
-    const Size srcSize = GetParam();
-    const double eps = 1e-6;
-
-    // creating src data
-    Mat src(srcSize, CV_32F), dst(srcSize, src.type());
-    randu(src, 0, 10);
-    declare.in(src).out(dst);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
-
-        OCL_TEST_CYCLE() cv::ocl::sqrt(oclSrc, oclDst);
-
-        oclDst.download(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::sqrt(src, dst);
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
-}
-
-///////////// Add ////////////////////////
-
-typedef Size_MatType AddFixture;
-
-PERF_TEST_P(AddFixture, Add,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    // getting params
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    // creating src data
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    randu(src1, 0, 1);
-    randu(src2, 0, 1);
-    declare.in(src1, src2).out(dst);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::add(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::add(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Mul ////////////////////////
-
-typedef Size_MatType MulFixture;
-
-PERF_TEST_P(MulFixture, Mul, ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                                                OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    // getting params
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    // creating src data
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-    declare.in(src1, src2).out(dst);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::multiply(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::multiply(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Div ////////////////////////
-
-typedef Size_MatType DivFixture;
-
-PERF_TEST_P(DivFixture, Div,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    // getting params
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    // creating src data
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2).out(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if ((srcSize == OCL_SIZE_4000 && type == CV_8UC1) ||
-            (srcSize == OCL_SIZE_2000 && type == CV_8UC4))
-        declare.time(4.2);
-    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(16.6);
-
-    // select implementation
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::divide(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::divide(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Absdiff ////////////////////////
-
-typedef Size_MatType AbsDiffFixture;
-
-PERF_TEST_P(AbsDiffFixture, Absdiff,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2).in(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::absdiff(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::absdiff(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// CartToPolar ////////////////////////
-
-typedef TestBaseWithParam<Size> CartToPolarFixture;
-
-PERF_TEST_P(CartToPolarFixture, CartToPolar, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    const double eps = 8e-3;
-
-    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
-            dst1(srcSize, CV_32FC1), dst2(srcSize, CV_32FC1);
-    declare.in(src1, src2).out(dst1, dst2);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(3.6);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
-                oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::cartToPolar(oclSrc1, oclSrc2, oclDst1, oclDst2);
-
-        oclDst1.download(dst1);
-        oclDst2.download(dst2);
-
-        SANITY_CHECK(dst1, eps);
-        SANITY_CHECK(dst2, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::cartToPolar(src1, src2, dst1, dst2);
-
-        SANITY_CHECK(dst1, eps);
-        SANITY_CHECK(dst2, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// PolarToCart ////////////////////////
-
-typedef TestBaseWithParam<Size> PolarToCartFixture;
-
-PERF_TEST_P(PolarToCartFixture, PolarToCart, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
-            dst1(srcSize, CV_32FC1), dst2(srcSize, CV_32FC1);
-    declare.in(src1, src2).out(dst1, dst2);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(5.4);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
-                oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::polarToCart(oclSrc1, oclSrc2, oclDst1, oclDst2);
-
-        oclDst1.download(dst1);
-        oclDst2.download(dst2);
-
-        SANITY_CHECK(dst1, 5e-5);
-        SANITY_CHECK(dst2, 5e-5);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::polarToCart(src1, src2, dst1, dst2);
-
-        SANITY_CHECK(dst1, 5e-5);
-        SANITY_CHECK(dst2, 5e-5);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Magnitude ////////////////////////
-
-typedef TestBaseWithParam<Size> MagnitudeFixture;
-
-PERF_TEST_P(MagnitudeFixture, Magnitude, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
-            dst(srcSize, CV_32FC1);
-    randu(src1, 0, 1);
-    randu(src2, 0, 1);
-    declare.in(src1, src2).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
-                oclDst(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::magnitude(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1e-6);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::magnitude(src1, src2, dst);
-
-        SANITY_CHECK(dst, 1e-6);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Transpose ////////////////////////
-
-typedef Size_MatType TransposeFixture;
-
-PERF_TEST_P(TransposeFixture, Transpose,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::transpose(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::transpose(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Flip ////////////////////////
-
-typedef Size_MatType FlipFixture;
-
-PERF_TEST_P(FlipFixture, Flip,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::flip(oclSrc, oclDst, 0);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::flip(src, dst, 0);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// minMax ////////////////////////
-
-typedef Size_MatType minMaxFixture;
-
-PERF_TEST_P(minMaxFixture, minMax,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type);
-    declare.in(src, WARMUP_RNG);
-
-    double min_val = std::numeric_limits<double>::max(),
-            max_val = std::numeric_limits<double>::min();
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-
-        OCL_TEST_CYCLE() cv::ocl::minMax(oclSrc, &min_val, &max_val);
-
-        ASSERT_GE(max_val, min_val);
-        SANITY_CHECK(min_val);
-        SANITY_CHECK(max_val);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        Point min_loc, max_loc;
-
-        TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
-
-        ASSERT_GE(max_val, min_val);
-        SANITY_CHECK(min_val);
-        SANITY_CHECK(max_val);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// minMaxLoc ////////////////////////
-
-typedef Size_MatType minMaxLocFixture;
-
-PERF_TEST_P(minMaxLocFixture, minMaxLoc,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type);
-    randu(src, 0, 1);
-    declare.in(src);
-
-    double min_val = 0.0, max_val = 0.0;
-    Point min_loc, max_loc;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-
-        OCL_TEST_CYCLE() cv::ocl::minMaxLoc(oclSrc, &min_val, &max_val, &min_loc, &max_loc);
-
-        ASSERT_GE(max_val, min_val);
-        SANITY_CHECK(min_val);
-        SANITY_CHECK(max_val);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
-
-        ASSERT_GE(max_val, min_val);
-        SANITY_CHECK(min_val);
-        SANITY_CHECK(max_val);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Sum ////////////////////////
-
-typedef Size_MatType SumFixture;
-
-PERF_TEST_P(SumFixture, Sum,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type);
-    Scalar result;
-    randu(src, 0, 60);
-    declare.in(src);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-
-        OCL_TEST_CYCLE() result = cv::ocl::sum(oclSrc);
-
-        SANITY_CHECK(result);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() result = cv::sum(src);
-
-        SANITY_CHECK(result);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// countNonZero ////////////////////////
-
-typedef Size_MatType countNonZeroFixture;
-
-PERF_TEST_P(countNonZeroFixture, countNonZero,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type);
-    int result = 0;
-    randu(src, 0, 256);
-    declare.in(src);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-
-        OCL_TEST_CYCLE() result = cv::ocl::countNonZero(oclSrc);
-
-        SANITY_CHECK(result);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() result = cv::countNonZero(src);
-
-        SANITY_CHECK(result);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Phase ////////////////////////
-
-typedef TestBaseWithParam<Size> PhaseFixture;
-
-PERF_TEST_P(PhaseFixture, Phase, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
-            dst(srcSize, CV_32FC1);
-    declare.in(src1, src2).out(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
-                oclDst(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::phase(oclSrc1, oclSrc2, oclDst, 1);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1e-2);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::phase(src1, src2, dst, 1);
-
-        SANITY_CHECK(dst, 1e-2);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// bitwise_and////////////////////////
-
-typedef Size_MatType BitwiseAndFixture;
-
-PERF_TEST_P(BitwiseAndFixture, bitwise_and,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2).out(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::bitwise_and(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// bitwise_xor ////////////////////////
-
-typedef Size_MatType BitwiseXorFixture;
-
-PERF_TEST_P(BitwiseXorFixture, bitwise_xor,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2).out(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::bitwise_xor(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// bitwise_or ////////////////////////
-
-typedef Size_MatType BitwiseOrFixture;
-
-PERF_TEST_P(BitwiseOrFixture, bitwise_or,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2).out(dst);
-    randu(src1, 0, 256);
-    randu(src2, 0, 256);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type());
-
-        OCL_TEST_CYCLE() cv::ocl::bitwise_or(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// bitwise_not////////////////////////
-
-typedef Size_MatType BitwiseNotFixture;
-
-PERF_TEST_P(BitwiseAndFixture, bitwise_not,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::bitwise_not(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::bitwise_not(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// compare////////////////////////
-
-typedef Size_MatType CompareFixture;
-
-PERF_TEST_P(CompareFixture, compare,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC1);
-    declare.in(src1, src2, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, CV_8UC1);
-
-        OCL_TEST_CYCLE() cv::ocl::compare(oclSrc1, oclSrc2, oclDst, CMP_EQ);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::compare(src1, src2, dst, CMP_EQ);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// pow ////////////////////////
-
-typedef TestBaseWithParam<Size> PowFixture;
-
-PERF_TEST_P(PowFixture, pow, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    const double eps = 1e-6;
-
-    Mat src(srcSize, CV_32F), dst(srcSize, CV_32F);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
-
-        OCL_TEST_CYCLE() cv::ocl::pow(oclSrc, -2.0, oclDst);
-
-        oclDst.download(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::pow(src, -2.0, dst);
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
-}
-
-///////////// AddWeighted////////////////////////
-
-typedef Size_MatType AddWeightedFixture;
-
-PERF_TEST_P(AddWeightedFixture, AddWeighted,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2, WARMUP_RNG).out(dst);
-    double alpha = 2.0, beta = 1.0, gama = 3.0;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::addWeighted(oclSrc1, alpha, oclSrc2, beta, gama, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::addWeighted(src1, alpha, src2, beta, gama, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Min ////////////////////////
-
-typedef Size_MatType MinFixture;
-
-PERF_TEST_P(MinFixture, Min,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::min(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() dst = cv::min(src1, src2);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Max ////////////////////////
-
-typedef Size_MatType MaxFixture;
-
-PERF_TEST_P(MaxFixture, Max,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
-    declare.in(src1, src2, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::max(oclSrc1, oclSrc2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() dst = cv::max(src1, src2);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Max ////////////////////////
-
-typedef Size_MatType AbsFixture;
-
-PERF_TEST_P(AbsFixture, Abs,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::abs(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() dst = cv::abs(src);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Repeat ////////////////////////
-
-typedef Size_MatType RepeatFixture;
-
-PERF_TEST_P(RepeatFixture, Repeat,
-            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    const int nx = 3, ny = 2;
-    const Size dstSize(srcSize.width * nx, srcSize.height * ny);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-    checkDeviceMaxMemoryAllocSize(dstSize, type);
-
-    Mat src(srcSize, type), dst(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::repeat(oclSrc, ny, nx, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::repeat(src, ny, nx, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
deleted file mode 100644
index 9509964..0000000
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using namespace std;
-using namespace cv::ocl;
-using namespace cv;
-using std::tr1::tuple;
-using std::tr1::get;
-
-#if defined(HAVE_XINE)         || \
-    defined(HAVE_GSTREAMER)    || \
-    defined(HAVE_QUICKTIME)    || \
-    defined(HAVE_AVFOUNDATION) || \
-    defined(HAVE_FFMPEG)       || \
-    defined(WIN32)
-
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
-#else
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
-#endif
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
-{
-    for(int i = 0; i< (int)(input.size()); i++)
-    {
-        cvtColor(input[i], output[i], COLOR_RGB2GRAY);
-    }
-}
-
-//prepare data for CPU
-static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
-{
-    cv::Mat frame;
-    std::vector<Mat> frame_buffer_init;
-    int nFrame = (int)frame_buffer.size();
-    for(int i = 0; i < nFrame; i++)
-    {
-        cap >> frame;
-        ASSERT_FALSE(frame.empty());
-        frame_buffer_init.push_back(frame);
-    }
-
-    if(cn == 1)
-        cvtFrameFmt(frame_buffer_init, frame_buffer);
-    else
-        frame_buffer = frame_buffer_init;
-}
-
-//copy CPU data to GPU
-static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
-{
-    for(int i = 0; i < (int)frame_buffer.size(); i++)
-        frame_buffer_ocl.push_back(cv::ocl::oclMat(frame_buffer[i]));
-}
-
-///////////// MOG ////////////////////////
-
-typedef tuple<string, int, double> VideoMOGParamType;
-typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
-
-PERF_TEST_P(VideoMOGFixture, MOG,
-            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-            ::testing::Values(1, 3),
-            ::testing::Values(0.0, 0.01)))
-{
-    VideoMOGParamType params = GetParam();
-
-    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
-    const int cn = get<1>(params);
-    const float learningRate = static_cast<float>(get<2>(params));
-
-    const int nFrame = 5;
-
-    Mat foreground_cpu;
-    std::vector<Mat> frame_buffer(nFrame);
-    std::vector<oclMat> frame_buffer_ocl;
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    prepareData(cap, cn, frame_buffer);
-
-    cv::Mat foreground;
-    cv::ocl::oclMat foreground_d;
-    if(RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE()
-        {
-            cv::Ptr<cv::BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();
-            foreground.release();
-            for (int i = 0; i < nFrame; i++)
-            {
-                mog->apply(frame_buffer[i], foreground, learningRate);
-            }
-        }
-        SANITY_CHECK(foreground);
-    }
-    else if(RUN_OCL_IMPL)
-    {
-        prepareData(frame_buffer, frame_buffer_ocl);
-        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
-        OCL_TEST_CYCLE()
-        {
-            cv::ocl::MOG d_mog;
-            foreground_d.release();
-            for (int i = 0; i < nFrame; ++i)
-            {
-                d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
-            }
-        }
-        foreground_d.download(foreground);
-        SANITY_CHECK(foreground);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// MOG2 ////////////////////////
-
-typedef tuple<string, int> VideoMOG2ParamType;
-typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
-
-PERF_TEST_P(VideoMOG2Fixture, DISABLED_MOG2, // TODO Disabled: random hungs on buildslave
-            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-            ::testing::Values(1, 3)))
-{
-    VideoMOG2ParamType params = GetParam();
-
-    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
-    const int cn = get<1>(params);
-    int nFrame = 5;
-
-    std::vector<cv::Mat> frame_buffer(nFrame);
-    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-    prepareData(cap, cn, frame_buffer);
-    cv::Mat foreground;
-    cv::ocl::oclMat foreground_d;
-
-    if(RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE()
-        {
-            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
-            mog2->setDetectShadows(false);
-            foreground.release();
-
-            for (int i = 0; i < nFrame; i++)
-            {
-                mog2->apply(frame_buffer[i], foreground);
-            }
-        }
-        SANITY_CHECK(foreground);
-    }
-    else if(RUN_OCL_IMPL)
-    {
-        prepareData(frame_buffer, frame_buffer_ocl);
-        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
-        OCL_TEST_CYCLE()
-        {
-            cv::ocl::MOG2 d_mog2;
-            foreground_d.release();
-            for (int i = 0; i < nFrame; i++)
-            {
-                d_mog2(frame_buffer_ocl[i], foreground_d);
-            }
-        }
-        foreground_d.download(foreground);
-        SANITY_CHECK(foreground);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// MOG2_GetBackgroundImage //////////////////
-
-typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
-
-PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
-            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-            ::testing::Values(3)))
-{
-    VideoMOG2ParamType params = GetParam();
-
-    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
-    const int cn = get<1>(params);
-    int nFrame = 5;
-
-    std::vector<cv::Mat> frame_buffer(nFrame);
-    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    prepareData(cap, cn, frame_buffer);
-
-    cv::Mat foreground;
-    cv::Mat background;
-    cv::ocl::oclMat foreground_d;
-    cv::ocl::oclMat background_d;
-
-    if(RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE()
-        {
-            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
-            mog2->setDetectShadows(false);
-            foreground.release();
-            background.release();
-            for (int i = 0; i < nFrame; i++)
-            {
-                mog2->apply(frame_buffer[i], foreground);
-            }
-            mog2->getBackgroundImage(background);
-        }
-        SANITY_CHECK(background);
-    }
-    else if(RUN_OCL_IMPL)
-    {
-        prepareData(frame_buffer, frame_buffer_ocl);
-        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
-        OCL_TEST_CYCLE()
-        {
-            cv::ocl::MOG2 d_mog2;
-            foreground_d.release();
-            background_d.release();
-            for (int i = 0; i < nFrame; i++)
-            {
-                d_mog2(frame_buffer_ocl[i], foreground_d);
-            }
-            d_mog2.getBackgroundImage(background_d);
-        }
-        background_d.download(background);
-        SANITY_CHECK(background);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-#endif
diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp
deleted file mode 100644
index 6f611bb..0000000
--- a/modules/ocl/perf/perf_blend.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using namespace cv;
-using std::tr1::get;
-
-///////////// blend ////////////////////////
-
-template <typename T>
-static void blendLinearGold(const Mat &img1, const Mat &img2,
-                            const Mat &weights1, const Mat &weights2,
-                            Mat &result_gold)
-{
-    CV_Assert(img1.size() == img2.size() && img1.type() == img2.type());
-    CV_Assert(weights1.size() == weights2.size() && weights1.size() == img1.size() &&
-              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
-
-    result_gold.create(img1.size(), img1.type());
-
-    int cn = img1.channels();
-    int step1 = img1.cols * img1.channels();
-
-    for (int y = 0; y < img1.rows; ++y)
-    {
-        const float * const weights1_row = weights1.ptr<float>(y);
-        const float * const weights2_row = weights2.ptr<float>(y);
-        const T * const img1_row = img1.ptr<T>(y);
-        const T * const img2_row = img2.ptr<T>(y);
-        T * const result_gold_row = result_gold.ptr<T>(y);
-
-        for (int x = 0; x < step1; ++x)
-        {
-            int x1 = x / cn;
-            float w1 = weights1_row[x1], w2 = weights2_row[x1];
-            result_gold_row[x] = saturate_cast<T>(((float)img1_row[x] * w1
-                                                 + (float)img2_row[x] * w2) / (w1 + w2 + 1e-5f));
-        }
-    }
-}
-
-typedef void (*blendFunction)(const Mat &img1, const Mat &img2,
-                              const Mat &weights1, const Mat &weights2,
-                              Mat &result_gold);
-
-typedef Size_MatType blendLinearFixture;
-
-PERF_TEST_P(blendLinearFixture, blendLinear, ::testing::Combine(
-                OCL_TYPICAL_MAT_SIZES, testing::Values(CV_8UC1, CV_8UC3, CV_32FC1)))
-{
-    Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int srcType = get<1>(params);
-    const double eps = CV_MAT_DEPTH(srcType) <= CV_32S ? 1.0 : 0.2;
-
-    Mat src1(srcSize, srcType), src2(srcSize, srcType), dst(srcSize, srcType);
-    Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1);
-
-    declare.in(src1, src2, WARMUP_RNG).out(dst);
-    randu(weights1, 0.0f, 1.0f);
-    randu(weights2, 0.0f, 1.0f);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst;
-        ocl::oclMat oclWeights1(weights1), oclWeights2(weights2);
-
-        OCL_TEST_CYCLE() ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        blendFunction funcs[] = { (blendFunction)blendLinearGold<uchar>, (blendFunction)blendLinearGold<float> };
-        int funcIdx = CV_MAT_DEPTH(srcType) == CV_8UC1 ? 0 : 1;
-
-        TEST_CYCLE() (funcs[funcIdx])(src1, src2, weights1, weights2, dst);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_brief.cpp b/modules/ocl/perf/perf_brief.cpp
deleted file mode 100644
index de1f4f9..0000000
--- a/modules/ocl/perf/perf_brief.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Matthias Bady, aegirxx ==> gmail.com
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace ocl;
-using namespace perf;
-
-///////////// BRIEF ////////////////////////
-typedef TestBaseWithParam<std::tr1::tuple<std::string, int, size_t> > OCL_BRIEF;
-
-PERF_TEST_P( OCL_BRIEF, extract, testing::Combine(
-                                                   testing::Values( string( "gpu/opticalflow/rubberwhale1.png" ),
-                                                                    string( "gpu/stereobm/aloe-L.png" )
-                                                                    ), testing::Values( 16, 32, 64 ), testing::Values( 250, 500, 1000, 2500, 3000 ) ) )
-{
-    const std::string filename = std::tr1::get<0>(GetParam( ));
-    const int bytes = std::tr1::get<1>(GetParam( ));
-    const size_t numKp = std::tr1::get<2>(GetParam( ));
-
-    Mat img = imread( getDataPath( filename ), IMREAD_GRAYSCALE );
-    ASSERT_TRUE( !img.empty( ) ) << "no input image";
-
-    int threshold = 15;
-    std::vector<KeyPoint> keypoints;
-    while (threshold > 0 && keypoints.size( ) < numKp)
-    {
-        FastFeatureDetector fast( threshold );
-        fast.detect( img, keypoints, Mat( ) );
-        threshold -= 5;
-        KeyPointsFilter::runByImageBorder( keypoints, img.size( ), BRIEF_OCL::getBorderSize( ) );
-    }
-    ASSERT_TRUE( keypoints.size( ) >= numKp ) << "not enough keypoints";
-    keypoints.resize( numKp );
-
-    if ( RUN_OCL_IMPL )
-    {
-        Mat kpMat( 2, int( keypoints.size() ), CV_32FC1 );
-        for ( size_t i = 0; i < keypoints.size( ); ++i )
-        {
-            kpMat.col( int( i ) ).row( 0 ) = keypoints[i].pt.x;
-            kpMat.col( int( i ) ).row( 1 ) = keypoints[i].pt.y;
-        }
-        BRIEF_OCL brief( bytes );
-        oclMat imgCL( img ), keypointsCL(kpMat), mask;
-        while (next( ))
-        {
-            startTimer( );
-            oclMat descriptorsCL;
-            brief.compute( imgCL, keypointsCL, mask, descriptorsCL );
-            cv::ocl::finish( );
-            stopTimer( );
-        }
-        SANITY_CHECK_NOTHING( )
-    }
-    else if ( RUN_PLAIN_IMPL )
-    {
-        BriefDescriptorExtractor brief( bytes );
-
-        while (next( ))
-        {
-            startTimer( );
-            Mat descriptors;
-            brief.compute( img, keypoints, descriptors );
-            stopTimer( );
-        }
-        SANITY_CHECK_NOTHING( )
-    }
-    else
-        OCL_PERF_ELSE;
-}
\ No newline at end of file
diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp
deleted file mode 100644
index d124428..0000000
--- a/modules/ocl/perf/perf_brute_force_matcher.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-#define OCL_BFMATCHER_TYPICAL_MAT_SIZES ::testing::Values(cv::Size(128, 500), cv::Size(128, 1000), cv::Size(128, 2000))
-
-//////////////////// BruteForceMatch /////////////////
-
-typedef TestBaseWithParam<Size> BruteForceMatcherFixture;
-
-PERF_TEST_P(BruteForceMatcherFixture, match,
-            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    vector<DMatch> matches;
-    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
-    declare.in(query, train).time(srcSize.height == 2000 ? 9 : 4 );
-    randu(query, 0.0f, 1.0f);
-    randu(train, 0.0f, 1.0f);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        BFMatcher matcher(NORM_L2);
-        TEST_CYCLE() matcher.match(query, train, matches);
-
-        SANITY_CHECK_MATCHES(matches);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
-        ocl::oclMat oclQuery(query), oclTrain(train);
-        ocl::oclMat oclTrainIdx, oclDistance;
-
-        OCL_TEST_CYCLE()
-            oclMatcher.matchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance);
-
-        oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches);
-
-        SANITY_CHECK_MATCHES(matches, 1e-5);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-PERF_TEST_P(BruteForceMatcherFixture, knnMatch,
-            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    vector<vector<DMatch> > matches(2);
-    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
-    randu(query, 0.0f, 1.0f);
-    randu(train, 0.0f, 1.0f);
-
-    declare.in(query, train);
-    if (srcSize.height == 2000)
-        declare.time(9);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        BFMatcher matcher(NORM_L2);
-        TEST_CYCLE() matcher.knnMatch(query, train, matches, 2);
-
-        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
-        SANITY_CHECK_MATCHES(matches0);
-        SANITY_CHECK_MATCHES(matches1);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
-        ocl::oclMat oclQuery(query), oclTrain(train);
-        ocl::oclMat oclTrainIdx, oclDistance, oclAllDist;
-
-        OCL_TEST_CYCLE()
-                oclMatcher.knnMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclAllDist, 2);
-
-        oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches);
-
-        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
-        SANITY_CHECK_MATCHES(matches0, 1e-5);
-        SANITY_CHECK_MATCHES(matches1, 1e-5);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-PERF_TEST_P(BruteForceMatcherFixture, radiusMatch,
-            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    const float max_distance = 2.0f;
-    vector<vector<DMatch> > matches(2);
-    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
-    declare.in(query, train);
-
-    randu(query, 0.0f, 1.0f);
-    randu(train, 0.0f, 1.0f);
-
-    if (srcSize.height == 2000)
-        declare.time(9.15);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        cv::BFMatcher matcher(NORM_L2);
-        TEST_CYCLE() matcher.radiusMatch(query, train, matches, max_distance);
-
-        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
-        SANITY_CHECK_MATCHES(matches0);
-        SANITY_CHECK_MATCHES(matches1);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclQuery(query), oclTrain(train);
-        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
-        ocl::oclMat oclTrainIdx, oclDistance, oclNMatches;
-
-        OCL_TEST_CYCLE()
-                oclMatcher.radiusMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclNMatches, max_distance);
-
-        oclMatcher.radiusMatchDownload(oclTrainIdx, oclDistance, oclNMatches, matches);
-
-        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
-        SANITY_CHECK_MATCHES(matches0);
-        SANITY_CHECK_MATCHES(matches1);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-#undef OCL_BFMATCHER_TYPICAL_MAT_SIZES
diff --git a/modules/ocl/perf/perf_calib3d.cpp b/modules/ocl/perf/perf_calib3d.cpp
deleted file mode 100644
index 12fee54..0000000
--- a/modules/ocl/perf/perf_calib3d.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-///////////// StereoMatchBM ////////////////////////
-
-PERF_TEST(StereoMatchBMFixture, StereoMatchBM)
-{
-    Mat left_image = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE);
-    Mat right_image = imread(getDataPath("gpu/stereobm/aloe-R.png"), cv::IMREAD_GRAYSCALE);
-
-    ASSERT_TRUE(!left_image.empty()) << "no input image";
-    ASSERT_TRUE(!right_image.empty()) << "no input image";
-    ASSERT_TRUE(right_image.size() == left_image.size());
-    ASSERT_TRUE(right_image.size() == left_image.size());
-
-    const int n_disp = 128, winSize = 19;
-    Mat disp(left_image.size(), CV_16SC1);
-
-    declare.in(left_image, right_image).out(disp);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclLeft(left_image), oclRight(right_image),
-                oclDisp(left_image.size(), CV_16SC1);
-        ocl::StereoBM_OCL oclBM(0, n_disp, winSize);
-
-        OCL_TEST_CYCLE() oclBM(oclLeft, oclRight, oclDisp);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
-
-        TEST_CYCLE() bm->compute(left_image, right_image, disp);
-    }
-    else
-        OCL_PERF_ELSE
-
-    int value = 0;
-    SANITY_CHECK(value);
-}
diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp
deleted file mode 100644
index 33723da..0000000
--- a/modules/ocl/perf/perf_canny.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-///////////// Canny ////////////////////////
-
-PERF_TEST(CannyFixture, Canny)
-{
-    Mat img = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE),
-            edges(img.size(), CV_8UC1);
-    ASSERT_TRUE(!img.empty()) << "can't open aloe-L.png";
-
-    declare.in(img).out(edges);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclImg(img), oclEdges(img.size(), CV_8UC1);
-
-        OCL_TEST_CYCLE() ocl::Canny(oclImg, oclEdges, 50.0, 100.0);
-        oclEdges.download(edges);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() Canny(img, edges, 50.0, 100.0);
-    }
-    else
-        OCL_PERF_ELSE
-
-    int value = 0;
-    SANITY_CHECK(value);
-}
diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp
deleted file mode 100644
index 1145f1f..0000000
--- a/modules/ocl/perf/perf_color.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-using std::tr1::make_tuple;
-
-///////////// cvtColor////////////////////////
-
-CV_ENUM(ConversionTypes, COLOR_RGB2GRAY, COLOR_RGB2BGR, COLOR_RGB2YUV, COLOR_YUV2RGB, COLOR_RGB2YCrCb,
-        COLOR_YCrCb2RGB, COLOR_RGB2XYZ, COLOR_XYZ2RGB, COLOR_RGB2HSV, COLOR_HSV2RGB, COLOR_RGB2HLS,
-        COLOR_HLS2RGB, COLOR_BGR5652BGR, COLOR_BGR2BGR565, COLOR_RGBA2mRGBA, COLOR_mRGBA2RGBA, COLOR_YUV2RGB_NV12)
-
-typedef tuple<Size, tuple<ConversionTypes, int, int> > cvtColorParams;
-typedef TestBaseWithParam<cvtColorParams> cvtColorFixture;
-
-PERF_TEST_P(cvtColorFixture, cvtColor, testing::Combine(
-                testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
-                testing::Values(
-                    make_tuple(ConversionTypes(COLOR_RGB2GRAY), 3, 1),
-                    make_tuple(ConversionTypes(COLOR_RGB2BGR), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_RGB2YUV), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_YUV2RGB), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_RGB2YCrCb), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_YCrCb2RGB), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_RGB2XYZ), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_XYZ2RGB), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_RGB2HSV), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_HSV2RGB), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_RGB2HLS), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_HLS2RGB), 3, 3),
-                    make_tuple(ConversionTypes(COLOR_BGR5652BGR), 2, 3),
-                    make_tuple(ConversionTypes(COLOR_BGR2BGR565), 3, 2),
-                    make_tuple(ConversionTypes(COLOR_RGBA2mRGBA), 4, 4),
-                    make_tuple(ConversionTypes(COLOR_mRGBA2RGBA), 4, 4),
-                    make_tuple(ConversionTypes(COLOR_YUV2RGB_NV12), 1, 3)
-                    )))
-{
-    cvtColorParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const tuple<int, int, int> conversionParams = get<1>(params);
-    const int code = get<0>(conversionParams), scn = get<1>(conversionParams),
-            dcn = get<2>(conversionParams);
-
-    Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());
-
-        OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_fast.cpp b/modules/ocl/perf/perf_fast.cpp
deleted file mode 100644
index e5ac848..0000000
--- a/modules/ocl/perf/perf_fast.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-///////////// FAST ////////////////////////
-
-typedef std::tr1::tuple<std::string, int, bool> Image_Threshold_NonmaxSupression_t;
-typedef perf::TestBaseWithParam<Image_Threshold_NonmaxSupression_t> Image_Threshold_NonmaxSupression;
-
-PERF_TEST_P(Image_Threshold_NonmaxSupression, FAST,
-            testing::Combine(testing::Values<string>("gpu/perf/aloe.png"),
-                    testing::Values(20),
-                    testing::Bool()))
-{
-    const Image_Threshold_NonmaxSupression_t params = GetParam();
-    const std::string imgFile = std::tr1::get<0>(params);
-    const int threshold = std::tr1::get<1>(params);
-    const bool nonmaxSupression = std::tr1::get<2>(params);
-
-    const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    if (RUN_OCL_IMPL)
-    {
-        cv::ocl::FAST_OCL fast(threshold, nonmaxSupression, 0.5);
-
-        cv::ocl::oclMat d_img(img);
-        cv::ocl::oclMat d_keypoints;
-
-        OCL_TEST_CYCLE() fast(d_img, cv::ocl::oclMat(), d_keypoints);
-
-        std::vector<cv::KeyPoint> ocl_keypoints;
-        fast.downloadKeypoints(d_keypoints, ocl_keypoints);
-
-        sortKeyPoints(ocl_keypoints);
-
-        SANITY_CHECK_KEYPOINTS(ocl_keypoints);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        std::vector<cv::KeyPoint> cpu_keypoints;
-
-        TEST_CYCLE() cv::FAST(img, cpu_keypoints, threshold, nonmaxSupression);
-
-        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
-    }
-    else
-        OCL_PERF_ELSE;
-}
diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp
deleted file mode 100644
index 49da659..0000000
--- a/modules/ocl/perf/perf_fft.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-///////////// dft ////////////////////////
-
-typedef TestBaseWithParam<Size> dftFixture;
-
-#ifdef HAVE_CLAMDFFT
-
-PERF_TEST_P(dftFixture, dft, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src(srcSize, CV_32FC2), dst;
-    randu(src, 0.0f, 1.0f);
-    declare.in(src);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(7.4);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst;
-
-        OCL_TEST_CYCLE() cv::ocl::dft(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1.5);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::dft(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-#endif
diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
deleted file mode 100644
index 7e5389d..0000000
--- a/modules/ocl/perf/perf_filters.cpp
+++ /dev/null
@@ -1,416 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::get;
-using std::tr1::tuple;
-
-///////////// Blur////////////////////////
-
-typedef Size_MatType BlurFixture;
-
-PERF_TEST_P(BlurFixture, Blur,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params), ksize(3, 3);
-    const int type = get<1>(params), bordertype = BORDER_CONSTANT;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(5);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::blur(oclSrc, oclDst, ksize, Point(-1, -1), bordertype);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::blur(src, dst, ksize, Point(-1, -1), bordertype);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Laplacian////////////////////////
-
-typedef Size_MatType LaplacianFixture;
-
-PERF_TEST_P(LaplacianFixture, Laplacian,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), ksize = 3;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(6);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::Laplacian(oclSrc, oclDst, -1, ksize, 1);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize, 1);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Erode ////////////////////
-
-typedef Size_MatType ErodeFixture;
-
-PERF_TEST_P(ErodeFixture, Erode,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), ksize = 3;
-    const Mat ker = getStructuringElement(MORPH_RECT, Size(ksize, ksize));
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst).in(ker);
-
-    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(5);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKer(ker);
-
-        OCL_TEST_CYCLE() cv::ocl::erode(oclSrc, oclDst, oclKer);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::erode(src, dst, ker);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Sobel ////////////////////////
-
-typedef Size_MatType SobelFixture;
-
-PERF_TEST_P(SobelFixture, Sobel,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), dx = 1, dy = 1;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
-            (srcSize == OCL_SIZE_4000 && type == CV_8UC1))
-        declare.time(5.5);
-    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(20);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::Sobel(oclSrc, oclDst, -1, dx, dy);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Scharr ////////////////////////
-
-typedef Size_MatType ScharrFixture;
-
-PERF_TEST_P(ScharrFixture, Scharr,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), dx = 1, dy = 0;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
-            (srcSize == OCL_SIZE_4000 && type == CV_8UC1))
-        declare.time(5.5);
-    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(21);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::Scharr(oclSrc, oclDst, -1, dx, dy);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::Scharr(src, dst, -1, dx, dy);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// GaussianBlur ////////////////////////
-
-typedef Size_MatType GaussianBlurFixture;
-
-PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
-            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), ksize = 7;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 3e-4;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::GaussianBlur(oclSrc, oclDst, Size(ksize, ksize), 0);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 0);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// filter2D////////////////////////
-
-typedef Size_MatType filter2DFixture;
-
-PERF_TEST_P(filter2DFixture, filter2D,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), ksize = 3;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type), kernel(ksize, ksize, CV_32SC1);
-    declare.in(src, WARMUP_RNG).in(kernel).out(dst);
-    randu(kernel, -3.0, 3.0);
-
-    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
-        declare.time(8);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKernel(kernel);
-
-        OCL_TEST_CYCLE() cv::ocl::filter2D(oclSrc, oclDst, -1, oclKernel);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Bilateral////////////////////////
-
-typedef Size_MatType BilateralFixture;
-
-PERF_TEST_P(BilateralFixture, Bilateral,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), d = 7;
-    const double sigmacolor = 50.0, sigmaspace = 50.0;
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(type == CV_8UC3 ? 8 : 4.5);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// adaptiveBilateral////////////////////////
-
-typedef Size_MatType adaptiveBilateralFixture;
-
-PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
-            ::testing::Combine(::testing::Values(OCL_SIZE_1000), OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    const double sigmaspace = 10.0;
-    Size ksize(9, 9);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1.0);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp
deleted file mode 100644
index 4dcd5d4..0000000
--- a/modules/ocl/perf/perf_gemm.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-///////////// gemm ////////////////////////
-
-typedef TestBaseWithParam<Size> gemmFixture;
-
-#ifdef HAVE_CLAMDBLAS
-
-PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
-{
-    const Size srcSize = GetParam();
-
-    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
-            src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
-    declare.in(src1, src2, src3).out(dst).time(srcSize == OCL_SIZE_2000 ? 65 : 8);
-    randu(src1, -10.0f, 10.0f);
-    randu(src2, -10.0f, 10.0f);
-    randu(src3, -10.0f, 10.0f);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
-                oclSrc3(src3), oclDst(srcSize, CV_32FC1);
-
-        OCL_TEST_CYCLE() cv::ocl::gemm(oclSrc1, oclSrc2, 1.0, oclSrc3, 1.0, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 0.01);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);
-
-        SANITY_CHECK(dst, 0.01);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-#endif
diff --git a/modules/ocl/perf/perf_gftt.cpp b/modules/ocl/perf/perf_gftt.cpp
deleted file mode 100644
index af24c34..0000000
--- a/modules/ocl/perf/perf_gftt.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// GoodFeaturesToTrack ////////////////////////
-
-typedef tuple<string, double> GoodFeaturesToTrackParams;
-typedef TestBaseWithParam<GoodFeaturesToTrackParams> GoodFeaturesToTrackFixture;
-
-PERF_TEST_P(GoodFeaturesToTrackFixture, GoodFeaturesToTrack,
-            ::testing::Combine(::testing::Values(string("gpu/opticalflow/rubberwhale1.png"),
-                                                 string("gpu/stereobm/aloe-L.png")),
-                               ::testing::Range(0.0, 4.0, 3.0)))
-{
-
-    const GoodFeaturesToTrackParams param = GetParam();
-    const string fileName = getDataPath(get<0>(param));
-    const int maxCorners = 2000;
-    const double qualityLevel = 0.01, minDistance = get<1>(param);
-
-    Mat frame = imread(fileName, IMREAD_GRAYSCALE);
-    ASSERT_TRUE(!frame.empty()) << "no input image";
-
-    vector<Point2f> pts_gold;
-    declare.in(frame);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclFrame(frame), pts_oclmat;
-        ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
-
-        OCL_TEST_CYCLE() detector(oclFrame, pts_oclmat);
-
-        detector.downloadPoints(pts_oclmat, pts_gold);
-
-        SANITY_CHECK(pts_gold);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::goodFeaturesToTrack(frame, pts_gold,
-                                             maxCorners, qualityLevel, minDistance);
-
-        SANITY_CHECK(pts_gold);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp
deleted file mode 100644
index dd888ac..0000000
--- a/modules/ocl/perf/perf_haar.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-#include "opencv2/objdetect/objdetect_c.h"
-
-using namespace perf;
-
-///////////// Haar ////////////////////////
-PERF_TEST(HaarFixture, Haar)
-{
-    vector<Rect> faces;
-
-    Mat img = imread(getDataPath("gpu/haarcascade/basketball1.png"), IMREAD_GRAYSCALE);
-    ASSERT_TRUE(!img.empty()) << "can't open basketball1.png";
-    declare.in(img);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        CascadeClassifier faceCascade;
-        ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml")))
-                << "can't load haarcascade_frontalface_alt.xml";
-
-        TEST_CYCLE() faceCascade.detectMultiScale(img, faces,
-                                                     1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-
-        SANITY_CHECK(faces, 4 + 1e-4);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::OclCascadeClassifier faceCascade;
-        ocl::oclMat oclImg(img);
-
-        ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml")))
-                << "can't load haarcascade_frontalface_alt.xml";
-
-        OCL_TEST_CYCLE() faceCascade.detectMultiScale(oclImg, faces,
-                                     1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-
-        SANITY_CHECK(faces, 4 + 1e-4);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-using namespace std;
-using namespace cv;
-using namespace perf;
-using std::tr1::make_tuple;
-using std::tr1::get;
-
-typedef std::tr1::tuple<std::string, std::string, int> OCL_Cascade_Image_MinSize_t;
-typedef perf::TestBaseWithParam<OCL_Cascade_Image_MinSize_t> OCL_Cascade_Image_MinSize;
-
-PERF_TEST_P( OCL_Cascade_Image_MinSize, CascadeClassifier,
-             testing::Combine(
-                testing::Values( string("cv/cascadeandhog/cascades/haarcascade_frontalface_alt.xml") ),
-                testing::Values( string("cv/shared/lena.png"),
-                                 string("cv/cascadeandhog/images/bttf301.png")/*,
-                                 string("cv/cascadeandhog/images/class57.png")*/ ),
-                testing::Values(30, 64, 90) ) )
-{
-    const string cascasePath = get<0>(GetParam());
-    const string imagePath   = get<1>(GetParam());
-    const int min_size = get<2>(GetParam());
-    Size minSize(min_size, min_size);
-    vector<Rect> faces;
-
-    Mat img = imread(getDataPath(imagePath), IMREAD_GRAYSCALE);
-    ASSERT_TRUE(!img.empty()) << "Can't load source image: " << getDataPath(imagePath);
-    equalizeHist(img, img);
-    declare.in(img);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        CascadeClassifier cc;
-        ASSERT_TRUE(cc.load(getDataPath(cascasePath))) << "Can't load cascade file: " << getDataPath(cascasePath);
-
-        while (next())
-        {
-            faces.clear();
-
-            startTimer();
-            cc.detectMultiScale(img, faces, 1.1, 3, 0, minSize);
-            stopTimer();
-        }
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat uimg(img);
-        ocl::OclCascadeClassifier cc;
-        ASSERT_TRUE(cc.load(getDataPath(cascasePath))) << "Can't load cascade file: " << getDataPath(cascasePath);
-
-        while (next())
-        {
-            faces.clear();
-            ocl::finish();
-
-            startTimer();
-            cc.detectMultiScale(uimg, faces, 1.1, 3, 0, minSize);
-            stopTimer();
-        }
-    }
-    else
-        OCL_PERF_ELSE
-
-        //sort(faces.begin(), faces.end(), comparators::RectLess());
-        SANITY_CHECK_NOTHING();//(faces, min_size/5);
-        // using SANITY_CHECK_NOTHING() since OCL and PLAIN version may find different faces number
-}
diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp
deleted file mode 100644
index 2a67311..0000000
--- a/modules/ocl/perf/perf_hog.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-///////////// HOG////////////////////////
-
-struct RectLess :
-        public std::binary_function<cv::Rect, cv::Rect, bool>
-{
-    bool operator()(const cv::Rect& a,
-        const cv::Rect& b) const
-    {
-        if (a.x != b.x)
-            return a.x < b.x;
-        else if (a.y != b.y)
-            return a.y < b.y;
-        else if (a.width != b.width)
-            return a.width < b.width;
-        else
-            return a.height < b.height;
-    }
-};
-
-PERF_TEST(HOGFixture, HOG)
-{
-    Mat src = imread(getDataPath("gpu/hog/road.png"), cv::IMREAD_GRAYSCALE);
-    ASSERT_TRUE(!src.empty()) << "can't open input image road.png";
-
-    vector<cv::Rect> found_locations;
-    declare.in(src).time(5);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        HOGDescriptor hog;
-        hog.setSVMDetector(hog.getDefaultPeopleDetector());
-
-        TEST_CYCLE() hog.detectMultiScale(src, found_locations);
-
-        std::sort(found_locations.begin(), found_locations.end(), RectLess());
-        SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::HOGDescriptor ocl_hog;
-        ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
-        ocl::oclMat oclSrc(src);
-
-        OCL_TEST_CYCLE() ocl_hog.detectMultiScale(oclSrc, found_locations);
-
-        std::sort(found_locations.begin(), found_locations.end(), RectLess());
-        SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_hough.cpp b/modules/ocl/perf/perf_hough.cpp
deleted file mode 100644
index e90356a..0000000
--- a/modules/ocl/perf/perf_hough.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace perf;
-
-//////////////////////////////////////////////////////////////////////
-// HoughCircles
-
-typedef std::tr1::tuple<cv::Size, float, float> Size_Dp_MinDist_t;
-typedef perf::TestBaseWithParam<Size_Dp_MinDist_t> Size_Dp_MinDist;
-
-PERF_TEST_P(Size_Dp_MinDist, OCL_HoughCircles,
-            testing::Combine(
-                testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p),
-                testing::Values(1.0f, 2.0f, 4.0f),
-                testing::Values(1.0f, 10.0f)))
-{
-    const Size_Dp_MinDist_t params = GetParam();
-    const cv::Size size = std::tr1::get<0>(params);
-    const float dp      = std::tr1::get<1>(params);
-    const float minDist = std::tr1::get<2>(params);
-
-    const int minRadius = 10;
-    const int maxRadius = 30;
-    const int cannyThreshold = 100;
-    const int votesThreshold = 15;
-
-    cv::RNG rng(123456789);
-
-    cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)), circles;
-
-    const int numCircles = rng.uniform(50, 100);
-    for (int i = 0; i < numCircles; ++i)
-    {
-        cv::Point center(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
-        const int radius = rng.uniform(minRadius, maxRadius + 1);
-
-        cv::circle(src, center, radius, cv::Scalar::all(255), -1);
-    }
-
-    declare.time(10.0).iterations(25);
-
-    if (RUN_OCL_IMPL)
-    {
-        cv::ocl::oclMat ocl_src(src), ocl_circles;
-
-        OCL_TEST_CYCLE() cv::ocl::HoughCircles(ocl_src, ocl_circles, HOUGH_GRADIENT, dp, minDist,
-                                               cannyThreshold, votesThreshold, minRadius, maxRadius);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::HoughCircles(src, circles, HOUGH_GRADIENT, dp, minDist, cannyThreshold,
-                                      votesThreshold, minRadius, maxRadius);
-    }
-    else
-        OCL_PERF_ELSE
-
-    int value = 0;
-    SANITY_CHECK(value);
-}
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp
deleted file mode 100644
index 51b354f..0000000
--- a/modules/ocl/perf/perf_imgproc.cpp
+++ /dev/null
@@ -1,737 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// equalizeHist ////////////////////////
-
-typedef TestBaseWithParam<Size> equalizeHistFixture;
-
-PERF_TEST_P(equalizeHistFixture, equalizeHist, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    const double eps = 1 + DBL_EPSILON;
-
-    Mat src(srcSize, CV_8UC1), dst(srcSize, CV_8UC1);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
-
-        OCL_TEST_CYCLE() cv::ocl::equalizeHist(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::equalizeHist(src, dst);
-
-        SANITY_CHECK(dst, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-/////////// CopyMakeBorder //////////////////////
-
-CV_ENUM(Border, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,
-        BORDER_WRAP, BORDER_REFLECT_101)
-
-typedef tuple<Size, MatType, Border> CopyMakeBorderParamType;
-typedef TestBaseWithParam<CopyMakeBorderParamType> CopyMakeBorderFixture;
-
-PERF_TEST_P(CopyMakeBorderFixture, CopyMakeBorder,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               Border::all()))
-{
-    const CopyMakeBorderParamType params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), borderType = get<2>(params);
-
-    Mat src(srcSize, type), dst;
-    const Size dstSize = srcSize + Size(12, 12);
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::copyMakeBorder(oclSrc, oclDst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::copyMakeBorder(src, dst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// cornerMinEigenVal ////////////////////////
-
-typedef Size_MatType cornerMinEigenValFixture;
-
-PERF_TEST_P(cornerMinEigenValFixture, cornerMinEigenVal,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), borderType = BORDER_REFLECT;
-    const int blockSize = 7, apertureSize = 1 + 2 * 3;
-
-    Mat src(srcSize, type), dst(srcSize, CV_32FC1);
-    declare.in(src, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
-
-    const int depth = CV_MAT_DEPTH(type);
-    const ERROR_TYPE errorType = depth == CV_8U ? ERROR_ABSOLUTE : ERROR_RELATIVE;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
-
-        OCL_TEST_CYCLE() cv::ocl::cornerMinEigenVal(oclSrc, oclDst, blockSize, apertureSize, borderType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1e-6, errorType);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
-
-        SANITY_CHECK(dst, 1e-6, errorType);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// cornerHarris ////////////////////////
-
-typedef Size_MatType cornerHarrisFixture;
-
-PERF_TEST_P(cornerHarrisFixture, cornerHarris,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), borderType = BORDER_REFLECT;
-
-    Mat src(srcSize, type), dst(srcSize, CV_32FC1);
-    randu(src, 0, 1);
-    declare.in(src).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
-
-        OCL_TEST_CYCLE() cv::ocl::cornerHarris(oclSrc, oclDst, 5, 7, 0.1, borderType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 3e-5);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::cornerHarris(src, dst, 5, 7, 0.1, borderType);
-
-        SANITY_CHECK(dst, 3e-5);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// integral ////////////////////////
-
-typedef TestBaseWithParam<Size> integralFixture;
-
-PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src(srcSize, CV_8UC1), dst;
-    declare.in(src, WARMUP_RNG);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst;
-
-        OCL_TEST_CYCLE() cv::ocl::integral(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::integral(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// threshold////////////////////////
-
-CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
-
-typedef tuple<Size, MatType, ThreshType> ThreshParams;
-typedef TestBaseWithParam<ThreshParams> ThreshFixture;
-
-PERF_TEST_P(ThreshFixture, threshold,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
-                               ThreshType::all()))
-{
-    const ThreshParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int srcType = get<1>(params);
-    const int threshType = get<2>(params);
-    const double maxValue = 220.0, threshold = 50;
-
-    Mat src(srcSize, srcType), dst(srcSize, srcType);
-    randu(src, 0, 100);
-    declare.in(src).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8U);
-
-        OCL_TEST_CYCLE() cv::ocl::threshold(oclSrc, oclDst, threshold, maxValue, threshType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::threshold(src, dst, threshold, maxValue, threshType);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// meanShiftFiltering////////////////////////
-
-typedef struct _COOR
-{
-    short x;
-    short y;
-} COOR;
-
-static COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
-{
-
-    int isr2 = sr * sr;
-    int c0, c1, c2, c3;
-    int iter;
-    uchar *ptr = NULL;
-    uchar *pstart = NULL;
-    int revx = 0, revy = 0;
-    c0 = sptr[0];
-    c1 = sptr[1];
-    c2 = sptr[2];
-    c3 = sptr[3];
-    // iterate meanshift procedure
-    for(iter = 0; iter < maxIter; iter++ )
-    {
-        int count = 0;
-        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
-
-        //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
-        int minx = x0 - sp;
-        int miny = y0 - sp;
-        int maxx = x0 + sp;
-        int maxy = y0 + sp;
-
-        //deal with the image boundary
-        if(minx < 0) minx = 0;
-        if(miny < 0) miny = 0;
-        if(maxx >= size.width) maxx = size.width - 1;
-        if(maxy >= size.height) maxy = size.height - 1;
-        if(iter == 0)
-        {
-            pstart = sptr;
-        }
-        else
-        {
-            pstart = pstart + revy * sstep + (revx << 2); //point to the new position
-        }
-        ptr = pstart;
-        ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
-
-        for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
-        {
-            int rowCount = 0;
-            int x = minx;
-#if CV_ENABLE_UNROLLED
-            for( ; x + 4 <= maxx; x += 4, ptr += 16)
-            {
-                int t0, t1, t2;
-                t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x;
-                    rowCount++;
-                }
-                t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 1;
-                    rowCount++;
-                }
-                t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 2;
-                    rowCount++;
-                }
-                t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 3;
-                    rowCount++;
-                }
-            }
-#endif
-            for(; x <= maxx; x++, ptr += 4)
-            {
-                int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x;
-                    rowCount++;
-                }
-            }
-            if(rowCount == 0)
-                continue;
-            count += rowCount;
-            sy += y * rowCount;
-        }
-
-        if( count == 0 )
-            break;
-
-        int x1 = sx / count;
-        int y1 = sy / count;
-        s0 = s0 / count;
-        s1 = s1 / count;
-        s2 = s2 / count;
-
-        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
-            tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
-
-        //revise the pointer corresponding to the new (y0,x0)
-        revx = x1 - x0;
-        revy = y1 - y0;
-
-        x0 = x1;
-        y0 = y1;
-        c0 = s0;
-        c1 = s1;
-        c2 = s2;
-
-        if( stopFlag )
-            break;
-    } //for iter
-
-    dptr[0] = (uchar)c0;
-    dptr[1] = (uchar)c1;
-    dptr[2] = (uchar)c2;
-    dptr[3] = (uchar)c3;
-
-    COOR coor;
-    coor.x = static_cast<short>(x0);
-    coor.y = static_cast<short>(y0);
-    return coor;
-}
-
-static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
-{
-    if( src_roi.empty() )
-        CV_Error( Error::StsBadArg, "The input image is empty" );
-
-    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
-        CV_Error( Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
-
-    dst_roi.create(src_roi.size(), src_roi.type());
-
-    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
-    CV_Assert( !(dst_roi.step & 0x3) );
-
-    if( !(crit.type & cv::TermCriteria::MAX_ITER) )
-        crit.maxCount = 5;
-    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
-    float eps;
-    if( !(crit.type & cv::TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(crit.epsilon, 0.0);
-
-    int tab[512];
-    for(int i = 0; i < 512; i++)
-        tab[i] = (i - 255) * (i - 255);
-    uchar *sptr = src_roi.data;
-    uchar *dptr = dst_roi.data;
-    int sstep = (int)src_roi.step;
-    int dstep = (int)dst_roi.step;
-    cv::Size size = src_roi.size();
-
-    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
-        dptr += dstep - (size.width << 2))
-    {
-        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
-        {
-            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
-        }
-    }
-}
-
-typedef TestBaseWithParam<Size> meanShiftFilteringFixture;
-
-PERF_TEST_P(meanShiftFilteringFixture, meanShiftFiltering,
-            OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    const int sp = 5, sr = 6;
-    cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
-
-    Mat src(srcSize, CV_8UC4), dst(srcSize, CV_8UC4);
-    declare.in(src, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ?
-                      56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() meanShiftFiltering_(src, dst, sp, sr, crit);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8UC4);
-
-        OCL_TEST_CYCLE() ocl::meanShiftFiltering(oclSrc, oclDst, sp, sr, crit);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-static void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
-{
-    if (src_roi.empty())
-    {
-        CV_Error(Error::StsBadArg, "The input image is empty");
-    }
-    if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
-    {
-        CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
-    }
-
-    dst_roi.create(src_roi.size(), src_roi.type());
-    dstCoor_roi.create(src_roi.size(), CV_16SC2);
-
-    CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
-              (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
-    CV_Assert(!(dstCoor_roi.step & 0x3));
-
-    if (!(crit.type & cv::TermCriteria::MAX_ITER))
-    {
-        crit.maxCount = 5;
-    }
-
-    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
-    float eps;
-
-    if (!(crit.type & cv::TermCriteria::EPS))
-    {
-        eps = 1.f;
-    }
-
-    eps = (float)std::max(crit.epsilon, 0.0);
-
-    int tab[512];
-
-    for (int i = 0; i < 512; i++)
-    {
-        tab[i] = (i - 255) * (i - 255);
-    }
-
-    uchar *sptr = src_roi.data;
-    uchar *dptr = dst_roi.data;
-    short *dCoorptr = (short *)dstCoor_roi.data;
-    int sstep = (int)src_roi.step;
-    int dstep = (int)dst_roi.step;
-    int dCoorstep = (int)dstCoor_roi.step >> 1;
-    cv::Size size = src_roi.size();
-
-    for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
-            dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
-    {
-        for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
-        {
-            *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
-        }
-    }
-
-}
-
-typedef TestBaseWithParam<Size> meanShiftProcFixture;
-
-PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
-            OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
-
-    Mat src(srcSize, CV_8UC4), dst1(srcSize, CV_8UC4),
-            dst2(srcSize, CV_16SC2);
-    declare.in(src, WARMUP_RNG).out(dst1, dst2)
-            .time(srcSize == OCL_SIZE_4000 ?
-                      56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);;
-
-    if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() meanShiftProc_(src, dst1, dst2, 5, 6, crit);
-
-        SANITY_CHECK(dst1);
-        SANITY_CHECK(dst2);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst1(srcSize, CV_8UC4),
-                oclDst2(srcSize, CV_16SC2);
-
-        OCL_TEST_CYCLE() ocl::meanShiftProc(oclSrc, oclDst1, oclDst2, 5, 6, crit);
-
-        oclDst1.download(dst1);
-        oclDst2.download(dst2);
-
-        SANITY_CHECK(dst1);
-        SANITY_CHECK(dst2);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// CLAHE ////////////////////////
-
-typedef TestBaseWithParam<Size> CLAHEFixture;
-
-PERF_TEST_P(CLAHEFixture, CLAHE, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-    const string impl = getSelectedImpl();
-
-    Mat src(srcSize, CV_8UC1), dst;
-    const double clipLimit = 40.0;
-    declare.in(src, WARMUP_RNG);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(11);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst;
-        cv::Ptr<cv::CLAHE> oclClahe = cv::ocl::createCLAHE(clipLimit);
-
-        OCL_TEST_CYCLE() oclClahe->apply(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit);
-        TEST_CYCLE() clahe->apply(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// columnSum////////////////////////
-
-typedef TestBaseWithParam<Size> columnSumFixture;
-
-static void columnSumPerfTest(const Mat & src, Mat & dst)
-{
-    for (int j = 0; j < src.cols; j++)
-        dst.at<float>(0, j) = src.at<float>(0, j);
-
-    for (int i = 1; i < src.rows; ++i)
-        for (int j = 0; j < src.cols; ++j)
-            dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
-}
-
-PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam();
-
-    Mat src(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(5);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
-
-        OCL_TEST_CYCLE() cv::ocl::columnSum(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() columnSumPerfTest(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-//////////////////////////////distanceToCenters////////////////////////////////////////////////
-
-CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
-
-typedef tuple<Size, DistType> distanceToCentersParameters;
-typedef TestBaseWithParam<distanceToCentersParameters> distanceToCentersFixture;
-
-static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType)
-{
-    Mat batch_dists;
-    cv::batchDistance(src, centers, batch_dists, CV_32FC1, noArray(), distType);
-
-    std::vector<float> dists_v;
-    std::vector<int> labels_v;
-
-    for (int i = 0; i < batch_dists.rows; i++)
-    {
-        Mat r = batch_dists.row(i);
-        double mVal;
-        Point mLoc;
-
-        minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
-        dists_v.push_back(static_cast<float>(mVal));
-        labels_v.push_back(mLoc.x);
-    }
-
-    Mat(dists_v).copyTo(dists);
-    Mat(labels_v).copyTo(labels);
-}
-
-PERF_TEST_P(distanceToCentersFixture, distanceToCenters, ::testing::Combine(::testing::Values(cv::Size(256,256), cv::Size(512,512)), DistType::all()) )
-{
-    Size size = get<0>(GetParam());
-    int distType = get<1>(GetParam());
-
-    Mat src(size, CV_32FC1), centers(size, CV_32FC1);
-    Mat dists(src.rows, 1, CV_32FC1), labels(src.rows, 1, CV_32SC1);
-
-    declare.in(src, centers, WARMUP_RNG).out(dists, labels);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat ocl_src(src), ocl_centers(centers);
-
-        OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_src, ocl_centers, dists, labels, distType);
-
-        SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
-        SANITY_CHECK(labels);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() distanceToCentersPerfTest(src, centers, dists, labels, distType);
-
-        SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
-        SANITY_CHECK(labels);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp
deleted file mode 100644
index e768d66..0000000
--- a/modules/ocl/perf/perf_imgwarp.cpp
+++ /dev/null
@@ -1,364 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// WarpAffine ////////////////////////
-
-typedef Size_MatType WarpAffineFixture;
-
-PERF_TEST_P(WarpAffineFixture, WarpAffine,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[2][3] =
-    {
-        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
-        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
-    };
-    Mat M(2, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_NEAREST;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// WarpPerspective ////////////////////////
-
-typedef Size_MatType WarpPerspectiveFixture;
-
-PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[3][3] =
-    {
-        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
-        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
-        {0.0, 0.0, 1.0}
-    };
-    Mat M(3, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_LINEAR;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// resize ////////////////////////
-
-CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
-typedef TestBaseWithParam<resizeParams> resizeFixture;
-
-PERF_TEST_P(resizeFixture, resize,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               resizeInterType::all(),
-                               ::testing::Values(0.5, 2.0)))
-{
-    const resizeParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interType = get<2>(params);
-    double scale = get<3>(params);
-    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-    checkDeviceMaxMemoryAllocSize(dstSize, type);
-
-    Mat src(srcSize, type), dst;
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
-        declare.time(11);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-typedef tuple<Size, MatType, double> resizeAreaParams;
-typedef TestBaseWithParam<resizeAreaParams> resizeAreaFixture;
-
-PERF_TEST_P(resizeAreaFixture, resize,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
-                               ::testing::Values(0.3, 0.5, 0.6)))
-{
-    const resizeAreaParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    double scale = get<2>(params);
-    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type), dst;
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, cv::INTER_AREA);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, cv::INTER_AREA);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// remap////////////////////////
-
-CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, RemapInterType> remapParams;
-typedef TestBaseWithParam<remapParams> remapFixture;
-
-PERF_TEST_P(remapFixture, remap,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               RemapInterType::all()))
-{
-    const remapParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interpolation = get<2>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
-        declare.time(9);
-
-    Mat xmap, ymap;
-    xmap.create(srcSize, CV_32FC1);
-    ymap.create(srcSize, CV_32FC1);
-
-    for (int i = 0; i < srcSize.height; ++i)
-    {
-        float * const xmap_row = xmap.ptr<float>(i);
-        float * const ymap_row = ymap.ptr<float>(i);
-
-        for (int j = 0; j < srcSize.width; ++j)
-        {
-            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
-            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
-        }
-    }
-
-    const int borderMode = BORDER_CONSTANT;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
-
-        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-
-///////////// buildWarpPerspectiveMaps ////////////////////////
-
-static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
-{
-    CV_Assert(M.rows == 3 && M.cols == 3);
-    CV_Assert(dsize.area() > 0);
-
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
-
-    float coeffs[3 * 3];
-    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
-
-    if (inverse)
-        M.convertTo(coeffsMat, coeffsMat.type());
-    else
-    {
-        cv::Mat iM;
-        invert(M, iM);
-        iM.convertTo(coeffsMat, coeffsMat.type());
-    }
-
-    for (int y = 0; y < dsize.height; ++y)
-    {
-        float * const xmap_ptr = xmap.ptr<float>(y);
-        float * const ymap_ptr = ymap.ptr<float>(y);
-
-        for (int x = 0; x < dsize.width; ++x)
-        {
-            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
-            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
-            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
-        }
-    }
-}
-
-typedef TestBaseWithParam<Size> buildWarpPerspectiveMapsFixture;
-
-PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
-{
-    static const double coeffs[3][3] =
-    {
-        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
-        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
-        {0.0, 0.0, 1.0}
-    };
-    Mat M(3, 3, CV_64F, (void *)coeffs);
-    const Size dsize = GetParam();
-    const double eps = 5e-4;
-
-    Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
-    declare.in(M).out(xmap, ymap);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclXMap(dsize, CV_32FC1), oclYMap(dsize, CV_32FC1);
-
-        OCL_TEST_CYCLE() cv::ocl::buildWarpPerspectiveMaps(M, true, dsize, oclXMap, oclYMap);
-
-        oclXMap.download(xmap);
-        oclYMap.download(ymap);
-
-        SANITY_CHECK(xmap, eps);
-        SANITY_CHECK(ymap, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);
-
-        SANITY_CHECK(xmap, eps);
-        SANITY_CHECK(ymap, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_kalman.cpp b/modules/ocl/perf/perf_kalman.cpp
deleted file mode 100644
index 946444a..0000000
--- a/modules/ocl/perf/perf_kalman.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-#ifdef HAVE_CLAMDBLAS
-
-using namespace perf;
-using namespace std;
-using namespace cv::ocl;
-using namespace cv;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// Kalman Filter ////////////////////////
-
-typedef tuple<int> KalmanFilterType;
-typedef TestBaseWithParam<KalmanFilterType> KalmanFilterFixture;
-
-PERF_TEST_P(KalmanFilterFixture, KalmanFilter,
-    ::testing::Values(1000, 1500))
-{
-    KalmanFilterType params = GetParam();
-    const int dim = get<0>(params);
-
-    cv::Mat sample(dim, 1, CV_32FC1), dresult;
-    randu(sample, -1, 1);
-
-    cv::Mat statePre_;
-
-    if (RUN_PLAIN_IMPL)
-    {
-        cv::KalmanFilter kalman;
-        TEST_CYCLE()
-        {
-            kalman.init(dim, dim);
-            kalman.correct(sample);
-            kalman.predict();
-        }
-        statePre_ = kalman.statePre;
-    }
-    else if(RUN_OCL_IMPL)
-    {
-        cv::ocl::oclMat dsample(sample);
-        cv::ocl::KalmanFilter kalman_ocl;
-        OCL_TEST_CYCLE()
-        {
-            kalman_ocl.init(dim, dim);
-            kalman_ocl.correct(dsample);
-            kalman_ocl.predict();
-        }
-        kalman_ocl.statePre.download(statePre_);
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK(statePre_);
-}
-
-#endif // HAVE_CLAMDBLAS
diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp
deleted file mode 100644
index 9c9829c..0000000
--- a/modules/ocl/perf/perf_match_template.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-/////////// matchTemplate ////////////////////////
-
-typedef Size_MatType CV_TM_CCORRFixture;
-
-PERF_TEST_P(CV_TM_CCORRFixture, matchTemplate,
-            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
-                               OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params), templSize(5, 5);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), templ(templSize, type);
-    const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
-    Mat dst(dstSize, CV_32F);
-    randu(src, 0.0f, 1.0f);
-    randu(templ, 0.0f, 1.0f);
-    declare.time(srcSize == OCL_SIZE_2000 ? 20 : 6).in(src, templ).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_32F);
-
-        OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1e-4);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR);
-
-        SANITY_CHECK(dst, 1e-4);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-typedef TestBaseWithParam<Size> CV_TM_CCORR_NORMEDFixture;
-
-PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES)
-{
-    const Size srcSize = GetParam(), templSize(5, 5);
-
-    Mat src(srcSize, CV_8UC1), templ(templSize, CV_8UC1), dst;
-    const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
-    dst.create(dstSize, CV_8UC1);
-    declare.in(src, templ, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_2000 ? 10 : srcSize == OCL_SIZE_4000 ? 23 : 2);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_8UC1);
-
-        OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR_NORMED);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 3e-2);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR_NORMED);
-
-        SANITY_CHECK(dst, 3e-2);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp
deleted file mode 100644
index 5ca322e..0000000
--- a/modules/ocl/perf/perf_matrix_operation.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// ConvertTo////////////////////////
-
-typedef Size_MatType ConvertToFixture;
-
-PERF_TEST_P(ConvertToFixture, ConvertTo,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst;
-    const int dstType = CV_MAKE_TYPE(CV_32F, src.channels());
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-    checkDeviceMaxMemoryAllocSize(srcSize, dstType);
-
-    dst.create(srcSize, dstType);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, dstType);
-
-        OCL_TEST_CYCLE() oclSrc.convertTo(oclDst, dstType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() src.convertTo(dst, dstType);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// copyTo////////////////////////
-
-typedef Size_MatType copyToFixture;
-
-PERF_TEST_P(copyToFixture, copyTo,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() oclSrc.copyTo(oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() src.copyTo(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// setTo////////////////////////
-
-typedef Size_MatType setToFixture;
-
-PERF_TEST_P(setToFixture, setTo,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    const Scalar val(1, 2, 3, 4);
-
-    Mat src(srcSize, type);
-    declare.in(src);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(srcSize, type);
-
-        OCL_TEST_CYCLE() oclSrc.setTo(val);
-        oclSrc.download(src);
-
-        SANITY_CHECK(src);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() src.setTo(val);
-
-        SANITY_CHECK(src);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-#if 0
-
-/////////////////// upload ///////////////////////////
-
-typedef tuple<Size, MatDepth, int> uploadParams;
-typedef TestBaseWithParam<uploadParams> uploadFixture;
-
-PERF_TEST_P(uploadFixture, upload,
-            testing::Combine(
-                OCL_TYPICAL_MAT_SIZES,
-                testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F),
-                testing::Range(1, 5)))
-{
-    const uploadParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int depth = get<1>(params), cn = get<2>(params);
-    const int type = CV_MAKE_TYPE(depth, cn);
-
-    Mat src(srcSize, type), dst;
-    declare.in(src, WARMUP_RNG);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclDst;
-
-        for(; startTimer(), next(); ocl::finish(), stopTimer(), oclDst.release())
-            oclDst.upload(src);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
-            dst = src.clone();
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK_NOTHING();
-}
-
-/////////////////// download ///////////////////////////
-
-typedef TestBaseWithParam<uploadParams> downloadFixture;
-
-PERF_TEST_P(downloadFixture, download,
-            testing::Combine(
-                OCL_TYPICAL_MAT_SIZES,
-                testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F),
-                testing::Range(1, 5)))
-{
-    const uploadParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int depth = get<1>(params), cn = get<2>(params);
-    const int type = CV_MAKE_TYPE(depth, cn);
-
-    Mat src(srcSize, type), dst;
-    declare.in(src, WARMUP_RNG);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-
-        for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
-            oclSrc.download(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
-            dst = src.clone();
-    }
-    else
-        OCL_PERF_ELSE
-
-    SANITY_CHECK_NOTHING();
-}
-
-#endif
diff --git a/modules/ocl/perf/perf_ml.cpp b/modules/ocl/perf/perf_ml.cpp
deleted file mode 100644
index db45ece..0000000
--- a/modules/ocl/perf/perf_ml.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//    Xiaopeng Fu, fuxiaopeng2222@163.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-using namespace perf;
-using namespace std;
-using namespace cv::ocl;
-using namespace cv;
-using std::tr1::tuple;
-using std::tr1::get;
-////////////////////////////////// K-NEAREST NEIGHBOR ////////////////////////////////////
-static void genData(Mat& trainData, Size size, Mat& trainLabel = Mat().setTo(Scalar::all(0)), int nClasses = 0)
-{
-    trainData.create(size, CV_32FC1);
-    randu(trainData, 1.0, 100.0);
-
-    if(nClasses != 0)
-    {
-        trainLabel.create(size.height, 1, CV_8UC1);
-        randu(trainLabel, 0, nClasses - 1);
-        trainLabel.convertTo(trainLabel, CV_32FC1);
-    }
-}
-
-typedef tuple<int> KNNParamType;
-typedef TestBaseWithParam<KNNParamType> KNNFixture;
-
-PERF_TEST_P(KNNFixture, KNN,
-            testing::Values(1000, 2000, 4000))
-{
-    KNNParamType params = GetParam();
-    const int rows = get<0>(params);
-    int columns = 100;
-    int k = rows/250;
-
-    Mat trainData, trainLabels;
-    Size size(columns, rows);
-    genData(trainData, size, trainLabels, 3);
-
-    Mat testData;
-    genData(testData, size);
-    Mat best_label;
-
-    if(RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE()
-        {
-            CvKNearest knn_cpu;
-            knn_cpu.train(trainData, trainLabels);
-            knn_cpu.find_nearest(testData, k, &best_label);
-        }
-    }else if(RUN_OCL_IMPL)
-    {
-        cv::ocl::oclMat best_label_ocl;
-        cv::ocl::oclMat testdata;
-        testdata.upload(testData);
-
-        OCL_TEST_CYCLE()
-        {
-            cv::ocl::KNearestNeighbour knn_ocl;
-            knn_ocl.train(trainData, trainLabels);
-            knn_ocl.find_nearest(testdata, k, best_label_ocl);
-        }
-        best_label_ocl.download(best_label);
-    }else
-        OCL_PERF_ELSE
-    SANITY_CHECK(best_label);
-}
\ No newline at end of file
diff --git a/modules/ocl/perf/perf_moments.cpp b/modules/ocl/perf/perf_moments.cpp
deleted file mode 100644
index 631031e..0000000
--- a/modules/ocl/perf/perf_moments.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other Materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-
-///////////// Moments ////////////////////////
-//*! performance of image
-typedef tuple<Size, MatType, bool> MomentsParamType;
-typedef TestBaseWithParam<MomentsParamType> MomentsFixture;
-
-PERF_TEST_P(MomentsFixture, Moments,
-    ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                       OCL_PERF_ENUM(CV_8UC1, CV_16SC1, CV_16UC1, CV_32FC1), ::testing::Bool()))
-{
-    const MomentsParamType params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    const bool binaryImage = get<2>(params);
-
-    Mat  src(srcSize, type), dst(7, 1, CV_64F);
-    randu(src, 0, 255);
-
-    cv::Moments mom;
-    if (RUN_OCL_IMPL)
-    {
-        oclMat src_d(src);
-        OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(src_d, binaryImage);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() mom = cv::moments(src, binaryImage);
-    }
-    else
-        OCL_PERF_ELSE
-    cv::HuMoments(mom, dst);
-    SANITY_CHECK(dst, 2e-1);
-}
diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp
deleted file mode 100644
index ff49eb4..0000000
--- a/modules/ocl/perf/perf_norm.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// norm////////////////////////
-
-typedef tuple<Size, MatType> normParams;
-typedef TestBaseWithParam<normParams> normFixture;
-
-PERF_TEST_P(normFixture, norm, testing::Combine(
-                OCL_TYPICAL_MAT_SIZES,
-                OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
-{
-    const normParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    double value = 0.0;
-    const double eps = CV_MAT_DEPTH(type) == CV_8U ? DBL_EPSILON : 1e-3;
-
-    Mat src1(srcSize, type), src2(srcSize, type);
-    declare.in(src1, src2, WARMUP_RNG);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc1(src1), oclSrc2(src2);
-
-        OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF);
-
-        SANITY_CHECK(value, eps);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() value = cv::norm(src1, src2, NORM_INF);
-
-        SANITY_CHECK(value);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_opticalflow.cpp b/modules/ocl/perf/perf_opticalflow.cpp
deleted file mode 100644
index bc1761b..0000000
--- a/modules/ocl/perf/perf_opticalflow.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-///////////// PyrLKOpticalFlow ////////////////////////
-
-using namespace perf;
-using std::tr1::get;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
-
-CV_ENUM(LoadMode, IMREAD_GRAYSCALE, IMREAD_COLOR)
-
-typedef tuple<int, tuple<string, string, LoadMode> > PyrLKOpticalFlowParamType;
-typedef TestBaseWithParam<PyrLKOpticalFlowParamType> PyrLKOpticalFlowFixture;
-
-PERF_TEST_P(PyrLKOpticalFlowFixture,
-            PyrLKOpticalFlow,
-            ::testing::Combine(
-                ::testing::Values(1000, 2000, 4000),
-                ::testing::Values(
-                    make_tuple<string, string, LoadMode>
-                    (
-                        string("gpu/opticalflow/rubberwhale1.png"),
-                        string("gpu/opticalflow/rubberwhale2.png"),
-                        LoadMode(IMREAD_COLOR)
-                        ),
-                    make_tuple<string, string, LoadMode>
-                    (
-                        string("gpu/stereobm/aloe-L.png"),
-                        string("gpu/stereobm/aloe-R.png"),
-                        LoadMode(IMREAD_GRAYSCALE)
-                        )
-                    )
-                )
-            )
-{
-    PyrLKOpticalFlowParamType params = GetParam();
-    tuple<string, string, LoadMode> fileParam = get<1>(params);
-    const int pointsCount = get<0>(params);
-    const int openMode = static_cast<int>(get<2>(fileParam));
-    const string fileName0 = get<0>(fileParam), fileName1 = get<1>(fileParam);
-    Mat frame0 = imread(getDataPath(fileName0), openMode);
-    Mat frame1 = imread(getDataPath(fileName1), openMode);
-
-    declare.in(frame0, frame1);
-
-    ASSERT_FALSE(frame0.empty()) << "can't load " << fileName0;
-    ASSERT_FALSE(frame1.empty()) << "can't load " << fileName1;
-
-    Mat grayFrame;
-    if (openMode == IMREAD_COLOR)
-        cvtColor(frame0, grayFrame, COLOR_BGR2GRAY);
-    else
-        grayFrame = frame0;
-
-    vector<Point2f> pts, nextPts;
-    vector<unsigned char> status;
-    vector<float> err;
-    goodFeaturesToTrack(grayFrame, pts, pointsCount, 0.01, 0.0);
-    Mat ptsMat(1, static_cast<int>(pts.size()), CV_32FC2, (void *)&pts[0]);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE()
-                cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::PyrLKOpticalFlow oclPyrLK;
-        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1);
-        ocl::oclMat oclPts(ptsMat);
-        ocl::oclMat oclNextPts, oclStatus, oclErr;
-
-        OCL_TEST_CYCLE()
-                oclPyrLK.sparse(oclFrame0, oclFrame1, oclPts, oclNextPts, oclStatus, &oclErr);
-    }
-    else
-        OCL_PERF_ELSE
-
-    int value = 0;
-    SANITY_CHECK(value);
-}
-
-PERF_TEST(tvl1flowFixture, tvl1flow)
-{
-    Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
-
-    Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
-
-    const Size srcSize = frame0.size();
-    const double eps = 1.2;
-    Mat flow(srcSize, CV_32FC2), flow1(srcSize, CV_32FC1), flow2(srcSize, CV_32FC1);
-    declare.in(frame0, frame1).out(flow1, flow2).time(159);
-
-    if (RUN_PLAIN_IMPL)
-    {
-        Ptr<DenseOpticalFlow> alg = createOptFlow_DualTVL1();
-
-        TEST_CYCLE() alg->calc(frame0, frame1, flow);
-
-        alg->collectGarbage();
-        Mat flows[2] = { flow1, flow2 };
-        split(flow, flows);
-
-        SANITY_CHECK(flow1, eps);
-        SANITY_CHECK(flow2, eps);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::OpticalFlowDual_TVL1_OCL oclAlg;
-        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1), oclFlow1(srcSize, CV_32FC1),
-                oclFlow2(srcSize, CV_32FC1);
-
-        OCL_TEST_CYCLE() oclAlg(oclFrame0, oclFrame1, oclFlow1, oclFlow2);
-
-        oclAlg.collectGarbage();
-
-        oclFlow1.download(flow1);
-        oclFlow2.download(flow2);
-
-        SANITY_CHECK(flow1, eps);
-        SANITY_CHECK(flow2, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// FarnebackOpticalFlow ////////////////////////
-
-CV_ENUM(farneFlagType, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
-
-typedef tuple<tuple<int, double>, farneFlagType, bool> FarnebackOpticalFlowParams;
-typedef TestBaseWithParam<FarnebackOpticalFlowParams> FarnebackOpticalFlowFixture;
-
-PERF_TEST_P(FarnebackOpticalFlowFixture, FarnebackOpticalFlow,
-            ::testing::Combine(
-                ::testing::Values(make_tuple<int, double>(5, 1.1),
-                                  make_tuple<int, double>(7, 1.5)),
-                farneFlagType::all(),
-                ::testing::Bool()))
-{
-    Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
-
-    Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
-
-    const Size srcSize = frame0.size();
-
-    const FarnebackOpticalFlowParams params = GetParam();
-    const tuple<int, double> polyParams = get<0>(params);
-    const int polyN = get<0>(polyParams), flags = get<1>(params);
-    const double polySigma = get<1>(polyParams), pyrScale = 0.5;
-    const bool useInitFlow = get<2>(params);
-    const double eps = 1.5;
-
-    Mat flowx(srcSize, CV_32FC1), flowy(srcSize, CV_32FC1), flow(srcSize, CV_32FC2);
-    declare.in(frame0, frame1).out(flowx, flowy);
-
-    ocl::FarnebackOpticalFlow farn;
-    farn.pyrScale = pyrScale;
-    farn.polyN = polyN;
-    farn.polySigma = polySigma;
-    farn.flags = flags;
-
-    if (RUN_PLAIN_IMPL)
-    {
-        if (useInitFlow)
-        {
-            calcOpticalFlowFarneback(
-                        frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
-                        farn.numIters, farn.polyN, farn.polySigma, farn.flags);
-            farn.flags |= OPTFLOW_USE_INITIAL_FLOW;
-        }
-
-        TEST_CYCLE()
-                calcOpticalFlowFarneback(
-                    frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
-                    farn.numIters, farn.polyN, farn.polySigma, farn.flags);
-
-        Mat flowxy[2] = { flowx, flowy };
-        split(flow, flowxy);
-
-        SANITY_CHECK(flowx, eps);
-        SANITY_CHECK(flowy, eps);
-    }
-    else if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1),
-                oclFlowx(srcSize, CV_32FC1), oclFlowy(srcSize, CV_32FC1);
-
-        if (useInitFlow)
-        {
-            farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
-            farn.flags |= OPTFLOW_USE_INITIAL_FLOW;
-        }
-
-        OCL_TEST_CYCLE()
-                farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
-
-        oclFlowx.download(flowx);
-        oclFlowy.download(flowy);
-
-        SANITY_CHECK(flowx, eps);
-        SANITY_CHECK(flowy, eps);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_orb.cpp b/modules/ocl/perf/perf_orb.cpp
deleted file mode 100644
index 628a560..0000000
--- a/modules/ocl/perf/perf_orb.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-/////////////////// ORB ///////////////////
-
-typedef std::tr1::tuple<std::string, int> Image_NFeatures_t;
-typedef perf::TestBaseWithParam<Image_NFeatures_t> Image_NFeatures;
-
-PERF_TEST_P(Image_NFeatures, ORB,
-            testing::Combine(testing::Values<string>("gpu/perf/aloe.png"),
-                             testing::Values(4000)))
-{
-    declare.time(300.0);
-
-    const Image_NFeatures_t params = GetParam();
-    const std::string imgFile = std::tr1::get<0>(params);
-    const int nFeatures = std::tr1::get<1>(params);
-
-    const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    if (RUN_OCL_IMPL)
-    {
-        cv::ocl::ORB_OCL d_orb(nFeatures);
-
-        const cv::ocl::oclMat d_img(img);
-        cv::ocl::oclMat d_keypoints, d_descriptors;
-
-        TEST_CYCLE() d_orb(d_img, cv::ocl::oclMat(), d_keypoints, d_descriptors);
-
-        std::vector<cv::KeyPoint> ocl_keypoints;
-        d_orb.downloadKeyPoints(d_keypoints, ocl_keypoints);
-
-        cv::Mat ocl_descriptors(d_descriptors);
-
-        ocl_keypoints.resize(10);
-        ocl_descriptors = ocl_descriptors.rowRange(0, 10);
-
-        sortKeyPoints(ocl_keypoints, ocl_descriptors);
-
-        SANITY_CHECK_KEYPOINTS(ocl_keypoints, 1e-4);
-        SANITY_CHECK(ocl_descriptors);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        cv::ORB orb(nFeatures);
-
-        std::vector<cv::KeyPoint> cpu_keypoints;
-        cv::Mat cpu_descriptors;
-
-        TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
-
-        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
-        SANITY_CHECK(cpu_descriptors);
-    }
-    else
-        OCL_PERF_ELSE;
-}
diff --git a/modules/ocl/perf/perf_precomp.hpp b/modules/ocl/perf/perf_precomp.hpp
deleted file mode 100644
index 366329c..0000000
--- a/modules/ocl/perf/perf_precomp.hpp
+++ /dev/null
@@ -1,198 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#ifndef __OPENCV_PERF_PRECOMP_HPP__
-#define __OPENCV_PERF_PRECOMP_HPP__
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#define CV_BUILD_OCL_MODULE
-
-#include <iomanip>
-#include <stdexcept>
-#include <string>
-#include <iostream>
-#include <cstdio>
-#include <vector>
-#include <numeric>
-
-#include "cvconfig.h"
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/objdetect.hpp"
-#include "opencv2/features2d.hpp"
-#include "opencv2/ocl.hpp"
-#include "opencv2/ts.hpp"
-
-using namespace std;
-using namespace cv;
-
-#define OCL_SIZE_1000 Size(1000, 1000)
-#define OCL_SIZE_2000 Size(2000, 2000)
-#define OCL_SIZE_4000 Size(4000, 4000)
-
-#define OCL_TYPICAL_MAT_SIZES ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000, OCL_SIZE_4000)
-
-#define OCL_PERF_ENUM(type, ...) ::testing::Values(type, ## __VA_ARGS__ )
-
-#define IMPL_OCL "ocl"
-#define IMPL_GPU "gpu"
-#define IMPL_PLAIN "plain"
-
-#define RUN_OCL_IMPL (IMPL_OCL == getSelectedImpl())
-#define RUN_PLAIN_IMPL (IMPL_PLAIN == getSelectedImpl())
-
-#ifdef HAVE_OPENCV_GPU
-# define RUN_GPU_IMPL (IMPL_GPU == getSelectedImpl())
-#endif
-
-#ifdef HAVE_OPENCV_GPU
-#define OCL_PERF_ELSE               \
-        if (RUN_GPU_IMPL)           \
-            CV_TEST_FAIL_NO_IMPL(); \
-        else                        \
-            CV_TEST_FAIL_NO_IMPL();
-#else
-#define OCL_PERF_ELSE               \
-            CV_TEST_FAIL_NO_IMPL();
-#endif
-
-#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); cv::ocl::finish(), stopTimer())
-#define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer())
-#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; cv::ocl::finish(), ++r)
-
-// TODO: Move to the ts module
-namespace cvtest {
-namespace ocl {
-inline void checkDeviceMaxMemoryAllocSize(const Size& size, int type, int factor = 1)
-{
-    assert(factor > 0);
-    if (!(IMPL_OCL == perf::TestBase::getSelectedImpl()))
-        return; // OpenCL devices are not used
-    int cn = CV_MAT_CN(type);
-    int cn_ocl = cn == 3 ? 4 : cn;
-    int type_ocl = CV_MAKE_TYPE(CV_MAT_DEPTH(type), cn_ocl);
-    size_t memSize = size.area() * CV_ELEM_SIZE(type_ocl);
-    const cv::ocl::DeviceInfo& devInfo = cv::ocl::Context::getContext()->getDeviceInfo();
-    if (memSize * factor >= devInfo.maxMemAllocSize)
-    {
-        throw perf::TestBase::PerfSkipTestException();
-    }
-}
-
-struct KeypointIdxCompare
-{
-    std::vector<cv::KeyPoint>* keypoints;
-
-    explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
-
-    bool operator ()(size_t i1, size_t i2) const
-    {
-        cv::KeyPoint kp1 = (*keypoints)[i1];
-        cv::KeyPoint kp2 = (*keypoints)[i2];
-        if (kp1.pt.x != kp2.pt.x)
-            return kp1.pt.x < kp2.pt.x;
-        if (kp1.pt.y != kp2.pt.y)
-            return kp1.pt.y < kp2.pt.y;
-        if (kp1.response != kp2.response)
-            return kp1.response < kp2.response;
-        return kp1.octave < kp2.octave;
-    }
-};
-
-inline void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
-{
-    std::vector<size_t> indexies(keypoints.size());
-    for (size_t i = 0; i < indexies.size(); ++i)
-        indexies[i] = i;
-
-    std::sort(indexies.begin(), indexies.end(), KeypointIdxCompare(&keypoints));
-
-    std::vector<cv::KeyPoint> new_keypoints;
-    cv::Mat new_descriptors;
-
-    new_keypoints.resize(keypoints.size());
-
-    cv::Mat descriptors;
-    if (_descriptors.needed())
-    {
-        descriptors = _descriptors.getMat();
-        new_descriptors.create(descriptors.size(), descriptors.type());
-    }
-
-    for (size_t i = 0; i < indexies.size(); ++i)
-    {
-        size_t new_idx = indexies[i];
-        new_keypoints[i] = keypoints[new_idx];
-        if (!new_descriptors.empty())
-            descriptors.row((int) new_idx).copyTo(new_descriptors.row((int) i));
-    }
-
-    keypoints.swap(new_keypoints);
-    if (_descriptors.needed())
-        new_descriptors.copyTo(_descriptors);
-}
-
-} // namespace cvtest::ocl
-} // namespace cvtest
-
-using namespace cvtest::ocl;
-
-#endif
diff --git a/modules/ocl/perf/perf_pyramid.cpp b/modules/ocl/perf/perf_pyramid.cpp
deleted file mode 100644
index 820dd60..0000000
--- a/modules/ocl/perf/perf_pyramid.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// pyrDown //////////////////////
-
-typedef Size_MatType pyrDownFixture;
-
-PERF_TEST_P(pyrDownFixture, pyrDown,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-    checkDeviceMaxMemoryAllocSize(dstSize, type);
-
-    Mat src(srcSize, type), dst;
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() pyrDown(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// pyrUp ////////////////////////
-
-typedef Size_MatType pyrUpFixture;
-
-PERF_TEST_P(pyrUpFixture, pyrUp,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-    Size dstSize(srcSize.height << 1, srcSize.width << 1);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-    checkDeviceMaxMemoryAllocSize(dstSize, type);
-
-    Mat src(srcSize, type), dst;
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() pyrDown(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp
deleted file mode 100644
index ecfc49e..0000000
--- a/modules/ocl/perf/perf_split_merge.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//    Jin Ma,       jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-
-using namespace perf;
-using std::tr1::tuple;
-using std::tr1::get;
-
-///////////// Merge////////////////////////
-
-typedef Size_MatType MergeFixture;
-
-PERF_TEST_P(MergeFixture, Merge,
-            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
-                               OCL_PERF_ENUM(CV_8U, CV_32F)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int depth = get<1>(params), channels = 3;
-    const int dstType = CV_MAKE_TYPE(depth, channels);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, dstType);
-
-    Mat dst(srcSize, dstType);
-    vector<Mat> src(channels);
-    for (vector<Mat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
-    {
-        i->create(srcSize, CV_MAKE_TYPE(depth, 1));
-        declare.in(*i, WARMUP_RNG);
-    }
-    declare.out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclDst(srcSize, dstType);
-        vector<ocl::oclMat> oclSrc(src.size());
-        for (vector<ocl::oclMat>::size_type i = 0, end = src.size(); i < end; ++i)
-            oclSrc[i] = src[i];
-
-        OCL_TEST_CYCLE() cv::ocl::merge(oclSrc, oclDst);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::merge(src, dst);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// Split////////////////////////
-
-typedef Size_MatType SplitFixture;
-
-PERF_TEST_P(SplitFixture, Split,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8U, CV_32F)))
-{
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int depth = get<1>(params), channels = 3;
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    checkDeviceMaxMemoryAllocSize(srcSize, type);
-
-    Mat src(srcSize, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src);
-        vector<ocl::oclMat> oclDst(channels, ocl::oclMat(srcSize, CV_MAKE_TYPE(depth, 1)));
-
-        OCL_TEST_CYCLE() cv::ocl::split(oclSrc, oclDst);
-
-        ASSERT_EQ(3, channels);
-        Mat dst0, dst1, dst2;
-        oclDst[0].download(dst0);
-        oclDst[1].download(dst1);
-        oclDst[2].download(dst2);
-        SANITY_CHECK(dst0);
-        SANITY_CHECK(dst1);
-        SANITY_CHECK(dst2);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        vector<Mat> dst(channels, Mat(srcSize, CV_MAKE_TYPE(depth, 1)));
-        TEST_CYCLE() cv::split(src, dst);
-
-        ASSERT_EQ(3, channels);
-        Mat & dst0 = dst[0], & dst1 = dst[1], & dst2 = dst[2];
-        SANITY_CHECK(dst0);
-        SANITY_CHECK(dst1);
-        SANITY_CHECK(dst2);
-    }
-    else
-        OCL_PERF_ELSE
-}
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
deleted file mode 100644
index d008e8b..0000000
--- a/modules/ocl/src/arithm.cpp
+++ /dev/null
@@ -1,1804 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Jiang Liyuan, jlyuan001.good@163.com
-//    Rock Li, Rock.Li@amd.com
-//    Zailong Wu, bullet@yeah.net
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-static std::vector<uchar> scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn)
-{
-    CV_Assert(ocn == cn || (ocn == 4 && cn == 3));
-
-    static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort),
-                               sizeof(short), sizeof(int), sizeof(float), sizeof(double) };
-
-    int elemSize1 = sizeMap[depth];
-    int bufSize = elemSize1 * ocn;
-    std::vector<uchar> _buf(bufSize);
-    uchar * buf = &_buf[0];
-    scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn));
-    memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1);
-
-    return _buf;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////// add subtract multiply divide min max /////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-enum { ADD = 0, SUB, MUL, DIV, ABS, ABS_DIFF, MIN, MAX };
-
-static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const Scalar & scalar, const oclMat & mask,
-                            oclMat &dst, int op_type, bool use_scalar = false)
-{
-    Context *clCxt = src1.clCxt;
-    bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
-    if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size()));
-    CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size()));
-    CV_Assert(op_type >= ADD && op_type <= MAX);
-
-    dst.create(src1.size(), src1.type());
-
-    int oclChannels = src1.oclchannels(), depth = src1.depth();
-    int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize();
-    int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize();
-    int maskstep1 = mask.step, maskoffset1 = mask.offset / mask.elemSize();
-    int dststep1 = dst.step / dst.elemSize(), dstoffset1 = dst.offset / dst.elemSize();
-    std::vector<uchar> m;
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 16, 10, 1 };
-#else
-    size_t localThreads[3]  = { 16, 16, 1 };
-#endif
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    std::string kernelName = "arithm_binary_op";
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" };
-    const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS", "FUNC_ABS_DIFF", "FUNC_MIN", "FUNC_MAX" };
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    bool haveScalar = use_scalar || src2.empty();
-
-    int WDepth = depth;
-    if (haveScalar)
-        WDepth = hasDouble && WDepth == CV_64F ? CV_64F : CV_32F;
-    if (op_type == DIV)
-        WDepth = hasDouble ? CV_64F : CV_32F;
-    else if (op_type == MUL)
-        WDepth = hasDouble && (depth == CV_32S || depth == CV_64F) ? CV_64F : CV_32F;
-
-    std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D %s "
-                                      "-D convertToWT=convert_%s%s",
-                                      typeMap[depth], channelMap[oclChannels],
-                                      WTypeMap[WDepth], channelMap[oclChannels],
-                                      typeMap[depth], channelMap[oclChannels], (depth >= CV_32F ? "" : (depth == CV_32S ? "_rte" : "_sat_rte")),
-                                      funcMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1offset1 ));
-
-    if (!src2.empty())
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2step1 ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1 ));
-
-        kernelName += "_mat";
-
-        if (haveScalar)
-            buildOptions += " -D HAVE_SCALAR";
-    }
-
-    if (haveScalar)
-    {
-        const int WDepthMap[] = { CV_16S, CV_16S, CV_32S, CV_32S, CV_32S, CV_32F, CV_64F };
-        m = scalarToVector(scalar, WDepthMap[WDepth], oclChannels, src1.channels());
-
-        args.push_back( std::make_pair( m.size(), (void *)&m[0]));
-
-        kernelName += "_scalar";
-    }
-
-    if (!mask.empty())
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&maskstep1 ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&maskoffset1 ));
-
-        kernelName += "_mask";
-    }
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-
-    openCLExecuteKernel(clCxt, mask.empty() ?
-                            (!src2.empty() ? &arithm_add : &arithm_add_scalar) :
-                            (!src2.empty() ? &arithm_add_mask : &arithm_add_scalar_mask),
-                        kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
-{
-    arithmetic_run_generic(src1, src2, Scalar(), mask, dst, ADD);
-}
-
-void cv::ocl::add(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
-{
-    arithmetic_run_generic(src1, oclMat(), src2, mask, dst, ADD);
-}
-
-void cv::ocl::subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
-{
-    arithmetic_run_generic(src1, src2, Scalar(), mask, dst, SUB);
-}
-
-void cv::ocl::subtract(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
-{
-    arithmetic_run_generic(src1, oclMat(), src2, mask, dst, SUB);
-}
-
-void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
-{
-    const bool use_scalar = !(std::abs(scalar - 1.0) < std::numeric_limits<double>::epsilon());
-    arithmetic_run_generic(src1, src2, Scalar::all(scalar), oclMat(), dst, MUL, use_scalar);
-}
-
-void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst)
-{
-    arithmetic_run_generic(src, oclMat(), Scalar::all(scalar), oclMat(), dst, MUL);
-}
-
-void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
-{
-    const bool use_scalar = !(std::abs(scalar - 1.0) < std::numeric_limits<double>::epsilon());
-    arithmetic_run_generic(src1, src2, Scalar::all(scalar), oclMat(), dst, DIV, use_scalar);
-}
-
-void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
-{
-    arithmetic_run_generic(src, oclMat(), Scalar::all(scalar), oclMat(), dst, DIV);
-}
-
-void cv::ocl::min(const oclMat &src1, const oclMat &src2, oclMat &dst)
-{
-    arithmetic_run_generic(src1, src2, Scalar::all(0), oclMat(), dst, MIN);
-}
-
-void cv::ocl::max(const oclMat &src1, const oclMat &src2, oclMat &dst)
-{
-    arithmetic_run_generic(src1, src2, Scalar::all(0), oclMat(), dst, MAX);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////Abs, Absdiff ////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-void cv::ocl::abs(const oclMat &src, oclMat &dst)
-{
-    // explicitly uses use_scalar (even if zero) so that the correct kernel is used
-    arithmetic_run_generic(src, oclMat(), Scalar(), oclMat(), dst, ABS, true);
-}
-
-void cv::ocl::absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst)
-{
-    arithmetic_run_generic(src1, src2, Scalar(), oclMat(), dst, ABS_DIFF);
-}
-
-void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst)
-{
-    arithmetic_run_generic(src1, oclMat(), src2, oclMat(), dst, ABS_DIFF);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////  compare ///////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
-                        String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    dst.create(src1.size(), CV_8UC1);
-
-    int depth = src1.depth();
-    size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    int src1step1 = src1.step1(), src1offset1 = src1.offset / src1.elemSize1();
-    int src2step1 = src2.step1(), src2offset1 = src2.offset / src2.elemSize1();
-    int dststep1 = dst.step1(), dstoffset1 = dst.offset / dst.elemSize1();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
-    std::string buildOptions = format("-D T=%s -D Operation=%s", typeMap[depth], operationMap[cmpOp]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-
-    openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
-{
-    if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(src1.type() == src2.type() && src1.channels() == 1);
-    CV_Assert(cmpOp >= CMP_EQ && cmpOp <= CMP_NE);
-
-    compare_run(src1, src2, dst, cmpOp, "arithm_compare", &arithm_compare);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// sum  //////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-enum { SUM = 0, ABS_SUM, SQR_SUM };
-
-static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupnum, int type, int ddepth)
-{
-    int ochannels = src.oclchannels();
-    int all_cols = src.step / src.elemSize();
-    int pre_cols = (src.offset % src.step) / src.elemSize();
-    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
-    int invalid_cols = pre_cols + sec_cols;
-    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
-    int offset = src.offset / src.elemSize();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const funcMap[] = { "FUNC_SUM", "FUNC_ABS_SUM", "FUNC_SQR_SUM" };
-    const char * const channelMap[] = { " ", " ", "2", "4", "4" };
-    String buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s",
-                                 typeMap[src.depth()], channelMap[ochannels],
-                                 typeMap[ddepth], channelMap[ochannels],
-                                 typeMap[ddepth], channelMap[ochannels],
-                                 funcMap[type]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
-    size_t globalThreads[3] = { groupnum * 256, 1, 1 };
-
-#ifdef ANDROID
-    openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, NULL,
-                        args, -1, -1, buildOptions.c_str());
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-    openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-#endif
-}
-
-template <typename T>
-Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
-{
-    CV_Assert(src.step % src.elemSize() == 0);
-
-    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
-    CV_Assert(groupnum != 0);
-
-    int dbsize = groupnum * src.oclchannels();
-    Context *clCxt = src.clCxt;
-
-    AutoBuffer<T> _buf(dbsize);
-    T *p = (T*)_buf;
-    memset(p, 0, dbsize * sizeof(T));
-
-    cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
-    arithmetic_sum_buffer_run(src, dstBuffer, groupnum, type, ddepth);
-    openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
-    openCLFree(dstBuffer);
-
-    Scalar s = Scalar::all(0.0);
-    for (int i = 0; i < dbsize;)
-         for (int j = 0; j < src.oclchannels(); j++, i++)
-            s.val[j] += p[i];
-
-    return s;
-}
-
-typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth);
-
-Scalar cv::ocl::sum(const oclMat &src)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return Scalar::all(0);
-    }
-    static sumFunc functab[3] =
-    {
-        arithmetic_sum<int>,
-        arithmetic_sum<float>,
-        arithmetic_sum<double>
-    };
-
-    int ddepth = std::max(src.depth(), CV_32S);
-    sumFunc func = functab[ddepth - CV_32S];
-    return func(src, SUM, ddepth);
-}
-
-Scalar cv::ocl::absSum(const oclMat &src)
-{
-    int sdepth = src.depth();
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && sdepth == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return cv::Scalar::all(0);
-    }
-
-    if (sdepth == CV_8U || sdepth == CV_16U)
-        return sum(src);
-
-    static sumFunc functab[3] =
-    {
-        arithmetic_sum<int>,
-        arithmetic_sum<float>,
-        arithmetic_sum<double>
-    };
-
-    int ddepth = std::max(sdepth, CV_32S);
-    sumFunc func = functab[ddepth - CV_32S];
-    return func(src, ABS_SUM, ddepth);
-}
-
-Scalar cv::ocl::sqrSum(const oclMat &src)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return cv::Scalar::all(0);
-    }
-    static sumFunc functab[3] =
-    {
-        arithmetic_sum<int>,
-        arithmetic_sum<float>,
-        arithmetic_sum<double>
-    };
-
-    int ddepth = std::max(src.depth(), CV_32S);
-    sumFunc func = functab[ddepth - CV_32S];
-    return func(src, SQR_SUM, ddepth);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////// meanStdDev //////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
-{
-    if (src.depth() == CV_64F && !src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    double total = 1.0 / src.size().area();
-
-    mean = sum(src);
-    stddev = sqrSum(src);
-
-    for (int i = 0; i < 4; ++i)
-    {
-        mean[i] *= total;
-        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean.val[i] * mean.val[i] , 0.));
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////// minMax  /////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T, typename WT>
-static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem &dst, int groupnum, String kernelName)
-{
-    int all_cols = src.step / src.elemSize();
-    int pre_cols = (src.offset % src.step) / src.elemSize();
-    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
-    int invalid_cols = pre_cols + sec_cols;
-    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
-    int offset = src.offset / src.elemSize();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const channelMap[] = { " ", " ", "2", "4", "4" };
-
-    std::ostringstream stream;
-    stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()];
-    if (std::numeric_limits<T>::is_integer)
-    {
-        stream << " -D MAX_VAL=" << (WT)std::numeric_limits<T>::max();
-        stream << " -D MIN_VAL=" << (WT)std::numeric_limits<T>::min();
-    }
-    else
-        stream << " -D DEPTH_" << src.depth();
-    std::string buildOptions = stream.str();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-
-    int minvalid_cols = 0, moffset = 0;
-    if (!mask.empty())
-    {
-        int mall_cols = mask.step / mask.elemSize();
-        int mpre_cols = (mask.offset % mask.step) / mask.elemSize();
-        int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / mask.elemSize() - 1;
-        minvalid_cols = mpre_cols + msec_cols;
-        moffset = mask.offset / mask.elemSize();
-
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&moffset ));
-
-        kernelName += "_mask";
-    }
-
-    size_t globalThreads[3] = {groupnum * 256, 1, 1};
-    size_t localThreads[3] = {256, 1, 1};
-
-    // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
-    openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-template <typename T, typename WT>
-void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
-{
-    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
-    CV_Assert(groupnum != 0);
-
-    int dbsize = groupnum * 2 * src.elemSize();
-    oclMat buf;
-    ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
-
-    cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
-    arithmetic_minMax_run<T, WT>(src, mask, buf_data, groupnum, "arithm_op_minMax");
-
-    Mat matbuf = Mat(buf);
-    T *p = matbuf.ptr<T>();
-    if (minVal != NULL)
-    {
-        *minVal = std::numeric_limits<double>::max();
-        for (int i = 0, end = src.oclchannels() * (int)groupnum; i < end; i++)
-            *minVal = *minVal < p[i] ? *minVal : p[i];
-    }
-    if (maxVal != NULL)
-    {
-        *maxVal = -std::numeric_limits<double>::max();
-        for (int i = src.oclchannels() * (int)groupnum, end = i << 1; i < end; i++)
-            *maxVal = *maxVal > p[i] ? *maxVal : p[i];
-    }
-}
-
-typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
-
-void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
-{
-    CV_Assert(src.channels() == 1);
-    CV_Assert(src.size() == mask.size() || mask.empty());
-    CV_Assert(src.step % src.elemSize() == 0);
-
-    if (minVal == NULL && maxVal == NULL)
-        return;
-
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    static minMaxFunc functab[] =
-    {
-        arithmetic_minMax<uchar, int>,
-        arithmetic_minMax<char, int>,
-        arithmetic_minMax<ushort, int>,
-        arithmetic_minMax<short, int>,
-        arithmetic_minMax<int, int>,
-        arithmetic_minMax<float, float>,
-        arithmetic_minMax<double, double>,
-        0
-    };
-
-    minMaxFunc func = functab[src.depth()];
-    CV_Assert(func != 0);
-
-    func(src, minVal, maxVal, mask);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////// norm /////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-double cv::ocl::norm(const oclMat &src1, int normType)
-{
-    CV_Assert((normType & NORM_RELATIVE) == 0);
-    return norm(src1, oclMat(), normType);
-}
-
-static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff, int ntype)
-{
-    Context *clCxt = src1.clCxt;
-    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-    CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
-
-    if (src2.empty() && (src1.depth() == CV_8U || src1.depth() == CV_16U))
-    {
-        src1.convertTo(diff, CV_32S);
-        return;
-    }
-
-    int ddepth = std::max(src1.depth(), CV_32S);
-    if (ntype == NORM_L2)
-        ddepth = std::max<int>(CV_32F, ddepth);
-
-    diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels()));
-    CV_Assert(diff.step % diff.elemSize() == 0);
-
-    int oclChannels = src1.oclchannels(), sdepth = src1.depth();
-    int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize();
-    int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize();
-    int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize();
-
-    String kernelName = "arithm_absdiff_nonsaturate";
-#ifdef ANDROID
-    size_t localThreads[3]  = { 16, 10, 1 };
-#else
-    size_t localThreads[3]  = { 16, 16, 1 };
-#endif
-    size_t globalThreads[3] = { diff.cols, diff.rows, 1 };
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-
-    std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s",
-                                      typeMap[sdepth], channelMap[oclChannels],
-                                      typeMap[ddepth], channelMap[oclChannels],
-                                      typeMap[ddepth], channelMap[oclChannels]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1offset1 ));
-
-    if (!src2.empty())
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2step1 ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1 ));
-
-        kernelName += "_binary";
-        buildOptions += " -D BINARY";
-    }
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&diff.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&diffstep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&diffoffset1 ));
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-
-    openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate,
-                        kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
-{
-    if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return -1;
-    }
-    CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
-
-    bool isRelative = (normType & NORM_RELATIVE) != 0;
-    normType &= NORM_TYPE_MASK;
-    CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
-
-    Scalar s;
-    int cn = src1.channels();
-    double r = 0;
-    oclMat diff;
-
-    arithm_absdiff_nonsaturate_run(src1, src2, diff, normType);
-
-    switch (normType)
-    {
-    case NORM_INF:
-        diff = diff.reshape(1);
-        minMax(diff, NULL, &r);
-        break;
-    case NORM_L1:
-        s = sum(diff);
-        for (int i = 0; i < cn; ++i)
-            r += s[i];
-        break;
-    case NORM_L2:
-        s = sqrSum(diff);
-        for (int i = 0; i < cn; ++i)
-            r += s[i];
-        r = std::sqrt(r);
-        break;
-    }
-    if (isRelative)
-        r = r / (norm(src2, normType) + DBL_EPSILON);
-
-    return r;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// flip //////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
-
-static void arithmetic_flip_run(const oclMat &src, oclMat &dst, String kernelName, int flipType)
-{
-    int cols = dst.cols, rows = dst.rows;
-    if ((cols == 1 && flipType == FLIP_COLS) ||
-            (rows == 1 && flipType == FLIP_ROWS) ||
-            (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
-    {
-        src.copyTo(dst);
-        return;
-    }
-
-    cols = flipType == FLIP_COLS ? divUp(cols, 2) : cols;
-    rows = flipType & FLIP_ROWS ? divUp(rows, 2) : rows;
-
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D T=%s%s", typeMap[dst.depth()], channelMap[dst.oclchannels()]);
-
-    size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { cols, rows, 1 };
-
-    int elemSize = src.elemSize();
-    int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
-    int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
-
-    openCLExecuteKernel(src.clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args,
-                        -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    dst.create(src.size(), src.type());
-
-    if (flipCode == 0)
-        arithmetic_flip_run(src, dst, "arithm_flip_rows", FLIP_ROWS);
-    else if (flipCode > 0)
-        arithmetic_flip_run(src, dst, "arithm_flip_cols", FLIP_COLS);
-    else
-        arithmetic_flip_run(src, dst, "arithm_flip_rows_cols", FLIP_BOTH);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// LUT  //////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_lut_run(const oclMat &src, const oclMat &lut, oclMat &dst, String kernelName)
-{
-    int sdepth = src.depth();
-    int src_step1 = src.step1(), dst_step1 = dst.step1();
-    int src_offset1 = src.offset / src.elemSize1(), dst_offset1 = dst.offset / dst.elemSize1();
-    int lut_offset1 = lut.offset / lut.elemSize1() + (sdepth == CV_8U ? 0 : 128) * lut.channels();
-    int cols1 = src.cols * src.oclchannels();
-
-    size_t localSize[] = { 16, 16, 1 };
-    size_t globalSize[] = { lut.channels() == 1 ? cols1 : src.cols, src.rows, 1 };
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D srcT=%s -D dstT=%s", typeMap[sdepth], typeMap[dst.depth()]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut_offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-
-    openCLExecuteKernel(src.clCxt, &arithm_LUT, kernelName, globalSize, localSize,
-                        args, lut.oclchannels(), -1, buildOptions.c_str());
-}
-
-void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
-{
-    if (!lut.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && lut.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    int cn = src.channels(), depth = src.depth();
-
-    CV_Assert(depth == CV_8U || depth == CV_8S);
-    CV_Assert(lut.channels() == 1 || lut.channels() == src.channels());
-    CV_Assert(lut.rows == 1 && lut.cols == 256);
-
-    dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
-    arithmetic_lut_run(src, lut, dst, "LUT");
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////// exp log /////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_exp_log_sqrt_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    Context  *clCxt = src.clCxt;
-    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert( src.depth() == CV_32F || src.depth() == CV_64F);
-    dst.create(src.size(), src.type());
-
-    int ddepth = dst.depth();
-    int cols1 = src.cols * src.oclchannels();
-    int srcoffset1 = src.offset / src.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
-    int srcstep1 = src.step1(), dststep1 = dst.step1();
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    std::string buildOptions = format("-D srcT=%s",
-                                      ddepth == CV_32F ? "float" : "double");
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcoffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcstep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-
-    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
-                        args, src.oclchannels(), -1, buildOptions.c_str());
-}
-
-void cv::ocl::exp(const oclMat &src, oclMat &dst)
-{
-    arithmetic_exp_log_sqrt_run(src, dst, "arithm_exp", &arithm_exp);
-}
-
-void cv::ocl::log(const oclMat &src, oclMat &dst)
-{
-    arithmetic_exp_log_sqrt_run(src, dst, "arithm_log", &arithm_log);
-}
-
-void cv::ocl::sqrt(const oclMat &src, oclMat &dst)
-{
-    arithmetic_exp_log_sqrt_run(src, dst, "arithm_sqrt", &arithm_sqrt);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////// magnitude phase ///////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName)
-{
-    int depth = dst.depth();
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
-    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
-    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
-
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[dst.channels()]);
-
-    openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
-{
-    if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(src1.type() == src2.type() && src1.size() == src2.size() &&
-              (src1.depth() == CV_32F || src1.depth() == CV_64F));
-
-    dst.create(src1.size(), src1.type());
-    arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
-}
-
-static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
-    int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
-    int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
-    int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { cols1, dst.rows, 1 };
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-
-    openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
-}
-
-void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
-{
-    if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
-    CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
-
-    Angle.create(x.size(), x.type());
-    arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// cartToPolar ///////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
-                                String kernelName, bool angleInDegrees)
-{
-    int channels = src1.oclchannels();
-    int depth = src1.depth();
-
-    int cols = src1.cols * channels;
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { cols, src1.rows, 1 };
-
-    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
-    int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
-    int dst_mag_step = dst_mag.step / dst_mag.elemSize1(), dst_mag_offset = dst_mag.offset / dst_mag.elemSize1();
-    int dst_cart_step = dst_cart.step / dst_cart.elemSize1(), dst_cart_offset = dst_cart.offset / dst_cart.elemSize1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_mag.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_cart.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
-
-    openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args,
-                        -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN");
-}
-
-void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees)
-{
-    if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
-
-    mag.create(x.size(), x.type());
-    angle.create(x.size(), x.type());
-
-    arithmetic_cartToPolar_run(x, y, mag, angle, "arithm_cartToPolar", angleInDegrees);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// polarToCart ///////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
-                        String kernelName)
-{
-    int channels = src2.oclchannels(), depth = src2.depth();
-    int cols = src2.cols * channels, rows = src2.rows;
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { cols, rows, 1 };
-
-    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
-    int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
-    int dst1_step = dst1.step / dst1.elemSize1(), dst1_offset = dst1.offset / dst1.elemSize1();
-    int dst2_step = dst2.step / dst2.elemSize1(), dst2_offset = dst2.offset / dst2.elemSize1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    if (src1.data)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
-    }
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
-
-    openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads,
-                        args, -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN");
-}
-
-void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees)
-{
-    if (!magnitude.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && magnitude.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F);
-    CV_Assert(magnitude.size() == angle.size() && magnitude.type() == angle.type());
-
-    x.create(angle.size(), angle.type());
-    y.create(angle.size(), angle.type());
-
-    if ( magnitude.data )
-        arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart_mag");
-    else
-        arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart");
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////// minMaxLoc ////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum)
-{
-    std::vector<std::pair<size_t , const void *> > args;
-    int all_cols = src.step / (vlen * src.elemSize1());
-    int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1());
-    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize1() - 1) / (vlen * src.elemSize1()) - 1;
-    int invalid_cols = pre_cols + sec_cols;
-    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
-    int offset = src.offset / (vlen * src.elemSize1());
-    int repeat_s = src.offset / src.elemSize1() - offset * vlen;
-    int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
-    char build_options[50];
-    sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
-    size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
-
-    // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
-    openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc, "arithm_op_minMaxLoc", gt, lt, args, -1, -1, build_options);
-}
-
-static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum)
-{
-    std::vector<std::pair<size_t , const void *> > args;
-    size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
-    char build_options[50];
-    if (src.oclchannels() == 1)
-    {
-        int cols = (src.cols - 1) / vlen + 1;
-        int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
-        int offset = src.offset / src.elemSize1();
-        int repeat_me = vlen - (mask.cols % vlen == 0 ? vlen : mask.cols % vlen);
-        int minvalid_cols = mask.step / (vlen * mask.elemSize1()) - cols;
-        int moffset = mask.offset / mask.elemSize1();
-        int elemnum = cols * src.rows;
-        sprintf(build_options, "-D DEPTH_%d -D REPEAT_E%d", src.depth(), repeat_me);
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minvalid_cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&moffset ));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
-
-        // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
-        openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc_mask, "arithm_op_minMaxLoc_mask", gt, lt, args, -1, -1, build_options);
-    }
-}
-
-template <typename T>
-void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
-                          Point *minLoc, Point *maxLoc, const oclMat &mask)
-{
-    CV_Assert(src.oclchannels() == 1);
-    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
-    CV_Assert(groupnum != 0);
-    int minloc = -1 , maxloc = -1;
-    int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
-    Context *clCxt = src.clCxt;
-    cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
-    *minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
-
-    if (mask.empty())
-        arithmetic_minMaxLoc_run(src, dstBuffer, vlen, groupnum);
-    else
-        arithmetic_minMaxLoc_mask_run(src, mask, dstBuffer, vlen, groupnum);
-
-    AutoBuffer<T> _buf(groupnum * vlen * 4);
-    T *p = (T*)_buf;
-    memset(p, 0, dbsize);
-
-    openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
-    for (int i = 0; i < vlen * (int)groupnum; i++)
-    {
-        *minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i];
-        minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : cvRound(p[i + 2 * vlen * groupnum]);
-    }
-    for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
-    {
-        *maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i];
-        maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : cvRound(p[i + 2 * vlen * groupnum]);
-    }
-
-    int pre_rows = src.offset / src.step;
-    int pre_cols = (src.offset % src.step) / src.elemSize1();
-    int wholecols = src.step / src.elemSize1();
-    if ( minLoc )
-    {
-        if ( minloc >= 0 )
-        {
-            minLoc->y = minloc / wholecols - pre_rows;
-            minLoc->x = minloc % wholecols - pre_cols;
-        }
-        else
-            minLoc->x = minLoc->y = -1;
-    }
-    if ( maxLoc )
-    {
-        if ( maxloc >= 0 )
-        {
-            maxLoc->y = maxloc / wholecols - pre_rows;
-            maxLoc->x = maxloc % wholecols - pre_cols;
-        }
-        else
-            maxLoc->x = maxLoc->y = -1;
-    }
-
-    openCLSafeCall(clReleaseMemObject(dstBuffer));
-}
-
-typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal,
-                              Point *minLoc, Point *maxLoc, const oclMat &mask);
-
-void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
-                        Point *minLoc, Point *maxLoc, const oclMat &mask)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    static minMaxLocFunc functab[2] =
-    {
-        arithmetic_minMaxLoc<float>,
-        arithmetic_minMaxLoc<double>
-    };
-
-    minMaxLocFunc func;
-    func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)];
-    func(src, minVal, maxVal, minLoc, maxLoc, mask);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-///////////////////////////// countNonZero ///////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int groupnum, String kernelName)
-{
-    int ochannels = src.oclchannels();
-    int all_cols = src.step / src.elemSize();
-    int pre_cols = (src.offset % src.step) / src.elemSize();
-    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
-    int invalid_cols = pre_cols + sec_cols;
-    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
-    int offset = src.offset / src.elemSize();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const channelMap[] = { " ", " ", "2", "4", "4" };
-    String buildOptions = format("-D srcT=%s%s -D dstT=int%s", typeMap[src.depth()], channelMap[ochannels],
-                                 channelMap[ochannels]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
-
-    size_t globalThreads[3] = { groupnum * 256, 1, 1 };
-
-#ifdef ANDROID
-    openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, NULL,
-                        args, -1, -1, buildOptions.c_str());
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-    openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-#endif
-}
-
-int cv::ocl::countNonZero(const oclMat &src)
-{
-    CV_Assert(src.step % src.elemSize() == 0);
-    CV_Assert(src.channels() == 1);
-
-    Context *clCxt = src.clCxt;
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "selected device doesn't support double");
-        return -1;
-    }
-
-    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
-    CV_Assert(groupnum != 0);
-    int dbsize = groupnum;
-
-    String kernelName = "arithm_op_nonzero";
-
-    AutoBuffer<int> _buf(dbsize);
-    int *p = (int*)_buf, nonzero = 0;
-    memset(p, 0, dbsize * sizeof(int));
-
-    cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
-    arithmetic_countNonZero_run(src, dstBuffer, groupnum, kernelName);
-    openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int));
-
-    for (int i = 0; i < dbsize; i++)
-        nonzero += p[i];
-
-    openCLSafeCall(clReleaseMemObject(dstBuffer));
-
-    return nonzero;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////bitwise_op////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void bitwise_unary_run(const oclMat &src1, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    dst.create(src1.size(), src1.type());
-
-    int channels = dst.oclchannels();
-    int depth = dst.depth();
-
-    int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
-        {4, 4, 4, 4, 1, 1, 1},
-        {4, 4, 4, 4, 1, 1, 1},
-        {4, 4, 4, 4, 1, 1, 1}
-    };
-
-    size_t vector_length = vector_lengths[channels - 1][depth];
-    int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
-    int cols = divUp(dst.cols * channels + offset_cols, vector_length);
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 64, 2, 1 };
-#else
-    size_t localThreads[3]  = { 64, 4, 1 };
-#endif
-    size_t globalThreads[3] = { cols, dst.rows, 1 };
-
-    int dst_step1 = dst.cols * dst.elemSize();
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-
-    openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
-}
-
-enum { AND = 0, OR, XOR };
-
-static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Scalar& src3, const oclMat &mask,
-                               oclMat &dst, int operationType)
-{
-    CV_Assert(operationType >= AND && operationType <= XOR);
-    CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size()));
-    CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size()));
-
-    dst.create(src1.size(), src1.type());
-    oclMat m;
-
-    const char operationMap[] = { '&', '|', '^' };
-    std::string kernelName("arithm_bitwise_binary");
-
-    int vlen = std::min<int>(8, src1.elemSize1() * src1.oclchannels());
-    std::string vlenstr = vlen > 1 ? format("%d", vlen) : "";
-    std::string buildOptions = format("-D Operation=%c -D vloadn=vload%s -D vstoren=vstore%s -D elemSize=%d -D vlen=%d"
-                                      " -D ucharv=uchar%s",
-                                      operationMap[operationType], vlenstr.c_str(), vlenstr.c_str(),
-                                      (int)src1.elemSize(), vlen, vlenstr.c_str());
-
-#ifdef ANDROID
-    size_t localThreads[3]  = { 16, 10, 1 };
-#else
-    size_t localThreads[3]  = { 16, 16, 1 };
-#endif
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
-
-    if (src2.empty())
-    {
-        m.create(1, 1, dst.type());
-        m.setTo(src3);
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&m.data ));
-
-        kernelName += "_scalar";
-    }
-    else
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
-    }
-
-    if (!mask.empty())
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.offset ));
-
-        kernelName += "_mask";
-    }
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-
-    openCLExecuteKernel(src1.clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) :
-                                              (!src2.empty() ? &arithm_bitwise_binary_mask : &arithm_bitwise_binary_scalar_mask),
-                        kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    dst.create(src.size(), src.type());
-    bitwise_unary_run(src, dst, "arithm_bitwise_not", &arithm_bitwise_not);
-}
-
-void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, src2, Scalar(), mask, dst, OR);
-}
-
-void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, oclMat(), src2, mask, dst, OR);
-}
-
-void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, src2, Scalar(), mask, dst, AND);
-}
-
-void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, oclMat(), src2, mask, dst, AND);
-}
-
-void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, src2, Scalar(), mask, dst, XOR);
-}
-
-void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
-{
-    bitwise_binary_run(src1, oclMat(), src2, mask, dst, XOR);
-}
-
-oclMat cv::ocl::operator ~ (const oclMat &src)
-{
-    return oclMatExpr(src, oclMat(), MAT_NOT);
-}
-
-oclMat cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, MAT_OR);
-}
-
-oclMat cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, MAT_AND);
-}
-
-oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, MAT_XOR);
-}
-
-cv::ocl::oclMatExpr cv::ocl::operator + (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, cv::ocl::MAT_ADD);
-}
-
-cv::ocl::oclMatExpr cv::ocl::operator - (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, cv::ocl::MAT_SUB);
-}
-
-cv::ocl::oclMatExpr cv::ocl::operator * (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, cv::ocl::MAT_MUL);
-}
-
-cv::ocl::oclMatExpr cv::ocl::operator / (const oclMat &src1, const oclMat &src2)
-{
-    return oclMatExpr(src1, src2, cv::ocl::MAT_DIV);
-}
-
-void oclMatExpr::assign(oclMat& m) const
-{
-    switch (op)
-    {
-        case MAT_ADD:
-            add(a, b, m);
-            break;
-        case MAT_SUB:
-            subtract(a, b, m);
-            break;
-        case MAT_MUL:
-            multiply(a, b, m);
-            break;
-        case MAT_DIV:
-            divide(a, b, m);
-            break;
-        case MAT_NOT:
-            bitwise_not(a, m);
-            break;
-        case MAT_AND:
-            bitwise_and(a, b, m);
-            break;
-        case MAT_OR:
-            bitwise_or(a, b, m);
-            break;
-        case MAT_XOR:
-            bitwise_xor(a, b, m);
-            break;
-    }
-}
-
-oclMatExpr::operator oclMat() const
-{
-    oclMat m;
-    assign(m);
-    return m;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////// transpose ////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-#define TILE_DIM   (32)
-#define BLOCK_ROWS (256 / TILE_DIM)
-
-static void transpose_run(const oclMat &src, oclMat &dst, String kernelName, bool inplace = false)
-{
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char channelsString[] = { ' ', ' ', '2', '4', '4' };
-    std::string buildOptions = format("-D T=%s%c", typeMap[src.depth()],
-                                      channelsString[src.channels()]);
-
-    size_t localThreads[3]  = { TILE_DIM, BLOCK_ROWS, 1 };
-    size_t globalThreads[3] = { src.cols, inplace ? src.rows : divUp(src.rows, TILE_DIM) * BLOCK_ROWS, 1 };
-
-    int srcstep1 = src.step / src.elemSize(), dststep1 = dst.step / dst.elemSize();
-    int srcoffset1 = src.offset / src.elemSize(), dstoffset1 = dst.offset / dst.elemSize();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcstep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcoffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
-
-    openCLExecuteKernel(src.clCxt, &arithm_transpose, kernelName, globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::transpose(const oclMat &src, oclMat &dst)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    if ( src.data == dst.data && src.cols == src.rows && dst.offset == src.offset
-         && dst.size() == src.size())
-        transpose_run( src, dst, "transpose_inplace", true);
-    else
-    {
-        dst.create(src.cols, src.rows, src.type());
-        transpose_run( src, dst, "transpose");
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////// addWeighted ///////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
-{
-    Context *clCxt = src1.clCxt;
-    bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
-    if (!hasDouble && src1.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(src1.size() ==  src2.size() && src1.type() == src2.type());
-    dst.create(src1.size(), src1.type());
-
-    int channels = dst.oclchannels();
-    int depth = dst.depth();
-
-    int cols1 = src1.cols * channels;
-    int src1step1 = src1.step1(), src1offset1 = src1.offset / src1.elemSize1();
-    int src2step1 = src2.step1(), src2offset1 = src2.offset / src1.elemSize1();
-    int dststep1 = dst.step1(), dstoffset1 = dst.offset / dst.elemSize1();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D T=%s -D WT=%s -D convertToT=convert_%s%s",
-                                      typeMap[depth], hasDouble ? "double" : "float", typeMap[depth],
-                                      depth >= CV_32F ? "" : "_sat_rte");
-
-    size_t globalThreads[3] = { cols1, dst.rows, 1};
-
-    float alpha_f = static_cast<float>(alpha),
-            beta_f = static_cast<float>(beta),
-            gama_f = static_cast<float>(gama);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1offset1));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1));
-
-    if (!hasDouble)
-    {
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&alpha_f ));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&beta_f ));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&gama_f ));
-    }
-    else
-    {
-        args.push_back( std::make_pair( sizeof(cl_double), (void *)&alpha ));
-        args.push_back( std::make_pair( sizeof(cl_double), (void *)&beta ));
-        args.push_back( std::make_pair( sizeof(cl_double), (void *)&gama ));
-    }
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
-
-#ifdef ANDROID
-    openCLExecuteKernel(clCxt, &arithm_addWeighted, "addWeighted", globalThreads, NULL,
-                        args, -1, -1, buildOptions.c_str());
-#else
-    size_t localThreads[3] = { 256, 1, 1};
-    openCLExecuteKernel(clCxt, &arithm_addWeighted, "addWeighted", globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////// Pow //////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    int channels = dst.oclchannels();
-    int depth = dst.depth();
-
-    size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    const char * const typeStr = depth == CV_32F ? "float" : "double";
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr);
-
-    int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
-    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
-
-    float pf = static_cast<float>(p);
-    if(src.depth() == CV_32F)
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf ));
-    else
-        args.push_back( std::make_pair( sizeof(cl_double), (void *)&p ));
-
-    openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
-}
-
-void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
-{
-    if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(x.depth() == CV_32F || x.depth() == CV_64F);
-    y.create(x.size(), x.type());
-
-    arithmetic_pow_run(x, p, y, "arithm_pow", &arithm_pow);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////// setIdentity //////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
-{
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(src.step % src.elemSize() == 0);
-
-    int src_step1 = src.step / src.elemSize(), src_offset1 = src.offset / src.elemSize();
-    size_t local_threads[] = { 16, 16, 1 };
-    size_t global_threads[] = { src.cols, src.rows, 1 };
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    String buildOptions = format("-D T=%s%s", typeMap[src.depth()], channelMap[src.oclchannels()]);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-
-    oclMat sc(1, 1, src.type(), scalar);
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sc.data ));
-
-    openCLExecuteKernel(src.clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
-                        args, -1, -1, buildOptions.c_str());
-}
-
-//////////////////////////////////////////////////////////////////////////////
-////////////////////////////////// Repeat ////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-
-void cv::ocl::repeat(const oclMat & src, int ny, int nx, oclMat & dst)
-{
-    CV_Assert(nx > 0 && ny > 0);
-    dst.create(src.rows * ny, src.cols * nx, src.type());
-
-    for (int y = 0; y < ny; ++y)
-        for (int x = 0; x < nx; ++x)
-        {
-            Rect roi(x * src.cols, y * src.rows, src.cols, src.rows);
-            oclMat hdr = dst(roi);
-            src.copyTo(hdr);
-        }
-}
diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
deleted file mode 100644
index c688366..0000000
--- a/modules/ocl/src/bgfg_mog.cpp
+++ /dev/null
@@ -1,639 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        typedef struct _contant_struct
-        {
-            cl_float c_Tb;
-            cl_float c_TB;
-            cl_float c_Tg;
-            cl_float c_varInit;
-            cl_float c_varMin;
-            cl_float c_varMax;
-            cl_float c_tau;
-            cl_uchar c_shadowVal;
-        }contant_struct;
-
-        cl_mem cl_constants = NULL;
-        float c_TB;
-    }
-}
-
-#if defined _MSC_VER
-#define snprintf sprintf_s
-#endif
-
-namespace cv { namespace ocl { namespace device
-{
-    namespace mog
-    {
-        void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
-            int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
-
-        void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
-
-        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
-                            unsigned char shadowVal);
-
-        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
-                      float alphaT, float prune, bool detectShadows, int nmixtures);
-
-        void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
-    }
-}}}
-
-namespace mog
-{
-    const int defaultNMixtures = 5;
-    const int defaultHistory = 200;
-    const float defaultBackgroundRatio = 0.7f;
-    const float defaultVarThreshold = 2.5f * 2.5f;
-    const float defaultNoiseSigma = 30.0f * 0.5f;
-    const float defaultInitialWeight = 0.05f;
-}
-void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
-{
-
-}
-cv::ocl::BackgroundSubtractor::~BackgroundSubtractor()
-{
-
-}
-
-cv::ocl::MOG::MOG(int nmixtures) :
-frameSize_(0, 0), frameType_(0), nframes_(0)
-{
-    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
-    history = mog::defaultHistory;
-    varThreshold = mog::defaultVarThreshold;
-    backgroundRatio = mog::defaultBackgroundRatio;
-    noiseSigma = mog::defaultNoiseSigma;
-}
-
-void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
-{
-    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
-
-    frameSize_ = frameSize;
-    frameType_ = frameType;
-
-    int ch = CV_MAT_CN(frameType);
-    int work_ch = ch;
-
-    // for each gaussian mixture of each pixel bg model we store
-    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
-    // the mean (nchannels values) and
-    // the diagonal covariance matrix (another nchannels values)
-
-    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-    var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-
-    weight_.setTo(cv::Scalar::all(0));
-    sortKey_.setTo(cv::Scalar::all(0));
-    mean_.setTo(cv::Scalar::all(0));
-    var_.setTo(cv::Scalar::all(0));
-
-    nframes_ = 0;
-}
-
-void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
-{
-    using namespace cv::ocl::device::mog;
-
-    CV_Assert(frame.depth() == CV_8U);
-
-    int ch = frame.oclchannels();
-    int work_ch = ch;
-
-    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
-        initialize(frame.size(), frame.type());
-
-    fgmask.create(frameSize_, CV_8UC1);
-
-    ++nframes_;
-    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
-    CV_Assert(learningRate >= 0.0f);
-
-    mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
-        varThreshold, learningRate, backgroundRatio, noiseSigma);
-}
-
-void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
-{
-    using namespace cv::ocl::device::mog;
-
-    backgroundImage.create(frameSize_, frameType_);
-
-    cv::ocl::device::mog::getBackgroundImage_ocl(backgroundImage.oclchannels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio);
-}
-
-void cv::ocl::MOG::release()
-{
-    frameSize_ = Size(0, 0);
-    frameType_ = 0;
-    nframes_ = 0;
-
-    weight_.release();
-    sortKey_.release();
-    mean_.release();
-    var_.release();
-    clReleaseMemObject(cl_constants);
-}
-
-static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
-    int nmixtures, float varThreshold, float backgroundRatio)
-{
-    Context* clCxt = Context::getContext();
-
-    size_t local_thread[] = {32, 8, 1};
-    size_t global_thread[] = {frame.cols, frame.rows, 1};
-
-    int frame_step = (int)(frame.step/frame.elemSize());
-    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
-    int weight_step = (int)(weight.step/weight.elemSize());
-    int mean_step = (int)(mean.step/mean.elemSize());
-    int var_step = (int)(var.step/var.elemSize());
-
-    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
-    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
-    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
-
-    int frame_offset_y = (int)(frame.offset/frame.step);
-    int frame_offset_x = (int)(frame.offset%frame.step);
-    frame_offset_x = frame_offset_x/(int)frame.elemSize();
-
-    char build_option[50];
-    if(cn == 1)
-    {
-        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
-    }else
-    {
-        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
-    }
-
-    String kernel_name = "mog_withoutLearning_kernel";
-    std::vector<std::pair<size_t, const void*> > args;
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
-
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
-
-    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-}
-
-
-static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
-    int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
-{
-    Context* clCxt = Context::getContext();
-
-    size_t local_thread[] = {32, 8, 1};
-    size_t global_thread[] = {frame.cols, frame.rows, 1};
-
-    oclMat fgmask(fgmask_raw.size(), CV_32SC1);
-
-    int frame_step = (int)(frame.step/frame.elemSize());
-    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
-    int weight_step = (int)(weight.step/weight.elemSize());
-    int sortKey_step = (int)(sortKey.step/sortKey.elemSize());
-    int mean_step = (int)(mean.step/mean.elemSize());
-    int var_step = (int)(var.step/var.elemSize());
-
-    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
-    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
-    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
-
-    int frame_offset_y = (int)(frame.offset/frame.step);
-    int frame_offset_x = (int)(frame.offset%frame.step);
-    frame_offset_x = frame_offset_x/(int)frame.elemSize();
-
-    char build_option[50];
-    if(cn == 1)
-    {
-        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
-    }else
-    {
-        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
-    }
-
-    String kernel_name = "mog_withLearning_kernel";
-    std::vector<std::pair<size_t, const void*> > args;
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&sortKey.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&sortKey_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
-
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&learningRate));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&minVar));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
-
-    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-    fgmask.convertTo(fgmask, CV_8U);
-    fgmask.copyTo(fgmask_raw);
-}
-
-void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
-    int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
-{
-    const float minVar = noiseSigma * noiseSigma;
-
-    if(learningRate > 0.0f)
-        mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
-                         varThreshold, backgroundRatio, learningRate, minVar);
-    else
-        mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
-}
-
-void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
-{
-    Context* clCxt = Context::getContext();
-
-    size_t local_thread[] = {32, 8, 1};
-    size_t global_thread[] = {dst.cols, dst.rows, 1};
-
-    int weight_step = (int)(weight.step/weight.elemSize());
-    int mean_step = (int)(mean.step/mean.elemSize());
-    int dst_step = (int)(dst.step/dst.elemSize());
-
-    char build_option[50];
-    if(cn == 1)
-    {
-        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
-    }else
-    {
-        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
-    }
-
-    String kernel_name = "getBackgroundImage_kernel";
-    std::vector<std::pair<size_t, const void*> > args;
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.cols));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
-
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
-
-    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-}
-
-void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
-{
-    varMin = cv::min(varMin, varMax);
-    varMax = cv::max(varMin, varMax);
-
-    c_TB = TB;
-
-    _contant_struct *constants = new _contant_struct;
-    constants->c_Tb = Tb;
-    constants->c_TB = TB;
-    constants->c_Tg = Tg;
-    constants->c_varInit = varInit;
-    constants->c_varMin = varMin;
-    constants->c_varMax = varMax;
-    constants->c_tau = tau;
-    constants->c_shadowVal = shadowVal;
-
-    cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()),
-        (void *)constants, sizeof(_contant_struct));
-}
-
-void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
-                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
-{
-    oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
-
-    Context* clCxt = Context::getContext();
-
-    const float alpha1 = 1.0f - alphaT;
-
-    cl_int detectShadows_flag = 0;
-    if(detectShadows)
-        detectShadows_flag = 1;
-
-    size_t local_thread[] = {32, 8, 1};
-    size_t global_thread[] = {frame.cols, frame.rows, 1};
-
-    int frame_step = (int)(frame.step/frame.elemSize());
-    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
-    int weight_step = (int)(weight.step/weight.elemSize());
-    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
-    int mean_step = (int)(mean.step/mean.elemSize());
-    int var_step = (int)(variance.step/variance.elemSize());
-
-    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
-    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
-    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
-
-    int frame_offset_y = (int)(frame.offset/frame.step);
-    int frame_offset_x = (int)(frame.offset%frame.step);
-    frame_offset_x = frame_offset_x/(int)frame.elemSize();
-
-    String kernel_name = "mog2_kernel";
-    std::vector<std::pair<size_t, const void*> > args;
-
-    char build_option[50];
-    if(cn == 1)
-    {
-        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
-    }else
-    {
-        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
-    }
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&variance.data));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
-
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alphaT));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alpha1));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&prune));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&detectShadows_flag));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&cl_constants));
-
-    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-
-    fgmask.convertTo(fgmask, CV_8U);
-    fgmask.copyTo(fgmaskRaw);
-}
-
-void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
-{
-    Context* clCxt = Context::getContext();
-
-    size_t local_thread[] = {32, 8, 1};
-    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
-
-    int weight_step = (int)(weight.step/weight.elemSize());
-    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
-    int mean_step = (int)(mean.step/mean.elemSize());
-    int dst_step = (int)(dst.step/dst.elemSize());
-
-    int dst_y = (int)(dst.offset/dst.step);
-    int dst_x = (int)(dst.offset%dst.step);
-    dst_x = dst_x/(int)dst.elemSize();
-
-    String kernel_name = "getBackgroundImage2_kernel";
-    std::vector<std::pair<size_t, const void*> > args;
-
-    char build_option[50];
-    if(cn == 1)
-    {
-        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
-    }else
-    {
-        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
-    }
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y));
-
-    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-}
-
-/////////////////////////////////////////////////////////////////
-// MOG2
-
-namespace mog2
-{
-    // default parameters of gaussian background detection algorithm
-    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
-    const float defaultVarThreshold = 4.0f * 4.0f;
-    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
-    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
-    const float defaultVarThresholdGen = 3.0f * 3.0f;
-    const float defaultVarInit = 15.0f; // initial variance for new components
-    const float defaultVarMax = 5.0f * defaultVarInit;
-    const float defaultVarMin = 4.0f;
-
-    // additional parameters
-    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
-    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
-    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
-}
-
-cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
-{
-    nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures;
-
-    history = mog2::defaultHistory;
-    varThreshold = mog2::defaultVarThreshold;
-    bShadowDetection = true;
-
-    backgroundRatio = mog2::defaultBackgroundRatio;
-    fVarInit = mog2::defaultVarInit;
-    fVarMax  = mog2::defaultVarMax;
-    fVarMin = mog2::defaultVarMin;
-
-    varThresholdGen = mog2::defaultVarThresholdGen;
-    fCT = mog2::defaultfCT;
-    nShadowDetection =  mog2::defaultnShadowDetection;
-    fTau = mog2::defaultfTau;
-}
-
-void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
-{
-    using namespace cv::ocl::device::mog;
-    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
-
-    frameSize_ = frameSize;
-    frameType_ = frameType;
-    nframes_ = 0;
-
-    int ch = CV_MAT_CN(frameType);
-    int work_ch = ch;
-
-    // for each gaussian mixture of each pixel bg model we store ...
-    // the mixture weight (w),
-    // the mean (nchannels values) and
-    // the covariance
-    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    weight_.setTo(Scalar::all(0));
-
-    variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    variance_.setTo(Scalar::all(0));
-
-    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); //4 channels
-    mean_.setTo(Scalar::all(0));
-
-    //make the array for keeping track of the used modes per pixel - all zeros at start
-    bgmodelUsedModes_.create(frameSize_, CV_32FC1);
-    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
-
-    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
-}
-
-void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
-{
-    using namespace cv::ocl::device::mog;
-
-    int ch = frame.oclchannels();
-    int work_ch = ch;
-
-    if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
-        initialize(frame.size(), frame.type());
-
-    fgmask.create(frameSize_, CV_8UC1);
-    fgmask.setTo(cv::Scalar::all(0));
-
-    ++nframes_;
-    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
-    CV_Assert(learningRate >= 0.0f);
-
-    mog2_ocl(frame, frame.oclchannels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
-}
-
-void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
-{
-    using namespace cv::ocl::device::mog;
-
-    backgroundImage.create(frameSize_, frameType_);
-
-    cv::ocl::device::mog::getBackgroundImage2_ocl(backgroundImage.oclchannels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
-}
-
-void cv::ocl::MOG2::release()
-{
-    frameSize_ = Size(0, 0);
-    frameType_ = 0;
-    nframes_ = 0;
-
-    weight_.release();
-    variance_.release();
-    mean_.release();
-
-    bgmodelUsedModes_.release();
-}
diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp
deleted file mode 100644
index 39f09c4..0000000
--- a/modules/ocl/src/blend.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Nathan, liujun@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
-                          oclMat &dst)
-{
-    CV_Assert(src1.depth() <= CV_32F);
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
-              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
-
-    dst.create(src1.size(), src1.type());
-
-    size_t globalSize[] = { dst.cols, dst.rows, 1};
-    size_t localSize[] = { 16, 16, 1 };
-
-    int depth = dst.depth(), ocn = dst.oclchannels();
-    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
-    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
-    int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
-    int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
-    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-
-    const char * const channelMap[] = { "", "", "2", "4", "4" };
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
-                                      typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
-                                      depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights1.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights2.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
-
-    openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
-                        -1, -1, buildOptions.c_str());
-}
diff --git a/modules/ocl/src/brief.cpp b/modules/ocl/src/brief.cpp
deleted file mode 100644
index d176a5e..0000000
--- a/modules/ocl/src/brief.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Matthias Bady aegirxx ==> gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-BRIEF_OCL::BRIEF_OCL( int _bytes ) : bytes( _bytes )
-{
-}
-
-void BRIEF_OCL::compute( const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors ) const
-{
-    CV_Assert( image.type( ) == CV_8UC1 );
-    if ( keypoints.size( ).area( ) == 0 ) return;
-    descriptors = oclMat( Mat( keypoints.cols, bytes, CV_8UC1 ) );
-    if( mask.cols != keypoints.cols )
-    {
-        mask = oclMat( Mat::ones( 1, keypoints.cols, CV_8UC1 ) );
-    }
-    oclMat sum;
-    integral( image, sum, CV_32S );
-    cl_mem sumTexture = bindTexture( sum );
-    std::stringstream build_opt;
-    build_opt
-            << " -D BYTES=" << bytes
-            << " -D KERNEL_SIZE=" << KERNEL_SIZE
-            << " -D BORDER=" << getBorderSize();
-    const String kernelname = "extractBriefDescriptors";
-    size_t localThreads[3] = {bytes, 1, 1};
-    size_t globalThreads[3] = {keypoints.cols * bytes, 1, 1};
-    Context* ctx = Context::getContext( );
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof (cl_mem), (void *) &sumTexture ) );
-    args.push_back( std::make_pair( sizeof (cl_mem), (void *) &keypoints.data ) );
-    args.push_back( std::make_pair( sizeof (cl_int), (void *) &keypoints.step ) );
-    args.push_back( std::make_pair( sizeof (cl_mem), (void *) &descriptors.data ) );
-    args.push_back( std::make_pair( sizeof (cl_int), (void *) &descriptors.step ) );
-    args.push_back( std::make_pair( sizeof (cl_mem), (void *) &mask.data ) );
-    openCLExecuteKernel( ctx, &brief, kernelname, globalThreads, localThreads, args, -1, -1, build_opt.str( ).c_str( ) );
-    openCLFree( sumTexture );
-}
-
-int BRIEF_OCL::getBorderSize( )
-{
-    return PATCH_SIZE / 2 + KERNEL_SIZE / 2;
-}
diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp
deleted file mode 100644
index ca16f43..0000000
--- a/modules/ocl/src/brute_force_matcher.cpp
+++ /dev/null
@@ -1,1213 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Nathan, liujun@multicorewareinc.com
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <functional>
-#include <iterator>
-#include <vector>
-#include <algorithm>
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-static const int OPT_SIZE = 100;
-
-static const char * T_ARR [] = {
-    "uchar",
-    "char",
-    "ushort",
-    "short",
-    "int",
-    "float -D T_FLOAT",
-    "double"};
-
-template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
-void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
-                         const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    int m_size = MAX_DESC_LEN;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
-        T_ARR[query.depth()], distType, block_size, m_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_UnrollMatch";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
-void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
-                         const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
-{
-}
-
-template < int BLOCK_SIZE/*, typename Mask*/ >
-void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
-           const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
-        T_ARR[query.depth()], distType, block_size);
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_Match";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-template < int BLOCK_SIZE/*, typename Mask*/ >
-void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
-           const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
-{
-}
-
-//radius_matchUnrolledCached
-template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
-void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
-                         const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    int m_size = MAX_DESC_LEN;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
-        T_ARR[query.depth()], distType, block_size, m_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
-
-        String kernelName = "BruteForceMatch_RadiusUnrollMatch";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-//radius_match
-template < int BLOCK_SIZE/*, typename Mask*/ >
-void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
-                  const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
-        T_ARR[query.depth()], distType, block_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&maxDistance ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
-
-        String kernelName = "BruteForceMatch_RadiusMatch";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-static void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
-                     const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    const oclMat zeroMask;
-    const oclMat &tempMask = mask.data ? mask : zeroMask;
-    bool is_cpu = isCpuDevice();
-    if (query.cols <= 64)
-    {
-        matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
-    }
-    else if (query.cols <= 128 && !is_cpu)
-    {
-        matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx,  distance, distType);
-    }
-    else
-    {
-        match<16>(query, train, tempMask, trainIdx, distance, distType);
-    }
-}
-
-static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &mask,
-                     const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, int distType)
-{
-    const oclMat zeroMask;
-    const oclMat &tempMask = mask.data ? mask : zeroMask;
-    bool is_cpu = isCpuDevice();
-    if (query.cols <= 64)
-    {
-        matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
-    }
-    else if (query.cols <= 128 && !is_cpu)
-    {
-        matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
-    }
-    else
-    {
-        match<16>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
-    }
-}
-
-//radius matchDispatcher
-static void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
-                     const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
-{
-    const oclMat zeroMask;
-    const oclMat &tempMask = mask.data ? mask : zeroMask;
-    bool is_cpu = isCpuDevice();
-    if (query.cols <= 64)
-    {
-        matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
-    }
-    else if (query.cols <= 128 && !is_cpu)
-    {
-        matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
-    }
-    else
-    {
-        radius_match<16>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
-    }
-}
-
-//knn match Dispatcher
-template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
-void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
-                             const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    int m_size = MAX_DESC_LEN;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
-        T_ARR[query.depth()], distType, block_size, m_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_knnUnrollMatch";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-template < int BLOCK_SIZE/*, typename Mask*/ >
-void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
-               const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
-        T_ARR[query.depth()], distType, block_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_knnMatch";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
-void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    int m_size = MAX_DESC_LEN;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
-        T_ARR[query.depth()], distType, block_size, m_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&m_size ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_calcDistanceUnrolled";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-template < int BLOCK_SIZE/*, typename Mask*/ >
-void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
-{
-    cv::ocl::Context *ctx = query.clCxt;
-    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
-    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
-    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-    int block_size = BLOCK_SIZE;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    char opt [OPT_SIZE] = "";
-    sprintf(opt,
-        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
-        T_ARR[query.depth()], distType, block_size);
-
-    if(globalSize[0] != 0)
-    {
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
-        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
-        args.push_back( std::make_pair( smemSize, (void *)NULL));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        String kernelName = "BruteForceMatch_calcDistance";
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Calc Distance dispatcher
-static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
-                            const oclMat &allDist, int distType)
-{
-    if (query.cols <= 64)
-    {
-        calcDistanceUnrolled<16, 64>(query, train, mask, allDist, distType);
-    }
-    else if (query.cols <= 128)
-    {
-        calcDistanceUnrolled<16, 128>(query, train, mask, allDist, distType);
-    }
-    else
-    {
-        calcDistance<16>(query, train, mask, allDist, distType);
-    }
-}
-
-static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
-                      const oclMat &trainIdx, const oclMat &distance, int distType)
-{
-    bool is_cpu = isCpuDevice();
-    if (query.cols <= 64)
-    {
-        knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
-    }
-    else if (query.cols <= 128 && !is_cpu)
-    {
-        knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
-    }
-    else
-    {
-        knn_match<16>(query, train, mask, trainIdx, distance, distType);
-    }
-}
-
-template <int BLOCK_SIZE>
-void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int /*distType*/)
-{
-    cv::ocl::Context *ctx = trainIdx.clCxt;
-    size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
-    size_t localSize[] = {BLOCK_SIZE, 1, 1};
-    int block_size = BLOCK_SIZE;
-    String kernelName = "BruteForceMatch_findBestMatch";
-
-    for (int i = 0; i < k; ++i)
-    {
-        std::vector< std::pair<size_t, const void *> > args;
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&allDist.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&i));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_size ));
-        //args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
-        //args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
-        //args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));
-
-        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
-    }
-}
-
-static void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
-{
-    findKnnMatch<256>(k, trainIdx, distance, allDist, distType);
-}
-
-static void kmatchDispatcher(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
-                      const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
-{
-    const oclMat zeroMask;
-    const oclMat &tempMask = mask.data ? mask : zeroMask;
-    if (k == 2)
-    {
-        match2Dispatcher(query, train, tempMask, trainIdx, distance, distType);
-    }
-    else
-    {
-        calcDistanceDispatcher(query, train, tempMask, allDist, distType);
-        findKnnMatchDispatcher(k, trainIdx, distance, allDist, distType);
-    }
-}
-
-cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType distType_) : distType(distType_)
-{
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::add(const std::vector<oclMat> &descCollection)
-{
-    trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
-}
-
-const std::vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
-{
-    return trainDescCollection;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::clear()
-{
-    trainDescCollection.clear();
-}
-
-bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
-{
-    return trainDescCollection.empty();
-}
-
-bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
-{
-    return true;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train,
-        oclMat &trainIdx, oclMat &distance, const oclMat &mask)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.cols == query.cols && train.type() == query.type());
-
-    ensureSizeIsEnough(1, query.rows, CV_32S, trainIdx);
-    ensureSizeIsEnough(1, query.rows, CV_32F, distance);
-
-    matchDispatcher(query, train, mask, trainIdx, distance, distType);
-
-    return;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-
-    matchConvert(trainIdxCPU, distanceCPU, matches);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *trainIdx_ptr = trainIdx.ptr<int>();
-    const float *distance_ptr =  distance.ptr<float>();
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
-    {
-        int trainIdx = *trainIdx_ptr;
-
-        if (trainIdx == -1)
-            continue;
-
-        float distance = *distance_ptr;
-
-        DMatch m(queryIdx, trainIdx, 0, distance);
-
-        matches.push_back(m);
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask)
-{
-    CV_Assert(mask.empty()); // mask is not supported at the moment
-    oclMat trainIdx, distance;
-    matchSingle(query, train, trainIdx, distance, mask);
-    matchDownload(trainIdx, distance, matches);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks)
-{
-
-    if (empty())
-        return;
-
-    if (masks.empty())
-    {
-        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
-
-        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
-
-        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
-            *trainCollectionCPU_ptr = trainDescCollection[i];
-
-        trainCollection.upload(trainCollectionCPU);
-        maskCollection.release();
-    }
-    else
-    {
-        CV_Assert(masks.size() == trainDescCollection.size());
-
-        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
-        Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
-
-        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
-        oclMat *maskCollectionCPU_ptr = maskCollectionCPU.ptr<oclMat>();
-
-        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
-        {
-            const oclMat &train = trainDescCollection[i];
-            const oclMat &mask = masks[i];
-
-            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));
-
-            *trainCollectionCPU_ptr = train;
-            *maskCollectionCPU_ptr = mask;
-        }
-
-        trainCollection.upload(trainCollectionCPU);
-        maskCollection.upload(maskCollectionCPU);
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
-        oclMat &imgIdx, oclMat &distance, const oclMat &masks)
-{
-    if (query.empty() || trainCollection.empty())
-        return;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-
-    const int nQuery = query.rows;
-
-    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32F, distance);
-
-    matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
-
-    return;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-
-    matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.cols == trainIdx.cols);
-    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *trainIdx_ptr = trainIdx.ptr<int>();
-    const int *imgIdx_ptr = imgIdx.ptr<int>();
-    const float *distance_ptr =  distance.ptr<float>();
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-    {
-        int trainIdx = *trainIdx_ptr;
-
-        if (trainIdx == -1)
-            continue;
-
-        int imgIdx = *imgIdx_ptr;
-
-        float distance = *distance_ptr;
-
-        DMatch m(queryIdx, trainIdx, imgIdx, distance);
-
-        matches.push_back(m);
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks)
-{
-    oclMat trainCollection;
-    oclMat maskCollection;
-
-    makeGpuCollection(trainCollection, maskCollection, masks);
-
-    oclMat trainIdx, imgIdx, distance;
-
-    matchCollection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
-    matchDownload(trainIdx, imgIdx, distance, matches);
-}
-
-// knn match
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
-        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.type() == query.type() && train.cols == query.cols);
-
-    const int nQuery = query.rows;
-    const int nTrain = train.rows;
-
-    if (k == 2)
-    {
-        ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
-        ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
-    }
-    else
-    {
-        ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
-        ensureSizeIsEnough(nQuery, k, CV_32F, distance);
-        ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
-    }
-
-    trainIdx.setTo(Scalar::all(-1));
-
-    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
-
-    return;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-
-    knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &trainIdx, const Mat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);
-    CV_Assert(distance.size() == trainIdx.size());
-    CV_Assert(trainIdx.isContinuous() && distance.isContinuous());
-
-    const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
-    const int k = trainIdx.type() == CV_32SC2 ? 2 : trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *trainIdx_ptr = trainIdx.ptr<int>();
-    const float *distance_ptr = distance.ptr<float>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch> &curMatches = matches.back();
-        curMatches.reserve(k);
-
-        for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
-        {
-            int trainIdx = *trainIdx_ptr;
-
-            if (trainIdx != -1)
-            {
-                float distance = *distance_ptr;
-
-                DMatch m(queryIdx, trainIdx, 0, distance);
-
-                curMatches.push_back(m);
-            }
-        }
-
-        if (compactResult && curMatches.empty())
-            matches.pop_back();
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, const oclMat &train, std::vector< std::vector<DMatch> > &matches
-        , int k, const oclMat &mask, bool compactResult)
-{
-    oclMat trainIdx, distance, allDist;
-    knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask);
-    knnMatchDownload(trainIdx, distance, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
-        oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &/*maskCollection*/)
-{
-    if (query.empty() || trainCollection.empty())
-        return;
-
-    // typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
-    //                          const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-
-    const int nQuery = query.rows;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
-
-    trainIdx.setTo(Scalar::all(-1));
-
-    //caller_t func = callers[distType][query.depth()];
-    //CV_Assert(func != 0);
-
-    //func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx,
-        const oclMat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-
-    knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
-        std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC2);
-    CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols);
-    CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *trainIdx_ptr = trainIdx.ptr<int>();
-    const int *imgIdx_ptr = imgIdx.ptr<int>();
-    const float *distance_ptr = distance.ptr<float>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch> &curMatches = matches.back();
-        curMatches.reserve(2);
-
-        for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int trainIdx = *trainIdx_ptr;
-
-            if (trainIdx != -1)
-            {
-                int imgIdx = *imgIdx_ptr;
-
-                float distance = *distance_ptr;
-
-                DMatch m(queryIdx, trainIdx, imgIdx, distance);
-
-                curMatches.push_back(m);
-            }
-        }
-
-        if (compactResult && curMatches.empty())
-            matches.pop_back();
-    }
-}
-
-namespace
-{
-    struct ImgIdxSetter
-    {
-        explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
-        inline void operator()(DMatch &m) const
-        {
-            m.imgIdx = imgIdx;
-        }
-        int imgIdx;
-    };
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
-        const std::vector<oclMat> &masks, bool compactResult)
-{
-    if (k == 2)
-    {
-        oclMat trainCollection;
-        oclMat maskCollection;
-
-        makeGpuCollection(trainCollection, maskCollection, masks);
-
-        oclMat trainIdx, imgIdx, distance;
-
-        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
-        knnMatch2Download(trainIdx, imgIdx, distance, matches);
-    }
-    else
-    {
-        if (query.empty() || empty())
-            return;
-
-        std::vector< std::vector<DMatch> > curMatches;
-        std::vector<DMatch> temp;
-        temp.reserve(2 * k);
-
-        matches.resize(query.rows);
-        for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&std::vector<DMatch>::reserve), k));
-
-        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
-        {
-            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? oclMat() : masks[imgIdx]);
-
-            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
-            {
-                std::vector<DMatch> &localMatch = curMatches[queryIdx];
-                std::vector<DMatch> &globalMatch = matches[queryIdx];
-
-                std::for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));
-
-                temp.clear();
-                std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
-
-                globalMatch.clear();
-                const size_t count = std::min((size_t)k, temp.size());
-                std::copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
-            }
-        }
-
-        if (compactResult)
-        {
-            std::vector< std::vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&std::vector<DMatch>::empty));
-            matches.erase(new_end, matches.end());
-        }
-    }
-}
-
-// radiusMatchSingle
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
-        oclMat &trainIdx,   oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    const int nQuery = query.rows;
-    const int nTrain = train.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.type() == query.type() && train.cols == query.cols);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    if (trainIdx.empty())
-    {
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
-    }
-
-    nMatches.setTo(Scalar::all(0));
-
-    matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
-
-    return;
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
-        std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
-        std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const float *distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatches == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>(nMatches));
-        std::vector<DMatch> &curMatches = matches.back();
-
-        for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
-        {
-            int trainIdx = *trainIdx_ptr;
-
-            float distance = *distance_ptr;
-
-            DMatch m(queryIdx, trainIdx, 0, distance);
-
-            curMatches[i] = m;
-        }
-
-        std::sort(curMatches.begin(), curMatches.end());
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, const oclMat &train, std::vector< std::vector<DMatch> > &matches,
-        float maxDistance, const oclMat &mask, bool compactResult)
-{
-    oclMat trainIdx, distance, nMatches;
-    radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask);
-    radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-        oclMat &nMatches, float /*maxDistance*/, const std::vector<oclMat> &masks)
-{
-    if (query.empty() || empty())
-        return;
-
-#if 0
-    typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
-                             const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
-    static const caller_t callers[3][6] =
-    {
-        {
-            ocl_matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-            ocl_matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-            ocl_matchL1_gpu<int>, matchL1_gpu<float>
-        },
-        {
-            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-            0/*matchL2_gpu<int>*/, ocl_matchL2_gpu<float>
-        },
-        {
-            ocl_matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-            ocl_matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-            ocl_matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-        }
-    };
-#endif
-    const int nQuery = query.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));
-
-    nMatches.create(1, nQuery, CV_32SC1);
-    if (trainIdx.empty())
-    {
-        trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
-        imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
-        distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1);
-    }
-
-    nMatches.setTo(Scalar::all(0));
-
-    //caller_t func = callers[distType][query.depth()];
-    //CV_Assert(func != 0);
-
-    std::vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end());
-    std::vector<oclMat> masks_(masks.begin(), masks.end());
-
-    /*  func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
-          trainIdx, imgIdx, distance, nMatches));*/
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
-        const oclMat &nMatches, std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
-        std::vector< std::vector<DMatch> > &matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size());
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int *nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const int *imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
-        const float *distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatches == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch> &curMatches = matches.back();
-        curMatches.reserve(nMatches);
-
-        for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int trainIdx = *trainIdx_ptr;
-            int imgIdx = *imgIdx_ptr;
-            float distance = *distance_ptr;
-
-            DMatch m(queryIdx, trainIdx, imgIdx, distance);
-
-            curMatches.push_back(m);
-        }
-
-        std::sort(curMatches.begin(), curMatches.end());
-    }
-}
-
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
-        const std::vector<oclMat> &masks, bool compactResult)
-{
-    oclMat trainIdx, imgIdx, distance, nMatches;
-    radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
-    radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
-}
diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp
deleted file mode 100644
index 0116728..0000000
--- a/modules/ocl/src/build_warps.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpPlaneMaps
-
-void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
-                                 float scale, oclMat &xmap, oclMat &ymap)
-{
-    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
-    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());
-
-    Mat K_Rinv = K * R.t();
-    CV_Assert(K_Rinv.isContinuous());
-
-    Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3
-    KRT_mat(Range::all(), Range(0, 8)) = K_Rinv.reshape(1, 1);
-    KRT_mat(Range::all(), Range(9, 11)) = T;
-
-    oclMat KRT_oclMat(KRT_mat);
-    // transfer K_Rinv and T into a single cl_mem
-    xmap.create(dst_roi.size(), CV_32F);
-    ymap.create(dst_roi.size(), CV_32F);
-
-    int tl_u = dst_roi.tl().x;
-    int tl_v = dst_roi.tl().y;
-
-    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
-    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
-
-    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
-#ifdef ANDROID
-    size_t localThreads[3]  = {32, 4, 1};
-#else
-    size_t localThreads[3]  = {32, 8, 1};
-#endif
-    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpCylyndricalMaps
-
-void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                       oclMat &xmap, oclMat &ymap)
-{
-    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
-
-    Mat K_Rinv = K * R.t();
-    CV_Assert(K_Rinv.isContinuous());
-
-    oclMat KR_oclMat(K_Rinv.reshape(1, 1));
-
-    xmap.create(dst_roi.size(), CV_32F);
-    ymap.create(dst_roi.size(), CV_32F);
-
-    int tl_u = dst_roi.tl().x;
-    int tl_v = dst_roi.tl().y;
-
-    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
-    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
-
-    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
-#ifdef ANDROID
-    size_t localThreads[3]  = {32, 1, 1};
-#else
-    size_t localThreads[3]  = {32, 8, 1};
-#endif
-    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpSphericalMaps
-
-void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                     oclMat &xmap, oclMat &ymap)
-{
-    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
-
-    Mat K_Rinv = K * R.t();
-    CV_Assert(K_Rinv.isContinuous());
-
-    oclMat KR_oclMat(K_Rinv.reshape(1, 1));
-    // transfer K_Rinv, R_Kinv into a single cl_mem
-    xmap.create(dst_roi.size(), CV_32F);
-    ymap.create(dst_roi.size(), CV_32F);
-
-    int tl_u = dst_roi.tl().x;
-    int tl_v = dst_roi.tl().y;
-
-    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
-    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
-
-    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
-#ifdef ANDROID
-    size_t localThreads[3]  = {32, 4, 1};
-#else
-    size_t localThreads[3]  = {32, 8, 1};
-#endif
-    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpAffineMaps
-
-void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
-{
-    CV_Assert(M.rows == 2 && M.cols == 3);
-    CV_Assert(dsize.area());
-
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
-
-    float coeffs[2 * 3];
-    Mat coeffsMat(2, 3, CV_32F, (void *)coeffs);
-
-    if (inverse)
-        M.convertTo(coeffsMat, coeffsMat.type());
-    else
-    {
-        cv::Mat iM;
-        invertAffineTransform(M, iM);
-        iM.convertTo(coeffsMat, coeffsMat.type());
-    }
-
-    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
-    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
-
-    oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
-
-    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
-#ifdef ANDROID
-    size_t localThreads[3]  = {32, 4, 1};
-#else
-    size_t localThreads[3]  = {32, 8, 1};
-#endif
-    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpPerspectiveMaps
-
-void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
-{
-    CV_Assert(M.rows == 3 && M.cols == 3);
-    CV_Assert(dsize.area() > 0);
-
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
-
-    float coeffs[3 * 3];
-    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
-
-    if (inverse)
-        M.convertTo(coeffsMat, coeffsMat.type());
-    else
-    {
-        cv::Mat iM;
-        invert(M, iM);
-        iM.convertTo(coeffsMat, coeffsMat.type());
-    }
-
-    oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
-
-    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
-    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
-
-    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
-
-    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
-}
diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp
deleted file mode 100644
index 8c68d8b..0000000
--- a/modules/ocl/src/canny.cpp
+++ /dev/null
@@ -1,387 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(1, 1, CV_32SC1)
-{
-    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
-
-    create(dx_.size(), -1);
-}
-
-void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
-{
-    ensureSizeIsEnough(image_size, CV_32SC1, dx);
-    ensureSizeIsEnough(image_size, CV_32SC1, dy);
-
-    if(apperture_size == 3)
-    {
-        ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
-        ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
-    }
-    else if(apperture_size > 0)
-    {
-        Mat kx, ky;
-        if (!filterDX)
-        {
-            filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
-        }
-        if (!filterDY)
-        {
-            filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
-        }
-    }
-    ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, magBuf);
-    ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, mapBuf);
-
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf1);
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf2);
-}
-
-void cv::ocl::CannyBuf::release()
-{
-    dx.release();
-    dy.release();
-    dx_buf.release();
-    dy_buf.release();
-    magBuf.release();
-    mapBuf.release();
-    trackBuf1.release();
-    trackBuf2.release();
-}
-
-namespace cv
-{
-    namespace ocl
-    {
-        namespace canny
-        {
-            void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
-
-            void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
-            void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
-
-            void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
-
-            void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols);
-
-            void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols);
-
-            void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
-        }
-    }
-}// cv::ocl
-
-namespace
-{
-    void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
-    {
-        using namespace ::cv::ocl::canny;
-        calcMap_gpu(buf.dx, buf.dy, buf.magBuf, buf.mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
-
-        edgesHysteresisLocal_gpu(buf.mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
-
-        edgesHysteresisGlobal_gpu(buf.mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
-
-        getEdges_gpu(buf.mapBuf, dst, dst.rows, dst.cols);
-    }
-}
-
-void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
-{
-    CannyBuf buf(src.size(), apperture_size);
-    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
-}
-
-void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
-{
-    using namespace ::cv::ocl::canny;
-
-    CV_Assert(src.type() == CV_8UC1);
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh );
-
-    dst.create(src.size(), CV_8U);
-    dst.setTo(Scalar::all(0));
-
-    buf.create(src.size(), apperture_size);
-    buf.magBuf.setTo(Scalar::all(0));
-
-    if (apperture_size == 3)
-    {
-        calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
-
-        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.magBuf, src.rows, src.cols, L2gradient);
-    }
-    else
-    {
-        buf.filterDX->apply(src, buf.dx);
-        buf.filterDY->apply(src, buf.dy);
-
-        calcMagnitude_gpu(buf.dx, buf.dy, buf.magBuf, src.rows, src.cols, L2gradient);
-    }
-    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
-}
-void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    CannyBuf buf(dx, dy);
-    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
-}
-
-void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    using namespace ::cv::ocl::canny;
-
-    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh);
-
-    dst.create(dx.size(), CV_8U);
-    dst.setTo(Scalar::all(0));
-
-    buf.dx = dx;
-    buf.dy = dy;
-    buf.create(dx.size(), -1);
-    buf.magBuf.setTo(Scalar::all(0));
-    calcMagnitude_gpu(buf.dx, buf.dy, buf.magBuf, dx.rows, dx.cols, L2gradient);
-
-    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
-}
-
-void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols)
-{
-    Context *clCxt = src.clCxt;
-    String kernelName = "calcSobelRowPass";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
-{
-    Context *clCxt = dx_buf.clCxt;
-    String kernelName = "calcMagnitude_buf";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dx.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dy.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mag.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-
-    const char * build_options = L2Grad ? "-D L2GRAD":"";
-    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
-}
-void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
-{
-    Context *clCxt = dx.clCxt;
-    String kernelName = "calcMagnitude";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dx.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dy.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mag.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-
-    const char * build_options = L2Grad ? "-D L2GRAD":"";
-    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
-}
-
-void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
-{
-    Context *clCxt = dx.clCxt;
-
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dx.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dy.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mag.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&low_thresh));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&high_thresh));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
-
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    String kernelName = "calcMap";
-    size_t localThreads[3]  = {16, 16, 1};
-
-    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols)
-{
-    Context *clCxt = map.clCxt;
-    std::vector< std::pair<size_t, const void *> > args;
-
-    Mat counterMat(counter.rows, counter.cols, counter.type());
-    counterMat.at<int>(0, 0) = 0;
-    counter.upload(counterMat);
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st1.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    cl_int stepBytes = map.step;
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&stepBytes));
-    cl_int offsetBytes = map.offset;
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offsetBytes));
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-
-    openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisLocal", globalThreads, localThreads, args, -1, -1);
-}
-
-void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols)
-{
-    Context *clCxt = map.clCxt;
-    std::vector< std::pair<size_t, const void *> > args;
-    size_t localThreads[3]  = {128, 1, 1};
-
-    while(1 > 0)
-    {
-        Mat counterMat; counter.download(counterMat);
-        int count = counterMat.at<int>(0, 0);
-        CV_Assert(count >= 0);
-        if (count == 0)
-            break;
-
-        counterMat.at<int>(0, 0) = 0;
-        counter.upload(counterMat);
-
-        args.clear();
-        size_t globalThreads[3] = {std::min((unsigned)count, 65535u) * 128, divUp(count, 65535), 1};
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st1.data));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st2.data));
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter.data));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&count));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
-
-        openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisGlobal", globalThreads, localThreads, args, -1, -1);
-        std::swap(st1, st2);
-    }
-}
-
-void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
-{
-    Context *clCxt = map.clCxt;
-    String kernelName = "getEdges";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset));
-
-    size_t globalThreads[3] = {cols, rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-
-    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
-}
diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp
deleted file mode 100644
index d6d081f..0000000
--- a/modules/ocl/src/cl_context.cpp
+++ /dev/null
@@ -1,944 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Guoping Long, longguoping@gmail.com
-//    Niko Li, newlife20080214@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <stdlib.h>
-#include <ctype.h>
-#include <iomanip>
-#include <fstream>
-#include "cl_programcache.hpp"
-
-#include "opencv2/ocl/private/opencl_utils.hpp"
-
-namespace cv {
-namespace ocl {
-
-using namespace cl_utils;
-
-#if defined(WIN32)
-static bool __termination = false;
-#endif
-
-struct __Module
-{
-    __Module();
-    ~__Module();
-    cv::Mutex initializationMutex;
-    cv::Mutex currentContextMutex;
-};
-static __Module __module;
-
-cv::Mutex& getInitializationMutex()
-{
-    return __module.initializationMutex;
-}
-
-static cv::Mutex& getCurrentContextMutex()
-{
-    return __module.currentContextMutex;
-}
-
-static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
-{
-    size_t p0 = versionStr.find(' ');
-    while (true)
-    {
-        if (p0 == std::string::npos)
-            break;
-        if (p0 + 1 >= versionStr.length())
-            break;
-        char c = versionStr[p0 + 1];
-        if (isdigit(c))
-            break;
-        p0 = versionStr.find(' ', p0 + 1);
-    }
-    size_t p1 = versionStr.find('.', p0);
-    size_t p2 = versionStr.find(' ', p1);
-    if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos)
-    {
-        major = 0;
-        minor = 0;
-        return false;
-    }
-    std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
-    std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
-    major = atoi(majorStr.c_str());
-    minor = atoi(minorStr.c_str());
-    return true;
-}
-
-struct PlatformInfoImpl : public PlatformInfo
-{
-    cl_platform_id platform_id;
-
-    std::vector<int> deviceIDs;
-
-    PlatformInfoImpl()
-        : platform_id(NULL)
-    {
-    }
-
-    void init(int id, cl_platform_id platform)
-    {
-        CV_Assert(platform_id == NULL);
-
-        this->_id = id;
-        platform_id = platform;
-
-        openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, this->platformProfile));
-        openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VERSION, this->platformVersion));
-        openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_NAME, this->platformName));
-        openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR, this->platformVendor));
-        openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, this->platformExtensons));
-
-        parseOpenCLVersion(this->platformVersion,
-                this->platformVersionMajor, this->platformVersionMinor);
-    }
-
-};
-
-struct DeviceInfoImpl: public DeviceInfo
-{
-    cl_platform_id platform_id;
-    cl_device_id device_id;
-
-    DeviceInfoImpl()
-        : platform_id(NULL), device_id(NULL)
-    {
-    }
-
-    void init(int id, PlatformInfoImpl& platformInfoImpl, cl_device_id device)
-    {
-        CV_Assert(device_id == NULL);
-
-        this->_id = id;
-        platform_id = platformInfoImpl.platform_id;
-        device_id = device;
-
-        this->platform = &platformInfoImpl;
-
-        cl_device_type type = cl_device_type(-1);
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_TYPE, type));
-        this->deviceType = DeviceType(type);
-
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_PROFILE, this->deviceProfile));
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VERSION, this->deviceVersion));
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_NAME, this->deviceName));
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR, this->deviceVendor));
-        cl_uint vendorID = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR_ID, vendorID));
-        this->deviceVendorId = vendorID;
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DRIVER_VERSION, this->deviceDriverVersion));
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, this->deviceExtensions));
-
-        parseOpenCLVersion(this->deviceVersion,
-                this->deviceVersionMajor, this->deviceVersionMinor);
-
-        size_t maxWorkGroupSize = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize));
-        this->maxWorkGroupSize = maxWorkGroupSize;
-
-        cl_uint maxDimensions = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxDimensions));
-        std::vector<size_t> maxWorkItemSizes(maxDimensions);
-        openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
-                (void *)&maxWorkItemSizes[0], 0));
-        this->maxWorkItemSizes = maxWorkItemSizes;
-
-        cl_uint maxComputeUnits = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits));
-        this->maxComputeUnits = maxComputeUnits;
-
-        cl_ulong localMemorySize = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_LOCAL_MEM_SIZE, localMemorySize));
-        this->localMemorySize = (size_t)localMemorySize;
-
-        cl_ulong maxMemAllocSize = 0;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize));
-        this->maxMemAllocSize = (size_t)maxMemAllocSize;
-
-        cl_bool unifiedMemory = false;
-        openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_HOST_UNIFIED_MEMORY, unifiedMemory));
-        this->isUnifiedMemory = unifiedMemory != 0;
-
-        //initialize extra options for compilation. Currently only fp64 is included.
-        //Assume 4KB is enough to store all possible extensions.
-        openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, this->deviceExtensions));
-
-        size_t fp64_khr = this->deviceExtensions.find("cl_khr_fp64");
-        if(fp64_khr != std::string::npos)
-        {
-            this->compilationExtraOptions += "-D DOUBLE_SUPPORT";
-            this->haveDoubleSupport = true;
-        }
-        else
-        {
-            this->haveDoubleSupport = false;
-        }
-
-        size_t intel_platform = platformInfoImpl.platformVendor.find("Intel");
-        if(intel_platform != std::string::npos)
-        {
-            this->compilationExtraOptions += " -D INTEL_DEVICE";
-            this->isIntelDevice = true;
-        }
-        else
-        {
-            this->isIntelDevice = false;
-        }
-
-        if (id < 0)
-        {
-#ifdef CL_VERSION_1_2
-            if (this->deviceVersionMajor > 1 || (this->deviceVersionMajor == 1 && this->deviceVersionMinor >= 2))
-            {
-                ::clRetainDevice(device);
-            }
-#endif
-        }
-    }
-};
-
-static std::vector<PlatformInfoImpl> global_platforms;
-static std::vector<DeviceInfoImpl> global_devices;
-
-static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
-    std::stringstream ss(s);
-    std::string item;
-    while (std::getline(ss, item, delim)) {
-        elems.push_back(item);
-    }
-}
-
-static std::vector<std::string> split(const std::string &s, char delim) {
-    std::vector<std::string> elems;
-    split(s, delim, elems);
-    return elems;
-}
-
-// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
-// Sample: AMD:GPU:
-// Sample: AMD:GPU:Tahiti
-// Sample: :GPU|CPU: = '' = ':' = '::'
-static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
-        std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
-{
-    std::string deviceTypesStr;
-    size_t p0 = configurationStr.find(':');
-    if (p0 != std::string::npos)
-    {
-        size_t p1 = configurationStr.find(':', p0 + 1);
-        if (p1 != std::string::npos)
-        {
-            size_t p2 = configurationStr.find(':', p1 + 1);
-            if (p2 != std::string::npos)
-            {
-                std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
-                return false;
-            }
-            else
-            {
-                // assume platform + device types + device name/id
-                platform = configurationStr.substr(0, p0);
-                deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1));
-                deviceNameOrID = configurationStr.substr(p1 + 1, configurationStr.length() - (p1 + 1));
-            }
-        }
-        else
-        {
-            // assume platform + device types
-            platform = configurationStr.substr(0, p0);
-            deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1));
-        }
-    }
-    else
-    {
-        // assume only platform
-        platform = configurationStr;
-    }
-    deviceTypes = split(deviceTypesStr, '|');
-    return true;
-}
-
-static bool selectOpenCLDevice()
-{
-    std::string platform;
-    std::vector<std::string> deviceTypes;
-    std::string deviceName;
-    const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
-    if (configuration)
-    {
-        if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
-            return false;
-    }
-
-    bool isID = false;
-    int deviceID = -1;
-    if (deviceName.length() == 1)
-    // We limit ID range to 0..9, because we want to write:
-    // - '2500' to mean i5-2500
-    // - '8350' to mean AMD FX-8350
-    // - '650' to mean GeForce 650
-    // To extend ID range change condition to '> 0'
-    {
-        isID = true;
-        for (size_t i = 0; i < deviceName.length(); i++)
-        {
-            if (!isdigit(deviceName[i]))
-            {
-                isID = false;
-                break;
-            }
-        }
-        if (isID)
-        {
-            deviceID = atoi(deviceName.c_str());
-            CV_Assert(deviceID >= 0);
-        }
-    }
-
-    const PlatformInfo* platformInfo = NULL;
-    if (platform.length() > 0)
-    {
-        PlatformsInfo platforms;
-        getOpenCLPlatforms(platforms);
-        for (size_t i = 0; i < platforms.size(); i++)
-        {
-            if (platforms[i]->platformName.find(platform) != std::string::npos)
-            {
-                platformInfo = platforms[i];
-                break;
-            }
-        }
-        if (platformInfo == NULL)
-        {
-            std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
-            goto not_found;
-        }
-    }
-
-    if (deviceTypes.size() == 0)
-    {
-        if (!isID)
-        {
-            deviceTypes.push_back("GPU");
-            deviceTypes.push_back("CPU");
-        }
-        else
-        {
-            deviceTypes.push_back("ALL");
-        }
-    }
-    for (size_t t = 0; t < deviceTypes.size(); t++)
-    {
-        int deviceType = 0;
-        if (deviceTypes[t] == "GPU")
-        {
-            deviceType = CVCL_DEVICE_TYPE_GPU;
-        }
-        else if (deviceTypes[t] == "CPU")
-        {
-            deviceType = CVCL_DEVICE_TYPE_CPU;
-        }
-        else if (deviceTypes[t] == "ACCELERATOR")
-        {
-            deviceType = CVCL_DEVICE_TYPE_ACCELERATOR;
-        }
-        else if (deviceTypes[t] == "ALL")
-        {
-            deviceType = CVCL_DEVICE_TYPE_ALL;
-        }
-        else
-        {
-            std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
-            goto not_found;
-        }
-
-        DevicesInfo devices;
-        getOpenCLDevices(devices, deviceType, platformInfo);
-
-        for (size_t i = (isID ? deviceID : 0);
-             (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
-             i++)
-        {
-            if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos)
-            {
-                // check for OpenCL 1.1
-                if (devices[i]->deviceVersionMajor < 1 ||
-                        (devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1))
-                {
-                    std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName
-                            << "(" << devices[i]->platform->platformName << ")" << std::endl;
-                    continue; // unsupported version of device, skip it
-                }
-                try
-                {
-                    setDevice(devices[i]);
-                }
-                catch (...)
-                {
-                    std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName
-                            << "(" << devices[i]->platform->platformName << ")" << std::endl;
-                    goto not_found;
-                }
-                return true;
-            }
-        }
-    }
-not_found:
-    std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
-            << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
-            << "    Device types: ";
-    for (size_t t = 0; t < deviceTypes.size(); t++)
-    {
-        std::cerr << deviceTypes[t] << " ";
-    }
-    std::cerr << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
-    return false;
-}
-
-static bool __initialized = false;
-static int initializeOpenCLDevices()
-{
-    assert(!__initialized);
-    __initialized = true;
-
-    assert(global_devices.size() == 0);
-
-    std::vector<cl_platform_id> platforms;
-    try
-    {
-        openCLSafeCall(getPlatforms(platforms));
-    }
-    catch (cv::Exception&)
-    {
-        return 0; // OpenCL not found
-    }
-
-    global_platforms.resize(platforms.size());
-
-    for (size_t i = 0; i < platforms.size(); ++i)
-    {
-        PlatformInfoImpl& platformInfo = global_platforms[i];
-
-        cl_platform_id platform = platforms[i];
-        platformInfo.init(i, platform);
-
-        std::vector<cl_device_id> devices;
-        cl_int status = getDevices(platform, CL_DEVICE_TYPE_ALL, devices);
-        if(status != CL_DEVICE_NOT_FOUND)
-            openCLVerifyCall(status);
-
-        if(devices.size() > 0)
-        {
-            int baseIndx = global_devices.size();
-            global_devices.resize(baseIndx + devices.size());
-            platformInfo.deviceIDs.resize(devices.size());
-            platformInfo.devices.resize(devices.size());
-
-            for(size_t j = 0; j < devices.size(); ++j)
-            {
-                cl_device_id device = devices[j];
-
-                DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
-                platformInfo.deviceIDs[j] = baseIndx + j;
-                deviceInfo.init(baseIndx + j, platformInfo, device);
-            }
-        }
-    }
-
-    for (size_t i = 0; i < platforms.size(); ++i)
-    {
-        PlatformInfoImpl& platformInfo = global_platforms[i];
-        for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
-        {
-            DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
-            platformInfo.devices[j] = &deviceInfo;
-        }
-    }
-
-    return global_devices.size();
-}
-
-
-DeviceInfo::DeviceInfo()
-    : _id(-1), deviceType(DeviceType(0)),
-      deviceVendorId(-1),
-      maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), maxMemAllocSize(0),
-      deviceVersionMajor(0), deviceVersionMinor(0),
-      haveDoubleSupport(false), isUnifiedMemory(false),isIntelDevice(false),
-      platform(NULL)
-{
-    // nothing
-}
-
-DeviceInfo::~DeviceInfo() { }
-
-PlatformInfo::PlatformInfo()
-    : _id(-1),
-      platformVersionMajor(0), platformVersionMinor(0)
-{
-    // nothing
-}
-
-PlatformInfo::~PlatformInfo() { }
-
-class ContextImpl;
-
-struct CommandQueue
-{
-    ContextImpl* context_;
-    cl_command_queue clQueue_;
-
-    CommandQueue() : context_(NULL), clQueue_(NULL) { }
-    ~CommandQueue() { release(); }
-
-    void create(ContextImpl* context_);
-    void release()
-    {
-#ifdef WIN32
-        // if process is on termination stage (ExitProcess was called and other threads were terminated)
-        // then disable command queue release because it may cause program hang
-        if (!__termination)
-#endif
-        {
-            if(clQueue_)
-            {
-                openCLSafeCall(clReleaseCommandQueue(clQueue_)); // some cleanup problems are here
-            }
-
-        }
-        clQueue_ = NULL;
-        context_ = NULL;
-    }
-};
-
-cv::TLSData<CommandQueue> commandQueueTLSData;
-
-//////////////////////////////// OpenCL context ////////////////////////
-//This is a global singleton class used to represent a OpenCL context.
-class ContextImpl : public Context
-{
-public:
-    cl_device_id clDeviceID;
-    cl_context clContext;
-    const DeviceInfoImpl& deviceInfoImpl;
-
-protected:
-    ContextImpl(const DeviceInfoImpl& _deviceInfoImpl, cl_context context)
-        : clDeviceID(_deviceInfoImpl.device_id), clContext(context), deviceInfoImpl(_deviceInfoImpl)
-    {
-#ifdef CL_VERSION_1_2
-        if (supportsFeature(FEATURE_CL_VER_1_2))
-        {
-            openCLSafeCall(clRetainDevice(clDeviceID));
-        }
-#endif
-        openCLSafeCall(clRetainContext(clContext));
-
-        ContextImpl* old = NULL;
-        {
-            cv::AutoLock lock(getCurrentContextMutex());
-            old = currentContext;
-            currentContext = this;
-        }
-        if (old != NULL)
-        {
-            delete old;
-        }
-    }
-    ~ContextImpl()
-    {
-        CV_Assert(this != currentContext);
-
-#ifdef CL_VERSION_1_2
-        if (supportsFeature(FEATURE_CL_VER_1_2))
-        {
-            openCLSafeCall(clReleaseDevice(clDeviceID));
-        }
-#endif
-        if (deviceInfoImpl._id < 0) // not in the global registry, so we should cleanup it
-        {
-#ifdef CL_VERSION_1_2
-            if (supportsFeature(FEATURE_CL_VER_1_2))
-            {
-                openCLSafeCall(clReleaseDevice(deviceInfoImpl.device_id));
-            }
-#endif
-            PlatformInfoImpl* platformImpl = (PlatformInfoImpl*)(deviceInfoImpl.platform);
-            delete platformImpl;
-            delete const_cast<DeviceInfoImpl*>(&deviceInfoImpl);
-        }
-        clDeviceID = NULL;
-
-#ifdef WIN32
-        // if process is on termination stage (ExitProcess was called and other threads were terminated)
-        // then disable command queue release because it may cause program hang
-        if (!__termination)
-#endif
-        {
-            if(clContext)
-            {
-                openCLSafeCall(clReleaseContext(clContext));
-            }
-        }
-        clContext = NULL;
-    }
-public:
-    static void setContext(const DeviceInfo* deviceInfo);
-    static void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice);
-
-    bool supportsFeature(FEATURE_TYPE featureType) const;
-
-    static void cleanupContext(void);
-
-    static ContextImpl* getContext();
-private:
-    ContextImpl(const ContextImpl&); // disabled
-    ContextImpl& operator=(const ContextImpl&); // disabled
-
-    static ContextImpl* currentContext;
-};
-
-ContextImpl* ContextImpl::currentContext = NULL;
-
-static bool __deviceSelected = false;
-
-Context* Context::getContext()
-{
-    return ContextImpl::getContext();
-}
-
-ContextImpl* ContextImpl::getContext()
-{
-    if (currentContext == NULL)
-    {
-        static bool defaultInitiaization = false;
-        if (!defaultInitiaization)
-        {
-            cv::AutoLock lock(getInitializationMutex());
-            try
-            {
-                if (!__initialized)
-                {
-                    if (initializeOpenCLDevices() == 0)
-                    {
-                        CV_Error(Error::OpenCLInitError, "OpenCL not available");
-                    }
-                }
-                if (!__deviceSelected)
-                {
-                    if (!selectOpenCLDevice())
-                    {
-                        CV_Error(Error::OpenCLInitError, "Can't select OpenCL device");
-                    }
-                }
-                defaultInitiaization = true;
-            }
-            catch (...)
-            {
-                defaultInitiaization = true;
-                throw;
-            }
-        }
-        CV_Assert(currentContext != NULL);
-    }
-    return currentContext;
-}
-
-bool Context::supportsFeature(FEATURE_TYPE featureType) const
-{
-    return ((ContextImpl*)this)->supportsFeature(featureType);
-}
-
-const DeviceInfo& Context::getDeviceInfo() const
-{
-    return ((ContextImpl*)this)->deviceInfoImpl;
-}
-
-const void* Context::getOpenCLContextPtr() const
-{
-    return &(((ContextImpl*)this)->clContext);
-}
-
-const void* Context::getOpenCLCommandQueuePtr() const
-{
-    ContextImpl* pThis = (ContextImpl*)this;
-    CommandQueue* commandQueue = commandQueueTLSData.get();
-    if (commandQueue->context_ != pThis)
-    {
-        commandQueue->create(pThis);
-    }
-    return &commandQueue->clQueue_;
-}
-
-const void* Context::getOpenCLDeviceIDPtr() const
-{
-    return &(((ContextImpl*)this)->clDeviceID);
-}
-
-
-bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
-{
-    switch (featureType)
-    {
-    case FEATURE_CL_INTEL_DEVICE:
-        return deviceInfoImpl.isIntelDevice;
-    case FEATURE_CL_DOUBLE:
-        return deviceInfoImpl.haveDoubleSupport;
-    case FEATURE_CL_UNIFIED_MEM:
-        return deviceInfoImpl.isUnifiedMemory;
-    case FEATURE_CL_VER_1_2:
-        return deviceInfoImpl.deviceVersionMajor > 1 || (deviceInfoImpl.deviceVersionMajor == 1 && deviceInfoImpl.deviceVersionMinor >= 2);
-    }
-    CV_Error(CV_StsBadArg, "Invalid feature type");
-    return false;
-}
-
-void fft_teardown();
-void clBlasTeardown();
-
-void ContextImpl::cleanupContext(void)
-{
-    fft_teardown();
-    clBlasTeardown();
-
-    cv::AutoLock lock(getCurrentContextMutex());
-    if (currentContext)
-    {
-        ContextImpl* ctx = currentContext;
-        currentContext = NULL;
-        delete ctx;
-    }
-}
-
-void ContextImpl::setContext(const DeviceInfo* deviceInfo)
-{
-    CV_Assert(deviceInfo->_id >= 0); // we can't specify custom devices
-    CV_Assert(deviceInfo->_id < (int)global_devices.size());
-
-    {
-        cv::AutoLock lock(getCurrentContextMutex());
-        if (currentContext)
-        {
-            if (currentContext->deviceInfoImpl._id == deviceInfo->_id)
-                return;
-        }
-    }
-
-    DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
-    CV_Assert(deviceInfo == &infoImpl);
-
-    cl_int status = 0;
-    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
-    cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
-    openCLVerifyCall(status);
-
-    ContextImpl* ctx = new ContextImpl(infoImpl, clContext);
-    clReleaseContext(clContext);
-    (void)ctx;
-}
-
-void ContextImpl::initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
-{
-    CV_Assert(pClPlatform != NULL);
-    CV_Assert(pClContext != NULL);
-    CV_Assert(pClDevice != NULL);
-    cl_platform_id platform = *(cl_platform_id*)pClPlatform;
-    cl_context context = *(cl_context*)pClContext;
-    cl_device_id device = *(cl_device_id*)pClDevice;
-
-    PlatformInfoImpl* platformInfoImpl = new PlatformInfoImpl();
-    platformInfoImpl->init(-1, platform);
-    DeviceInfoImpl* deviceInfoImpl = new DeviceInfoImpl();
-    deviceInfoImpl->init(-1, *platformInfoImpl, device);
-
-    ContextImpl* ctx = new ContextImpl(*deviceInfoImpl, context);
-    (void)ctx;
-}
-
-void CommandQueue::create(ContextImpl* context)
-{
-    release();
-    cl_int status = 0;
-    // TODO add CL_QUEUE_PROFILING_ENABLE
-    cl_command_queue clCmdQueue = clCreateCommandQueue(context->clContext, context->clDeviceID, 0, &status);
-    openCLVerifyCall(status);
-    context_ = context;
-    clQueue_ = clCmdQueue;
-}
-
-int getOpenCLPlatforms(PlatformsInfo& platforms)
-{
-    if (!__initialized)
-        initializeOpenCLDevices();
-
-    platforms.clear();
-
-    for (size_t id = 0; id < global_platforms.size(); ++id)
-    {
-        PlatformInfoImpl& impl = global_platforms[id];
-        platforms.push_back(&impl);
-    }
-
-    return platforms.size();
-}
-
-int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
-{
-    if (!__initialized)
-        initializeOpenCLDevices();
-
-    devices.clear();
-
-    switch(deviceType)
-    {
-    case CVCL_DEVICE_TYPE_DEFAULT:
-    case CVCL_DEVICE_TYPE_CPU:
-    case CVCL_DEVICE_TYPE_GPU:
-    case CVCL_DEVICE_TYPE_ACCELERATOR:
-    case CVCL_DEVICE_TYPE_ALL:
-        break;
-    default:
-        return 0;
-    }
-
-    if (platform == NULL)
-    {
-        for (size_t id = 0; id < global_devices.size(); ++id)
-        {
-            DeviceInfoImpl& deviceInfo = global_devices[id];
-            if (((int)deviceInfo.deviceType & deviceType) != 0)
-            {
-                devices.push_back(&deviceInfo);
-            }
-        }
-    }
-    else
-    {
-        for (size_t id = 0; id < platform->devices.size(); ++id)
-        {
-            const DeviceInfo* deviceInfo = platform->devices[id];
-            if (((int)deviceInfo->deviceType & deviceType) == deviceType)
-            {
-                devices.push_back(deviceInfo);
-            }
-        }
-    }
-
-    return (int)devices.size();
-}
-
-void setDevice(const DeviceInfo* info)
-{
-    try
-    {
-        ContextImpl::setContext(info);
-        __deviceSelected = true;
-    }
-    catch (...)
-    {
-        __deviceSelected = true;
-        throw;
-    }
-}
-
-void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
-{
-    try
-    {
-        ContextImpl::initializeContext(pClPlatform, pClContext, pClDevice);
-        __deviceSelected = true;
-    }
-    catch (...)
-    {
-        __deviceSelected = true;
-        throw;
-    }
-}
-
-bool supportsFeature(FEATURE_TYPE featureType)
-{
-    return Context::getContext()->supportsFeature(featureType);
-}
-
-__Module::__Module()
-{
-    /* moved to Context::getContext(): initializeOpenCLDevices(); */
-}
-
-__Module::~__Module()
-{
-#if defined(WIN32) && defined(CVAPI_EXPORTS)
-    // nothing, see DllMain
-#else
-    ContextImpl::cleanupContext();
-#endif
-}
-
-} // namespace ocl
-} // namespace cv
-
-
-#if defined(WIN32) && defined(CVAPI_EXPORTS)
-
-extern "C"
-BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved);
-
-extern "C"
-BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
-{
-    if (fdwReason == DLL_PROCESS_DETACH)
-    {
-        if (lpReserved != NULL) // called after ExitProcess() call
-            cv::ocl::__termination = true;
-        cv::ocl::ContextImpl::cleanupContext();
-    }
-    return TRUE;
-}
-
-#endif
diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp
deleted file mode 100644
index aa44c48..0000000
--- a/modules/ocl/src/cl_operations.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Guoping Long, longguoping@gmail.com
-//    Niko Li, newlife20080214@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <iomanip>
-#include <fstream>
-#include "cl_programcache.hpp"
-
-//#define PRINT_KERNEL_RUN_TIME
-#define RUN_TIMES 100
-#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD
-#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
-#endif
-//#define AMD_DOUBLE_DIFFER
-
-namespace cv {
-namespace ocl {
-
-DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
-DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
-int gDevMemTypeValueMap[5] = {0,
-                              CL_MEM_ALLOC_HOST_PTR,
-                              CL_MEM_USE_HOST_PTR,
-                              CL_MEM_COPY_HOST_PTR,
-                              CL_MEM_USE_PERSISTENT_MEM_AMD};
-int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
-
-void finish()
-{
-    clFinish(getClCommandQueue(Context::getContext()));
-}
-
-bool isCpuDevice()
-{
-    const DeviceInfo& info = Context::getContext()->getDeviceInfo();
-    return (info.deviceType == CVCL_DEVICE_TYPE_CPU);
-}
-
-size_t queryWaveFrontSize(cl_kernel kernel)
-{
-    const DeviceInfo& info = Context::getContext()->getDeviceInfo();
-    if (info.deviceType == CVCL_DEVICE_TYPE_CPU)
-        return 1;
-    size_t wavefront = 0;
-    CV_Assert(kernel != NULL);
-    openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()),
-            CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &wavefront, NULL));
-    return wavefront;
-}
-
-
-void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
-{
-    cl_int status;
-    status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0,
-                                 size, host_buffer, 0, NULL, NULL);
-    openCLVerifyCall(status);
-}
-
-cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
-{
-    cl_int status;
-    cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status);
-    openCLVerifyCall(status);
-    return buffer;
-}
-
-#define MEMORY_CORRUPTION_GUARD
-#ifdef MEMORY_CORRUPTION_GUARD
-//#define CHECK_MEMORY_CORRUPTION
-#define CHECK_MEMORY_CORRUPTION_PRINT_ERROR
-#define CHECK_MEMORY_CORRUPTION_RAISE_ERROR
-static const int __memory_corruption_guard_bytes = 64*1024;
-#ifdef CHECK_MEMORY_CORRUPTION
-static const int __memory_corruption_check_pattern = 0x14326547; // change pattern for sizeof(int)==8
-#endif
-struct CheckBuffers
-{
-    cl_mem mainBuffer;
-    size_t size;
-    size_t widthInBytes, height;
-    CheckBuffers()
-        : mainBuffer(NULL), size(0), widthInBytes(0), height(0)
-    {
-        // nothing
-    }
-    CheckBuffers(cl_mem _mainBuffer, size_t _size, size_t _widthInBytes, size_t _height)
-        : mainBuffer(_mainBuffer), size(_size), widthInBytes(_widthInBytes), height(_height)
-    {
-        // nothing
-    }
-};
-static std::map<cl_mem, CheckBuffers> __check_buffers;
-#endif
-
-void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
-                       size_t widthInBytes, size_t height)
-{
-    openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
-}
-
-void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
-                       size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
-{
-    cl_int status;
-    size_t size = widthInBytes * height;
-    bool useSubBuffers =
-#ifndef MEMORY_CORRUPTION_GUARD
-            false;
-#else
-            true;
-#endif
-    const DeviceInfo& devInfo = ctx->getDeviceInfo();
-    if (useSubBuffers && devInfo.isIntelDevice)
-    {
-        useSubBuffers = false; // TODO FIXIT We observe memory leaks then we working with sub-buffers
-                               // on the CPU device of Intel OpenCL SDK (Linux). We will investigate this later.
-    }
-    if (!useSubBuffers)
-    {
-        *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-                                  size, 0, &status);
-        openCLVerifyCall(status);
-    }
-#ifdef MEMORY_CORRUPTION_GUARD
-    else
-    {
-        size_t allocSize = size + __memory_corruption_guard_bytes * 2;
-        cl_mem mainBuffer = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-                allocSize, 0, &status);
-        openCLVerifyCall(status);
-        cl_buffer_region r = {__memory_corruption_guard_bytes, size};
-        *dev_ptr = clCreateSubBuffer(mainBuffer,
-                gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-                CL_BUFFER_CREATE_TYPE_REGION, &r,
-                &status);
-        openCLVerifyCall(status);
-#ifdef CHECK_MEMORY_CORRUPTION
-        std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
-                __memory_corruption_check_pattern);
-        CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_guard_bytes);
-        openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-                mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &tmp[0],
-                0, NULL, NULL));
-        openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-                mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + size, __memory_corruption_guard_bytes, &tmp[0],
-                0, NULL, NULL));
-        clFinish(getClCommandQueue(ctx));
-#endif
-        CheckBuffers data(mainBuffer, size, widthInBytes, height);
-        cv::AutoLock lock(getInitializationMutex());
-        __check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
-    }
-#endif
-    *pitch = widthInBytes;
-}
-
-void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
-                    const void *src, size_t spitch,
-                    size_t width, size_t height, openCLMemcpyKind kind, int channels)
-{
-    size_t buffer_origin[3] = {0, 0, 0};
-    size_t host_origin[3] = {0, 0, 0};
-    size_t region[3] = {width, height, 1};
-    if(kind == clMemcpyHostToDevice)
-    {
-        if(dpitch == width || channels == 3 || height == 1)
-        {
-            openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
-                                                0, width * height, src, 0, NULL, NULL));
-        }
-        else
-        {
-            openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
-                                                    buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
-        }
-    }
-    else if(kind == clMemcpyDeviceToHost)
-    {
-        if(spitch == width || channels == 3 || height == 1)
-        {
-            openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
-                                               0, width * height, dst, 0, NULL, NULL));
-        }
-        else
-        {
-            openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
-                                                   buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
-        }
-    }
-}
-
-void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
-                        const void *src, size_t spitch,
-                        size_t width, size_t height, int src_offset)
-{
-    size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
-    size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
-    size_t region[3] = {width, height, 1};
-
-    openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
-                                           region, spitch, 0, dpitch, 0, 0, 0, 0));
-}
-
-void openCLFree(void *devPtr)
-{
-    openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
-#ifdef MEMORY_CORRUPTION_GUARD
-#ifdef CHECK_MEMORY_CORRUPTION
-    bool failBefore = false, failAfter = false;
-#endif
-    CheckBuffers data;
-    {
-        cv::AutoLock lock(getInitializationMutex());
-        std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
-        if (i != __check_buffers.end())
-        {
-            data = i->second;
-            __check_buffers.erase(i);
-        }
-    }
-    if (data.mainBuffer != NULL)
-    {
-#ifdef CHECK_MEMORY_CORRUPTION
-        Context* ctx = Context::getContext();
-        std::vector<uchar> checkBefore(__memory_corruption_guard_bytes);
-        std::vector<uchar> checkAfter(__memory_corruption_guard_bytes);
-        openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
-                data.mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &checkBefore[0],
-                0, NULL, NULL));
-        openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
-                data.mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + data.size, __memory_corruption_guard_bytes, &checkAfter[0],
-                0, NULL, NULL));
-        clFinish(getClCommandQueue(ctx));
-
-        std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
-                __memory_corruption_check_pattern);
-
-        if (memcmp(&checkBefore[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
-        {
-            failBefore = true;
-        }
-        if (memcmp(&checkAfter[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
-        {
-            failAfter = true;
-        }
-#else
-        // TODO FIXIT Attach clReleaseMemObject call to event completion callback
-        // TODO 2013/12/04 Disable workaround
-        // Context* ctx = Context::getContext();
-        // clFinish(getClCommandQueue(ctx));
-#endif
-        openCLSafeCall(clReleaseMemObject(data.mainBuffer));
-    }
-#if defined(CHECK_MEMORY_CORRUPTION)
-    if (failBefore)
-    {
-#ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR
-        std::cerr << "ERROR: Memory corruption detected: before buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
-#endif
-#ifdef CHECK_MEMORY_CORRUPTION_RAISE_ERROR
-        CV_Error(CV_StsInternal, "Memory corruption detected: before buffer");
-#endif
-    }
-    if (failAfter)
-    {
-#ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR
-        std::cerr << "ERROR: Memory corruption detected: after buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
-#endif
-#ifdef CHECK_MEMORY_CORRUPTION_RAISE_ERROR
-        CV_Error(CV_StsInternal, "Memory corruption detected: after buffer");
-#endif
-    }
-#endif // CHECK_MEMORY_CORRUPTION
-#endif // MEMORY_CORRUPTION_GUARD
-}
-
-cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName)
-{
-    return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
-}
-
-cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
-                                    const char *build_options)
-{
-    cl_kernel kernel;
-    cl_int status = 0;
-    CV_Assert(ProgramCache::getProgramCache() != NULL);
-    cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options);
-    CV_Assert(program != NULL);
-    kernel = clCreateKernel(program, kernelName.c_str(), &status);
-    openCLVerifyCall(status);
-    openCLVerifyCall(clReleaseProgram(program));
-    return kernel;
-}
-
-void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads)
-{
-    size_t kernelWorkGroupSize;
-    openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx),
-                                            CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
-    CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] );
-    CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] );
-    CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] );
-    CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize );
-    CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize );
-}
-
-#ifdef PRINT_KERNEL_RUN_TIME
-static double total_execute_time = 0;
-static double total_kernel_time = 0;
-#endif
-
-static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
-{
-    if (buildOptions == NULL)
-        return "";
-
-    size_t length = strlen(buildOptions), didx = 0, sidx = 0;
-    while (sidx < length && buildOptions[sidx] == 0)
-        ++sidx;
-
-    std::string opt;
-    opt.resize(length);
-
-    for ( ; sidx < length; ++sidx)
-        if (buildOptions[sidx] != ' ')
-            opt[didx++] = buildOptions[sidx];
-        else if ( !(didx > 0 && opt[didx - 1] == ' ') )
-            opt[didx++] = buildOptions[sidx];
-
-    return opt;
-}
-
-cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, int channels,
-                          int depth, const char *build_options)
-{
-    //construct kernel name
-    //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
-    //for example split_C2_D3, represent the split kernel with channels = 2 and dataType Depth = 3(Data type is short)
-    std::stringstream idxStr;
-    if(channels != -1)
-        idxStr << "_C" << channels;
-    if(depth != -1)
-        idxStr << "_D" << depth;
-    kernelName += idxStr.str();
-
-    std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options);
-    cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
-    return kernel;
-}
-
-void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
-                          size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args)
-{
-    if ( localThreads != NULL)
-    {
-        globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
-        globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
-        globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
-
-        cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
-    }
-    for(size_t i = 0; i < args.size(); i ++)
-        openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
-#ifndef PRINT_KERNEL_RUN_TIME
-    openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
-                                          localThreads, 0, NULL, NULL));
-#else
-    cl_event event = NULL;
-    openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
-                                          localThreads, 0, NULL, &event));
-
-    cl_ulong start_time, end_time, queue_time;
-    double execute_time = 0;
-    double total_time   = 0;
-
-    openCLSafeCall(clWaitForEvents(1, &event));
-    openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
-                                           sizeof(cl_ulong), &start_time, 0));
-
-    openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
-                                           sizeof(cl_ulong), &end_time, 0));
-
-    openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
-                                           sizeof(cl_ulong), &queue_time, 0));
-
-    execute_time = (double)(end_time - start_time) / (1000 * 1000);
-    total_time = (double)(end_time - queue_time) / (1000 * 1000);
-
-    total_execute_time += execute_time;
-    total_kernel_time += total_time;
-    clReleaseEvent(event);
-#endif
-
-    clFlush(getClCommandQueue(ctx));
-    openCLSafeCall(clReleaseKernel(kernel));
-}
-
-void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-                          size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-                          int depth, const char *build_options)
-{
-    cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, channels, depth, build_options);
-
-    openCLExecuteKernel(ctx, kernel, globalThreads, localThreads, args);
-}
-
-void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
-                         size_t globalThreads[3], size_t localThreads[3],
-                         std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
-{
-    openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
-                        channels, depth, NULL);
-}
-void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
-                         size_t globalThreads[3], size_t localThreads[3],
-                         std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
-
-{
-#ifndef PRINT_KERNEL_RUN_TIME
-    openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
-                         build_options);
-#else
-    String data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
-    std::cout << std::endl;
-    std::cout << "Function Name: " << kernelName;
-    if(depth >= 0)
-        std::cout << " |data type: " << data_type[depth];
-    std::cout << " |channels: " << channels;
-    std::cout << " |Time Unit: " << "ms" << std::endl;
-
-    total_execute_time = 0;
-    total_kernel_time = 0;
-    std::cout << "-------------------------------------" << std::endl;
-
-    std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "execute time";
-    std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "launch time";
-    std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "kernel time" << std::endl;
-    int i = 0;
-    for(i = 0; i < RUN_TIMES; i++)
-        openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
-                             build_options);
-
-    std::cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << std::endl; // "ms" << std::endl;
-    std::cout << "average kernel total time:  " << total_kernel_time / RUN_TIMES << std::endl; // "ms" << std::endl;
-#endif
-}
-
-void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, String kernelName,
-                         size_t globalThreads[3], size_t localThreads[3],
-                         std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
-
-{
-    //construct kernel name
-    //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
-    //for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char)
-    std::stringstream idxStr;
-    if(channels != -1)
-        idxStr << "_C" << channels;
-    if(depth != -1)
-        idxStr << "_D" << depth;
-    kernelName += idxStr.str();
-
-    std::string name = std::string("custom_") + source.name;
-    ProgramEntry program = { name.c_str(), source.programStr, source.programHash };
-    cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options);
-
-    CV_Assert(globalThreads != NULL);
-    if ( localThreads != NULL)
-    {
-        globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
-        globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
-        globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
-
-        cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
-    }
-    for(size_t i = 0; i < args.size(); i ++)
-        openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
-    openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
-                    localThreads, 0, NULL, NULL));
-
-    clFinish(getClCommandQueue(ctx));
-    openCLSafeCall(clReleaseKernel(kernel));
-}
-
-cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
-                     const size_t size)
-{
-    int status;
-    cl_mem con_struct;
-
-    con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
-    openCLSafeCall(status);
-
-    openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
-                                        value, 0, 0, 0));
-
-    return con_struct;
-}
-
-}//namespace ocl
-}//namespace cv
diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp
deleted file mode 100644
index 56f0213..0000000
--- a/modules/ocl/src/cl_programcache.cpp
+++ /dev/null
@@ -1,514 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Guoping Long, longguoping@gmail.com
-//    Niko Li, newlife20080214@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <iomanip>
-#include <fstream>
-#include "cl_programcache.hpp"
-
-namespace cv { namespace ocl {
-
-/*
- * The binary caching system to eliminate redundant program source compilation.
- * Strictly, this is not a cache because we do not implement evictions right now.
- * We shall add such features to trade-off memory consumption and performance when necessary.
- */
-
-cv::Mutex ProgramCache::mutexFiles;
-cv::Mutex ProgramCache::mutexCache;
-
-ProgramCache* _programCache = NULL;
-ProgramCache* ProgramCache::getProgramCache()
-{
-    if (NULL == _programCache)
-    {
-        cv::AutoLock lock(getInitializationMutex());
-        if (NULL == _programCache)
-            _programCache = new ProgramCache();
-    }
-    return _programCache;
-}
-
-ProgramCache::ProgramCache()
-{
-    codeCache.clear();
-    cacheSize = 0;
-}
-
-ProgramCache::~ProgramCache()
-{
-    releaseProgram();
-    if (this == _programCache)
-    {
-        cv::AutoLock lock(getInitializationMutex());
-        if (this == _programCache)
-            _programCache = NULL;
-    }
-}
-
-cl_program ProgramCache::progLookup(const String& srcsign)
-{
-    std::map<String, cl_program>::iterator iter;
-    iter = codeCache.find(srcsign);
-    if(iter != codeCache.end())
-        return iter->second;
-    else
-        return NULL;
-}
-
-void ProgramCache::addProgram(const String& srcsign, cl_program program)
-{
-    if (!progLookup(srcsign))
-    {
-        clRetainProgram(program);
-        codeCache.insert(std::map<String, cl_program>::value_type(srcsign, program));
-    }
-}
-
-void ProgramCache::releaseProgram()
-{
-    std::map<String, cl_program>::iterator iter;
-    for(iter = codeCache.begin(); iter != codeCache.end(); iter++)
-    {
-        openCLSafeCall(clReleaseProgram(iter->second));
-    }
-    codeCache.clear();
-    cacheSize = 0;
-}
-
-static bool enable_disk_cache = true;
-static String binpath = "";
-
-void setBinaryDiskCache(int mode, String path)
-{
-    enable_disk_cache = false;
-    binpath = "";
-
-    if(mode == CACHE_NONE)
-    {
-        return;
-    }
-    enable_disk_cache =
-#if defined(_DEBUG) || defined(DEBUG)
-        (mode & CACHE_DEBUG)   == CACHE_DEBUG;
-#else
-        (mode & CACHE_RELEASE) == CACHE_RELEASE;
-#endif
-    if(enable_disk_cache && !path.empty())
-    {
-        binpath = path;
-    }
-}
-
-void setBinaryPath(const char *path)
-{
-    binpath = path;
-}
-
-static const int MAX_ENTRIES = 64;
-
-struct ProgramFileCache
-{
-    struct CV_DECL_ALIGNED(1) ProgramFileHeader
-    {
-        int hashLength;
-        //char hash[];
-    };
-
-    struct CV_DECL_ALIGNED(1) ProgramFileTable
-    {
-        int numberOfEntries;
-        //int firstEntryOffset[];
-    };
-
-    struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry
-    {
-        int nextEntry;
-        int dataSize;
-        int optionsLength;
-        //char options[];
-        // char data[];
-    };
-
-    String fileName_;
-    const char* hash_;
-    std::fstream f;
-
-    ProgramFileCache(const String& fileName, const char* hash)
-        : fileName_(fileName), hash_(hash)
-    {
-        if (hash_ != NULL)
-        {
-            f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
-            if(f.is_open())
-            {
-                int hashLength = 0;
-                f.read((char*)&hashLength, sizeof(int));
-                std::vector<char> fhash(hashLength + 1);
-                f.read(&fhash[0], hashLength);
-                if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0)
-                {
-                    f.close();
-                    remove(fileName_.c_str());
-                    return;
-                }
-            }
-        }
-    }
-
-    int getHash(const String& options)
-    {
-        int hash = 0;
-        for (size_t i = 0; i < options.length(); i++)
-        {
-            hash = (hash << 2) ^ (hash >> 17) ^ options[i];
-        }
-        return (hash + (hash >> 16)) & (MAX_ENTRIES - 1);
-    }
-
-    bool readConfigurationFromFile(const String& options, std::vector<char>& buf)
-    {
-        if (hash_ == NULL)
-            return false;
-
-        if (!f.is_open())
-            return false;
-
-        f.seekg(0, std::fstream::end);
-        size_t fileSize = (size_t)f.tellg();
-        if (fileSize == 0)
-        {
-            std::cerr << "Invalid file (empty): " << fileName_ << std::endl;
-            f.close();
-            remove(fileName_.c_str());
-            return false;
-        }
-        f.seekg(0, std::fstream::beg);
-
-        int hashLength = 0;
-        f.read((char*)&hashLength, sizeof(int));
-        CV_Assert(hashLength > 0);
-        f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
-
-        int numberOfEntries = 0;
-        f.read((char*)&numberOfEntries, sizeof(int));
-        CV_Assert(numberOfEntries > 0);
-        if (numberOfEntries != MAX_ENTRIES)
-        {
-            std::cerr << "Invalid file: " << fileName_ << std::endl;
-            f.close();
-            remove(fileName_.c_str());
-            return false;
-        }
-
-        std::vector<int> firstEntryOffset(numberOfEntries);
-        f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
-
-        int entryNum = getHash(options);
-
-        int entryOffset = firstEntryOffset[entryNum];
-        ProgramFileConfigurationEntry entry;
-        while (entryOffset > 0)
-        {
-            f.seekg(entryOffset, std::fstream::beg);
-            assert(sizeof(entry) == sizeof(int)*3);
-            f.read((char*)&entry, sizeof(entry));
-            std::vector<char> foptions(entry.optionsLength);
-            if ((int)options.length() == entry.optionsLength)
-            {
-                if (entry.optionsLength > 0)
-                    f.read(&foptions[0], entry.optionsLength);
-                if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0)
-                {
-                    buf.resize(entry.dataSize);
-                    f.read(&buf[0], entry.dataSize);
-                    f.seekg(0, std::fstream::beg);
-                    return true;
-                }
-            }
-            if (entry.nextEntry <= 0)
-                break;
-            entryOffset = entry.nextEntry;
-        }
-        return false;
-    }
-
-    bool writeConfigurationToFile(const String& options, std::vector<char>& buf)
-    {
-        if (hash_ == NULL)
-            return true; // don't save programs without hash
-
-        if (!f.is_open())
-        {
-            f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
-            if (!f.is_open())
-            {
-                f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
-                if (!f.is_open())
-                    return false;
-            }
-        }
-
-        f.seekg(0, std::fstream::end);
-        size_t fileSize = (size_t)f.tellg();
-        if (fileSize == 0)
-        {
-            f.seekp(0, std::fstream::beg);
-            int hashLength = strlen(hash_);
-            f.write((char*)&hashLength, sizeof(int));
-            f.write(hash_, hashLength);
-
-            int numberOfEntries = MAX_ENTRIES;
-            f.write((char*)&numberOfEntries, sizeof(int));
-            std::vector<int> firstEntryOffset(MAX_ENTRIES, 0);
-            f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
-            f.close();
-            f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
-            CV_Assert(f.is_open());
-            f.seekg(0, std::fstream::end);
-            fileSize = (size_t)f.tellg();
-        }
-        f.seekg(0, std::fstream::beg);
-
-        int hashLength = 0;
-        f.read((char*)&hashLength, sizeof(int));
-        CV_Assert(hashLength > 0);
-        f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
-
-        int numberOfEntries = 0;
-        f.read((char*)&numberOfEntries, sizeof(int));
-        CV_Assert(numberOfEntries > 0);
-        if (numberOfEntries != MAX_ENTRIES)
-        {
-            std::cerr << "Invalid file: " << fileName_ << std::endl;
-            f.close();
-            remove(fileName_.c_str());
-            return false;
-        }
-
-        size_t tableEntriesOffset = (size_t)f.tellg();
-        std::vector<int> firstEntryOffset(numberOfEntries);
-        f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
-
-        int entryNum = getHash(options);
-
-        int entryOffset = firstEntryOffset[entryNum];
-        ProgramFileConfigurationEntry entry;
-        while (entryOffset > 0)
-        {
-            f.seekg(entryOffset, std::fstream::beg);
-            assert(sizeof(entry) == sizeof(int)*3);
-            f.read((char*)&entry, sizeof(entry));
-            std::vector<char> foptions(entry.optionsLength);
-            if ((int)options.length() == entry.optionsLength)
-            {
-                if (entry.optionsLength > 0)
-                    f.read(&foptions[0], entry.optionsLength);
-                CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0);
-            }
-            if (entry.nextEntry <= 0)
-                break;
-            entryOffset = entry.nextEntry;
-        }
-        if (entryOffset > 0)
-        {
-            f.seekp(entryOffset, std::fstream::beg);
-            entry.nextEntry = fileSize;
-            f.write((char*)&entry, sizeof(entry));
-        }
-        else
-        {
-            firstEntryOffset[entryNum] = fileSize;
-            f.seekp(tableEntriesOffset, std::fstream::beg);
-            f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
-        }
-        f.seekp(fileSize, std::fstream::beg);
-        entry.nextEntry = 0;
-        entry.dataSize = buf.size();
-        entry.optionsLength = options.length();
-        f.write((char*)&entry, sizeof(entry));
-        f.write(options.c_str(), entry.optionsLength);
-        f.write(&buf[0], entry.dataSize);
-        return true;
-    }
-
-    cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const String& options)
-    {
-        cl_int status = 0;
-        cl_program program = NULL;
-        std::vector<char> binary;
-        if (!enable_disk_cache || !readConfigurationFromFile(options, binary))
-        {
-            program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status);
-            openCLVerifyCall(status);
-            cl_device_id device = getClDeviceID(ctx);
-            status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
-            if(status == CL_SUCCESS)
-            {
-                if (enable_disk_cache)
-                {
-                    size_t binarySize;
-                    openCLSafeCall(clGetProgramInfo(program,
-                                            CL_PROGRAM_BINARY_SIZES,
-                                            sizeof(size_t),
-                                            &binarySize, NULL));
-
-                    std::vector<char> binary(binarySize);
-
-                    char* ptr = &binary[0];
-                    openCLSafeCall(clGetProgramInfo(program,
-                                            CL_PROGRAM_BINARIES,
-                                            sizeof(char*),
-                                            &ptr,
-                                            NULL));
-
-                    if (!writeConfigurationToFile(options, binary))
-                    {
-                        std::cerr << "Can't write data to file: " << fileName_ << std::endl;
-                    }
-                }
-            }
-        }
-        else
-        {
-            cl_device_id device = getClDeviceID(ctx);
-            size_t size = binary.size();
-            const char* ptr = &binary[0];
-            program = clCreateProgramWithBinary(getClContext(ctx),
-                    1, &device,
-                    (const size_t *)&size, (const unsigned char **)&ptr,
-                    NULL, &status);
-            openCLVerifyCall(status);
-            status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
-        }
-
-        if(status != CL_SUCCESS)
-        {
-            if (status == CL_BUILD_PROGRAM_FAILURE || status == CL_INVALID_BUILD_OPTIONS)
-            {
-                size_t buildLogSize = 0;
-                openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
-                        CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize));
-                std::vector<char> buildLog; buildLog.resize(buildLogSize);
-                memset(&buildLog[0], 0, buildLogSize);
-                openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
-                        CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[0], NULL));
-                std::cout << std::endl << "BUILD LOG: "
-                        << (source->name ? source->name : "dynamic program") << ": "
-                        << options << "\n";
-                std::cout << &buildLog[0] << std::endl;
-            }
-            openCLVerifyCall(status);
-        }
-        return program;
-    }
-};
-
-cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
-                                    const char *build_options)
-{
-    std::stringstream src_sign;
-
-    if (source->name)
-    {
-        src_sign << source->name;
-        src_sign << getClContext(ctx);
-        if (NULL != build_options)
-        {
-            src_sign << "_" << build_options;
-        }
-
-        {
-            cv::AutoLock lockCache(mutexCache);
-            cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
-            if (!!program)
-            {
-                clRetainProgram(program);
-                return program;
-            }
-        }
-    }
-
-    cv::AutoLock lockCache(mutexFiles);
-
-    // second check
-    if (source->name)
-    {
-        cv::AutoLock lockCache(mutexCache);
-        cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
-        if (!!program)
-        {
-            clRetainProgram(program);
-            return program;
-        }
-    }
-
-    String all_build_options;
-    if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
-        all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
-    if (build_options != NULL)
-    {
-        all_build_options += " ";
-        all_build_options += build_options;
-    }
-    const DeviceInfo& devInfo = ctx->getDeviceInfo();
-    String filename = binpath + (source->name ? source->name : "NULL") + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb";
-
-    ProgramFileCache programFileCache(filename, source->programHash);
-    cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
-
-    //Cache the binary for future use if build_options is null
-    if (source->name)
-    {
-        cv::AutoLock lockCache(mutexCache);
-        this->addProgram(src_sign.str(), program);
-    }
-    return program;
-}
-
-} // namespace ocl
-} // namespace cv
diff --git a/modules/ocl/src/cl_programcache.hpp b/modules/ocl/src/cl_programcache.hpp
deleted file mode 100644
index ebf3e76..0000000
--- a/modules/ocl/src/cl_programcache.hpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv {
-namespace ocl {
-
-class ProgramCache
-{
-protected:
-    ProgramCache();
-    ~ProgramCache();
-public:
-    static ProgramCache *getProgramCache();
-
-    cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
-                          const char *build_options);
-
-    void releaseProgram();
-protected:
-    //lookup the binary given the file name
-    // (with acquired mutexCache)
-    cl_program progLookup(const String& srcsign);
-
-    //add program to the cache
-    // (with acquired mutexCache)
-    void addProgram(const String& srcsign, cl_program program);
-
-    std::map <String, cl_program> codeCache;
-    unsigned int cacheSize;
-
-    //The presumed watermark for the cache volume (256MB). Is it enough?
-    //We may need more delicate algorithms when necessary later.
-    //Right now, let's just leave it along.
-    static const unsigned MAX_PROG_CACHE_SIZE = 1024;
-
-    // acquire both mutexes in this order: 1) mutexFiles 2) mutexCache
-    static cv::Mutex mutexFiles;
-    static cv::Mutex mutexCache;
-};
-
-}//namespace ocl
-}//namespace cv
diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
deleted file mode 100644
index 408ba4c..0000000
--- a/modules/ocl/src/color.cpp
+++ /dev/null
@@ -1,656 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wang Weiyan, wangweiyanster@gmail.com
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                           const std::string & additionalOptions = std::string(),
-                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
-{
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-    int pixels_per_work_item = 1;
-
-    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
-    {
-        if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
-            pixels_per_work_item =  4;
-        else if (src.cols % 2 == 0)
-            pixels_per_work_item =  2;
-        else
-            pixels_per_work_item =  1;
-    }
-
-    String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
-    if (!additionalOptions.empty())
-        build_options = build_options + additionalOptions;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data1.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
-    if (!data2.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
-
-    size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                           const std::string & additionalOptions = std::string(),
-                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
-{
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
-    if (!additionalOptions.empty())
-        build_options += additionalOptions;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data1.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
-    if (!data2.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
-
-   size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
-{
-    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
-    if (!additionalOptions.empty())
-        build_options += additionalOptions;
-
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
-
-    size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
-{
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-    int pixels_per_work_item = 1;
-
-    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
-    {
-        if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
-            pixels_per_work_item =  4;
-        else if (src.cols % 2 == 0)
-            pixels_per_work_item =  2;
-        else
-            pixels_per_work_item =  1;
-    }
-
-    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
-    if (!additionalOptions.empty())
-        build_options += additionalOptions;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
-
-    size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
-{
-    String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
-    if (!additionalOptions.empty())
-        build_options = build_options + additionalOptions;
-
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
-
-    size_t gt[3] = {src.cols, src.rows, 1};
-#ifdef ANDROID
-    size_t lt[3] = {16, 10, 1};
-#else
-    size_t lt[3] = {16, 16, 1};
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
-{
-    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
-    if (!additionalOptions.empty())
-        build_options += additionalOptions;
-
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    if (!data.empty())
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
-
-    size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
-{
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
-                                  src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
-{
-    String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
-                                  src.depth(), greenbits, dst.channels(), bidx);
-    int src_offset = src.offset >> 1, src_step = src.step >> 1;
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
-{
-    String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
-                                  src.depth(), greenbits, src.channels(), bidx);
-    int src_offset = (int)src.offset, src_step = (int)src.step;
-    int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = { dst.cols, dst.rows, 1 };
-#ifdef ANDROID
-    size_t lt[3] = { 16, 10, 1 };
-#else
-    size_t lt[3] = { 16, 16, 1 };
-#endif
-    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
-{
-    Size sz = src.size();
-    int scn = src.channels(), depth = src.depth(), bidx;
-
-    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
-
-    switch (code)
-    {
-    case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
-    case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
-    {
-        CV_Assert(scn == 3 || scn == 4);
-        dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
-        bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
-        dst.create(sz, CV_MAKE_TYPE(depth, dcn));
-        RGB_caller(src, dst, reverse);
-        break;
-    }
-    case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
-    case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
-    {
-        CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
-        bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
-            code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
-        int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
-            code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
-        dst.create(sz, CV_8UC2);
-        toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
-        break;
-    }
-    case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
-    case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
-    {
-        dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
-        CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
-        bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
-            code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
-        int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
-            code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
-        dst.create(sz, CV_MAKETYPE(depth, dcn));
-        fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
-        break;
-    }
-    case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
-    {
-        CV_Assert(scn == 2 && depth == CV_8U);
-        dst.create(sz, CV_8UC1);
-        int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
-        fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
-        break;
-    }
-    case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
-    {
-        CV_Assert(scn == 1 && depth == CV_8U);
-        dst.create(sz, CV_8UC2);
-        int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
-        toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
-        break;
-    }
-    case COLOR_RGB2GRAY: case COLOR_BGR2GRAY: case COLOR_RGBA2GRAY: case COLOR_BGRA2GRAY:
-    {
-        CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, 1));
-        fromRGB_caller(src, dst, bidx, "RGB2Gray");
-        break;
-    }
-    case COLOR_GRAY2BGR: case COLOR_GRAY2BGRA:
-    {
-        CV_Assert(scn == 1);
-        dcn  = code == COLOR_GRAY2BGRA ? 4 : 3;
-        dst.create(sz, CV_MAKETYPE(depth, dcn));
-        fromGray_caller(src, dst, 0, "Gray2RGB");
-        break;
-    }
-    case COLOR_BGR2YUV: case COLOR_RGB2YUV:
-    {
-        CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_BGR2YUV ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, 3));
-        fromRGB_caller(src, dst, bidx, "RGB2YUV");
-        break;
-    }
-    case COLOR_YUV2BGR: case COLOR_YUV2RGB:
-    {
-        if( dcn <= 0 )
-            dcn = 3;
-        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
-        bidx = code == COLOR_YUV2BGR ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, dcn));
-        toRGB_caller(src, dst, bidx, "YUV2RGB");
-        break;
-    }
-    case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12:
-    case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12:
-    {
-        CV_Assert(scn == 1);
-        CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
-        dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
-        bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2;
-
-        Size dstSz(sz.width, sz.height * 2 / 3);
-        dst.create(dstSz, CV_MAKETYPE(depth, dcn));
-        toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
-        break;
-    }
-    case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
-    {
-        CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, 3));
-        fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
-        break;
-    }
-    case COLOR_YCrCb2BGR: case COLOR_YCrCb2RGB:
-    {
-        if( dcn <= 0 )
-            dcn = 3;
-        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
-        bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, dcn));
-        toRGB_caller(src, dst, bidx, "YCrCb2RGB");
-        break;
-    }
-    case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
-    {
-        CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_BGR2XYZ ? 0 : 2;
-        dst.create(sz, CV_MAKE_TYPE(depth, 3));
-
-        Mat c;
-        if (depth == CV_32F)
-        {
-            float coeffs[] =
-            {
-                0.412453f, 0.357580f, 0.180423f,
-                0.212671f, 0.715160f, 0.072169f,
-                0.019334f, 0.119193f, 0.950227f
-            };
-            if (bidx == 0)
-            {
-                std::swap(coeffs[0], coeffs[2]);
-                std::swap(coeffs[3], coeffs[5]);
-                std::swap(coeffs[6], coeffs[8]);
-            }
-            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
-        }
-        else
-        {
-            int coeffs[] =
-            {
-                1689,    1465,    739,
-                871,     2929,    296,
-                79,      488,     3892
-            };
-            if (bidx == 0)
-            {
-                std::swap(coeffs[0], coeffs[2]);
-                std::swap(coeffs[3], coeffs[5]);
-                std::swap(coeffs[6], coeffs[8]);
-            }
-            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
-        }
-        oclMat oclCoeffs(c);
-
-        fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
-        break;
-    }
-    case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
-    {
-        if (dcn <= 0)
-            dcn = 3;
-        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
-        bidx = code == COLOR_XYZ2BGR ? 0 : 2;
-        dst.create(sz, CV_MAKE_TYPE(depth, dcn));
-
-        Mat c;
-        if (depth == CV_32F)
-        {
-            float coeffs[] =
-            {
-                3.240479f, -1.53715f, -0.498535f,
-                -0.969256f, 1.875991f, 0.041556f,
-                0.055648f, -0.204043f, 1.057311f
-            };
-            if (bidx == 0)
-            {
-                std::swap(coeffs[0], coeffs[6]);
-                std::swap(coeffs[1], coeffs[7]);
-                std::swap(coeffs[2], coeffs[8]);
-            }
-            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
-        }
-        else
-        {
-            int coeffs[] =
-            {
-                13273,  -6296,  -2042,
-                -3970,   7684,    170,
-                  228,   -836,   4331
-            };
-            if (bidx == 0)
-            {
-                std::swap(coeffs[0], coeffs[6]);
-                std::swap(coeffs[1], coeffs[7]);
-                std::swap(coeffs[2], coeffs[8]);
-            }
-            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
-        }
-        oclMat oclCoeffs(c);
-
-        toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
-        break;
-    }
-    case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
-    case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
-    {
-        CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
-        bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
-            code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
-        int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
-            code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
-        bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
-        dst.create(sz, CV_MAKETYPE(depth, 3));
-        std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");
-
-        if (is_hsv && depth == CV_8U)
-        {
-            static oclMat sdiv_data;
-            static oclMat hdiv_data180;
-            static oclMat hdiv_data256;
-            static int sdiv_table[256];
-            static int hdiv_table180[256];
-            static int hdiv_table256[256];
-            static volatile bool initialized180 = false, initialized256 = false;
-            volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
-
-            if (!initialized)
-            {
-                int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
-                oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
-
-                sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
-
-                int v = 255 << hsv_shift;
-                if (!initialized180 && !initialized256)
-                {
-                    for(int i = 1; i < 256; i++ )
-                        sdiv_table[i] = saturate_cast<int>(v/(1.*i));
-                    sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
-                }
-
-                v = hrange << hsv_shift;
-                for (int i = 1; i < 256; i++ )
-                    hdiv_table[i] = saturate_cast<int>(v/(6.*i));
-
-                hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
-                initialized = true;
-            }
-
-            toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
-            return;
-        }
-
-        toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
-        break;
-    }
-    case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
-    case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
-    {
-        if (dcn <= 0)
-            dcn = 3;
-        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
-        bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
-            code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
-        int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
-            code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
-        bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
-                code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
-
-        dst.create(sz, CV_MAKETYPE(depth, dcn));
-
-        std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
-        fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
-        break;
-    }
-    case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
-        {
-            CV_Assert(scn == 4 && depth == CV_8U);
-            dst.create(sz, CV_MAKETYPE(depth, 4));
-            std::string kernelName = code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";
-
-            fromRGB_caller(src, dst, 0, kernelName);
-            break;
-        }
-    default:
-        CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code" );
-    }
-}
-
-void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
-{
-    cvtColor_caller(src, dst, code, dcn);
-}
diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp
deleted file mode 100644
index ccbd960..0000000
--- a/modules/ocl/src/columnsum.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Chunpeng Zhang, chunpeng@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
-{
-    CV_Assert(src.type() == CV_32FC1);
-    dst.create(src.size(), src.type());
-
-    int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
-    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset));
-
-    size_t globalThreads[3] = {dst.cols, 1, 1};
-    size_t localThreads[3]  = {256, 1, 1};
-
-    openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
-
-}
diff --git a/modules/ocl/src/error.cpp b/modules/ocl/src/error.cpp
deleted file mode 100644
index a1e2d80..0000000
--- a/modules/ocl/src/error.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-
-        const char *getOpenCLErrorString( int err )
-        {
-            switch(err)
-            {
-            case CL_DEVICE_NOT_FOUND:
-                return "CL_DEVICE_NOT_FOUND";
-            case CL_DEVICE_NOT_AVAILABLE:
-                return "CL_DEVICE_NOT_AVAILABLE";
-            case CL_COMPILER_NOT_AVAILABLE:
-                return "CL_COMPILER_NOT_AVAILABLE";
-            case CL_MEM_OBJECT_ALLOCATION_FAILURE:
-                return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-            case CL_OUT_OF_RESOURCES:
-                return "CL_OUT_OF_RESOURCES";
-            case CL_OUT_OF_HOST_MEMORY:
-                return "CL_OUT_OF_HOST_MEMORY";
-            case CL_PROFILING_INFO_NOT_AVAILABLE:
-                return "CL_PROFILING_INFO_NOT_AVAILABLE";
-            case CL_MEM_COPY_OVERLAP:
-                return "CL_MEM_COPY_OVERLAP";
-            case CL_IMAGE_FORMAT_MISMATCH:
-                return "CL_IMAGE_FORMAT_MISMATCH";
-            case CL_IMAGE_FORMAT_NOT_SUPPORTED:
-                return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
-            case CL_BUILD_PROGRAM_FAILURE:
-                return "CL_BUILD_PROGRAM_FAILURE";
-            case CL_MAP_FAILURE:
-                return "CL_MAP_FAILURE";
-            case CL_MISALIGNED_SUB_BUFFER_OFFSET:
-                return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
-            case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
-                return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
-            case CL_INVALID_VALUE:
-                return "CL_INVALID_VALUE";
-            case CL_INVALID_DEVICE_TYPE:
-                return "CL_INVALID_DEVICE_TYPE";
-            case CL_INVALID_PLATFORM:
-                return "CL_INVALID_PLATFORM";
-            case CL_INVALID_DEVICE:
-                return "CL_INVALID_DEVICE";
-            case CL_INVALID_CONTEXT:
-                return "CL_INVALID_CONTEXT";
-            case CL_INVALID_QUEUE_PROPERTIES:
-                return "CL_INVALID_QUEUE_PROPERTIES";
-            case CL_INVALID_COMMAND_QUEUE:
-                return "CL_INVALID_COMMAND_QUEUE";
-            case CL_INVALID_HOST_PTR:
-                return "CL_INVALID_HOST_PTR";
-            case CL_INVALID_MEM_OBJECT:
-                return "CL_INVALID_MEM_OBJECT";
-            case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
-                return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
-            case CL_INVALID_IMAGE_SIZE:
-                return "CL_INVALID_IMAGE_SIZE";
-            case CL_INVALID_SAMPLER:
-                return "CL_INVALID_SAMPLER";
-            case CL_INVALID_BINARY:
-                return "CL_INVALID_BINARY";
-            case CL_INVALID_BUILD_OPTIONS:
-                return "CL_INVALID_BUILD_OPTIONS";
-            case CL_INVALID_PROGRAM:
-                return "CL_INVALID_PROGRAM";
-            case CL_INVALID_PROGRAM_EXECUTABLE:
-                return "CL_INVALID_PROGRAM_EXECUTABLE";
-            case CL_INVALID_KERNEL_NAME:
-                return "CL_INVALID_KERNEL_NAME";
-            case CL_INVALID_KERNEL_DEFINITION:
-                return "CL_INVALID_KERNEL_DEFINITION";
-            case CL_INVALID_KERNEL:
-                return "CL_INVALID_KERNEL";
-            case CL_INVALID_ARG_INDEX:
-                return "CL_INVALID_ARG_INDEX";
-            case CL_INVALID_ARG_VALUE:
-                return "CL_INVALID_ARG_VALUE";
-            case CL_INVALID_ARG_SIZE:
-                return "CL_INVALID_ARG_SIZE";
-            case CL_INVALID_KERNEL_ARGS:
-                return "CL_INVALID_KERNEL_ARGS";
-            case CL_INVALID_WORK_DIMENSION:
-                return "CL_INVALID_WORK_DIMENSION";
-            case CL_INVALID_WORK_GROUP_SIZE:
-                return "CL_INVALID_WORK_GROUP_SIZE";
-            case CL_INVALID_WORK_ITEM_SIZE:
-                return "CL_INVALID_WORK_ITEM_SIZE";
-            case CL_INVALID_GLOBAL_OFFSET:
-                return "CL_INVALID_GLOBAL_OFFSET";
-            case CL_INVALID_EVENT_WAIT_LIST:
-                return "CL_INVALID_EVENT_WAIT_LIST";
-            case CL_INVALID_EVENT:
-                return "CL_INVALID_EVENT";
-            case CL_INVALID_OPERATION:
-                return "CL_INVALID_OPERATION";
-            case CL_INVALID_GL_OBJECT:
-                return "CL_INVALID_GL_OBJECT";
-            case CL_INVALID_BUFFER_SIZE:
-                return "CL_INVALID_BUFFER_SIZE";
-            case CL_INVALID_MIP_LEVEL:
-                return "CL_INVALID_MIP_LEVEL";
-            case CL_INVALID_GLOBAL_WORK_SIZE:
-                return "CL_INVALID_GLOBAL_WORK_SIZE";
-                //case CL_INVALID_PROPERTY:
-                //    return "CL_INVALID_PROPERTY";
-                //case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
-                //    return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
-                //case CL_PLATFORM_NOT_FOUND_KHR:
-                //    return "CL_PLATFORM_NOT_FOUND_KHR";
-                //    //case CL_INVALID_PROPERTY_EXT:
-                //    //    return "CL_INVALID_PROPERTY_EXT";
-                //case CL_DEVICE_PARTITION_FAILED_EXT:
-                //    return "CL_DEVICE_PARTITION_FAILED_EXT";
-                //case CL_INVALID_PARTITION_COUNT_EXT:
-                //    return "CL_INVALID_PARTITION_COUNT_EXT";
-                //default:
-                //    return "unknown error code";
-            }
-            static char buf[256];
-            sprintf(buf, "%d", err);
-            return buf;
-        }
-    }
-}
diff --git a/modules/ocl/src/fast.cpp b/modules/ocl/src/fast.cpp
deleted file mode 100644
index b32ea28..0000000
--- a/modules/ocl/src/fast.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-cv::ocl::FAST_OCL::FAST_OCL(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
-    nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
-{
-}
-
-void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
-{
-    if (image.empty())
-        return;
-
-    (*this)(image, mask, d_keypoints_);
-    downloadKeypoints(d_keypoints_, keypoints);
-}
-
-void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-        return;
-
-    Mat h_keypoints(d_keypoints);
-    convertKeypoints(h_keypoints, keypoints);
-}
-
-void cv::ocl::FAST_OCL::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (h_keypoints.empty())
-        return;
-
-    CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
-
-    int npoints = h_keypoints.cols;
-
-    keypoints.resize(npoints);
-
-    const float* loc_x = h_keypoints.ptr<float>(X_ROW);
-    const float* loc_y = h_keypoints.ptr<float>(Y_ROW);
-    const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
-
-    for (int i = 0; i < npoints; ++i)
-    {
-        KeyPoint kp(loc_x[i], loc_y[i], static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
-        keypoints[i] = kp;
-    }
-}
-
-void cv::ocl::FAST_OCL::operator ()(const oclMat& img, const oclMat& mask, oclMat& keypoints)
-{
-    calcKeyPointsLocation(img, mask);
-    keypoints.cols = getKeyPoints(keypoints);
-}
-
-int cv::ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& img, const oclMat& mask)
-{
-    CV_Assert(img.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
-
-    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
-
-    ensureSizeIsEnough(ROWS_COUNT, maxKeypoints, CV_32SC1, kpLoc_);
-    kpLoc_.setTo(Scalar::all(0));
-
-    if (nonmaxSupression)
-    {
-        ensureSizeIsEnough(img.size(), CV_32SC1, score_);
-        score_.setTo(Scalar::all(0));
-    }
-
-    count_ = calcKeypointsOCL(img, mask, maxKeypoints);
-    count_ = std::min(count_, maxKeypoints);
-
-    return count_;
-}
-
-int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints)
-{
-    size_t localThreads[3] = {16, 16, 1};
-    size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0],
-                               divUp(img.rows - 6, localThreads[1]) * localThreads[1],
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int counter = 0;
-    int err = CL_SUCCESS;
-    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
-                                    CL_MEM_COPY_HOST_PTR, sizeof(int),
-                                    &counter, &err);
-
-    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
-    int scoreStep = score_.step / score_.elemSize();
-    int nms = (nonmaxSupression) ? 1 : 0;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
-    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep));
-
-    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);
-
-    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
-                                       counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
-    openCLSafeCall(clReleaseMemObject(counterCL));
-
-    return counter;
-}
-
-int cv::ocl::FAST_OCL::nonmaxSupressionOCL(oclMat& keypoints)
-{
-    size_t localThreads[3] = {256, 1, 1};
-    size_t globalThreads[3] = {count_, 1, 1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "nonmaxSupression";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int counter = 0;
-    int err = CL_SUCCESS;
-    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
-                                    CL_MEM_COPY_HOST_PTR, sizeof(int),
-                                    &counter, &err);
-
-    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
-    int sStep = score_.step / score_.elemSize();
-    int kStep = keypoints.step / keypoints.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&count_));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&sStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kStep));
-
-    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);
-
-    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
-                                       counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
-    openCLSafeCall(clReleaseMemObject(counterCL));
-
-    return counter;
-}
-
-int cv::ocl::FAST_OCL::getKeyPoints(oclMat& keypoints)
-{
-    if (count_ == 0)
-        return 0;
-
-    if (nonmaxSupression)
-    {
-        ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
-        return nonmaxSupressionOCL(keypoints);
-    }
-
-    kpLoc_.convertTo(keypoints, CV_32FC1);
-    Mat k = keypoints;
-
-    return count_;
-}
-
-void cv::ocl::FAST_OCL::release()
-{
-    kpLoc_.release();
-    score_.release();
-
-    d_keypoints_.release();
-}
diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp
deleted file mode 100644
index 2cfffef..0000000
--- a/modules/ocl/src/fft.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-#if !defined HAVE_CLAMDFFT
-
-void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
-{
-    CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
-}
-
-namespace cv { namespace ocl {
-    void fft_teardown();
-} }
-
-void cv::ocl::fft_teardown() { }
-
-#else
-
-#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
-
-namespace cv
-{
-    namespace ocl
-    {
-        void fft_setup();
-        void fft_teardown();
-
-        enum FftType
-        {
-            C2R = 1, // complex to complex
-            R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
-            C2C = 3  // opencl HERMITIAN_INTERLEAVED to real
-        };
-
-        struct FftPlan
-        {
-        protected:
-            clAmdFftPlanHandle plHandle;
-            FftPlan& operator=(const FftPlan&);
-        public:
-            FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
-            ~FftPlan();
-            inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
-
-            const Size dft_size;
-            const int src_step, dst_step;
-            const int depth;
-            const int flags;
-            const FftType type;
-        };
-
-        class PlanCache
-        {
-        protected:
-            PlanCache();
-            ~PlanCache();
-            static PlanCache* planCache;
-
-            bool started;
-            std::vector<FftPlan *> planStore;
-            clAmdFftSetupData *setupData;
-        public:
-            friend void fft_setup();
-            friend void fft_teardown();
-
-            static PlanCache* getPlanCache()
-            {
-                if (NULL == planCache)
-                    planCache = new PlanCache();
-                return planCache;
-            }
-
-            // return a baked plan->
-            // if there is one matched plan, return it
-            // if not, bake a new one, put it into the planStore and return it.
-            static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
-
-            // remove a single plan from the store
-            // return true if the plan is successfully removed
-            // else
-            static bool removePlan(clAmdFftPlanHandle );
-        };
-    }
-}
-
-PlanCache* PlanCache::planCache = NULL;
-
-void cv::ocl::fft_setup()
-{
-    PlanCache& pCache = *PlanCache::getPlanCache();
-    if(pCache.started)
-    {
-        return;
-    }
-    if (pCache.setupData == NULL)
-        pCache.setupData = new clAmdFftSetupData;
-
-    openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
-    pCache.started = true;
-}
-
-void cv::ocl::fft_teardown()
-{
-    PlanCache& pCache = *PlanCache::getPlanCache();
-
-    if(!pCache.started)
-        return;
-
-    for(size_t i = 0; i < pCache.planStore.size(); i ++)
-        delete pCache.planStore[i];
-    pCache.planStore.clear();
-
-    try
-    {
-        openCLSafeCall( clAmdFftTeardown( ) );
-    }
-    catch (const std::bad_alloc &)
-    { }
-
-    delete pCache.setupData; pCache.setupData = NULL;
-    pCache.started = false;
-}
-
-// bake a new plan
-cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
-    : plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), depth(_depth), flags(_flags), type(_type)
-{
-    fft_setup();
-
-    bool is_1d_input    = (_dft_size.height == 1);
-    int is_row_dft        = flags & DFT_ROWS;
-    int is_scaled_dft   = flags & DFT_SCALE;
-    int is_inverse        = flags & DFT_INVERSE;
-
-    //clAmdFftResultLocation    place;
-    clAmdFftLayout            inLayout;
-    clAmdFftLayout            outLayout;
-    clAmdFftDim                dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
-
-    size_t batchSize         = is_row_dft ? dft_size.height : 1;
-    size_t clLengthsIn[ 3 ]  = {1, 1, 1};
-    size_t clStridesIn[ 3 ]  = {1, 1, 1};
-    //size_t clLengthsOut[ 3 ] = {1, 1, 1};
-    size_t clStridesOut[ 3 ] = {1, 1, 1};
-    clLengthsIn[0]             = dft_size.width;
-    clLengthsIn[1]             = is_row_dft ? 1 : dft_size.height;
-    clStridesIn[0]             = 1;
-    clStridesOut[0]             = 1;
-
-    switch(_type)
-    {
-    case C2C:
-        inLayout        = CLFFT_COMPLEX_INTERLEAVED;
-        outLayout       = CLFFT_COMPLEX_INTERLEAVED;
-        clStridesIn[1]  = src_step / (2*CV_ELEM_SIZE(_depth));
-        clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
-        break;
-    case R2C:
-        inLayout        = CLFFT_REAL;
-        outLayout       = CLFFT_HERMITIAN_INTERLEAVED;
-        clStridesIn[1]  = src_step / CV_ELEM_SIZE(_depth);
-        clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
-        break;
-    case C2R:
-        inLayout        = CLFFT_HERMITIAN_INTERLEAVED;
-        outLayout       = CLFFT_REAL;
-        clStridesIn[1]  = src_step / (2*CV_ELEM_SIZE(_depth));
-        clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth);
-        break;
-    default:
-        //std::runtime_error("does not support this convertion!");
-        std::cout << "Does not support this convertion!" << std::endl;
-        throw std::exception();
-        break;
-    }
-
-    clStridesIn[2]  = is_row_dft ? clStridesIn[1]  : dft_size.width * clStridesIn[1];
-    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
-
-    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
-
-    openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) );
-    openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
-    openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
-    openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
-
-    openCLSafeCall( clAmdFftSetPlanInStride  ( plHandle, dim, clStridesIn ) );
-    openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
-    openCLSafeCall( clAmdFftSetPlanDistance  ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
-
-    float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f;
-    openCLSafeCall( clAmdFftSetPlanScale  ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
-
-    //ready to bake
-    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
-}
-
-cv::ocl::FftPlan::~FftPlan()
-{
-    openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
-}
-
-cv::ocl::PlanCache::PlanCache()
-    : started(false),
-      planStore(std::vector<cv::ocl::FftPlan *>()),
-      setupData(NULL)
-{
-}
-
-cv::ocl::PlanCache::~PlanCache()
-{
-    fft_teardown();
-}
-
-FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
-{
-    PlanCache& pCache = *PlanCache::getPlanCache();
-    std::vector<FftPlan *>& pStore = pCache.planStore;
-    // go through search
-    for(size_t i = 0; i < pStore.size(); i ++)
-    {
-        FftPlan *plan = pStore[i];
-        if(
-            plan->dft_size.width == _dft_size.width &&
-            plan->dft_size.height == _dft_size.height &&
-            plan->flags == _flags &&
-            plan->src_step == _src_step &&
-            plan->dst_step == _dst_step &&
-            plan->depth == _depth &&
-            plan->type == _type
-            )
-        {
-            return plan;
-        }
-    }
-    // no baked plan is found
-    FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type);
-    pStore.push_back(newPlan);
-    return newPlan;
-}
-
-bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
-{
-    PlanCache& pCache = *PlanCache::getPlanCache();
-    std::vector<FftPlan *>& pStore = pCache.planStore;
-    for(size_t i = 0; i < pStore.size(); i ++)
-    {
-        if(pStore[i]->getPlanHandle() == plHandle)
-        {
-            pStore.erase(pStore.begin() + i);
-            delete pStore[i];
-            return true;
-        }
-    }
-    return false;
-}
-
-void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
-{
-    CV_Assert(cv::ocl::haveAmdFft());
-
-    if(dft_size == Size(0, 0))
-    {
-        dft_size = src.size();
-    }
-    // check if the given dft size is of optimal dft size
-    CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
-
-    // the two flags are not compatible
-    CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
-
-    //bool is_1d_input    = (src.rows == 1);
-    //int is_row_dft        = flags & DFT_ROWS;
-    //int is_scaled_dft        = flags & DFT_SCALE;
-    int is_inverse = flags & DFT_INVERSE;
-    bool is_complex_input = src.channels() == 2;
-    bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
-
-    int depth = src.depth();
-
-    // We don't support real-to-real transform
-    CV_Assert(is_complex_input || is_complex_output);
-    FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
-
-    switch(type)
-    {
-    case C2C:
-        dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2));
-        printf("C2C\n");
-        break;
-    case R2C:
-        dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2));
-        printf("R2C\n");
-        break;
-    case C2R:
-        CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
-        dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1));
-        printf("C2R\n");
-        break;
-    default:
-        //std::runtime_error("does not support this convertion!");
-        std::cout << "Does not support this convertion!" << std::endl;
-        throw std::exception();
-        break;
-    }
-    clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle();
-
-    //get the buffersize
-    size_t buffersize = 0;
-    openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
-
-    //allocate the intermediate buffer
-    // TODO, bind this with the current FftPlan
-    cl_mem clMedBuffer = NULL;
-    if (buffersize)
-    {
-        cl_int medstatus;
-        clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
-        openCLSafeCall( medstatus );
-    }
-    cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr());
-    openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
-        is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
-        1,
-        &clq,
-        0, NULL, NULL,
-        (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
-    openCLSafeCall( clFinish(clq) );
-    if(clMedBuffer)
-    {
-        openCLFree(clMedBuffer);
-    }
-    fft_teardown();
-}
-
-#endif
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp
deleted file mode 100644
index b6e1fff..0000000
--- a/modules/ocl/src/filtering.cpp
+++ /dev/null
@@ -1,1710 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Zero Lin, Zero.Lin@amd.com
-//    Zhang Ying, zhangying913@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//    Harris Gasparakis, harris.gasparakis@amd.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace
-{
-inline void normalizeAnchor(int &anchor, int ksize)
-{
-    if (anchor < 0)
-        anchor = ksize >> 1;
-
-    CV_Assert(0 <= anchor && anchor < ksize);
-}
-
-inline void normalizeAnchor(Point &anchor, const Size &ksize)
-{
-    normalizeAnchor(anchor.x, ksize.width);
-    normalizeAnchor(anchor.y, ksize.height);
-}
-
-inline void normalizeROI(Rect &roi, const Size &ksize, const Point &/*anchor*/, const Size &src_size)
-{
-    if (roi == Rect(0, 0, -1, -1))
-        roi = Rect(0, 0, src_size.width, src_size.height);
-
-    CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
-    CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);
-}
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Filter2D
-namespace
-{
-class Filter2DEngine_GPU : public FilterEngine_GPU
-{
-public:
-    Filter2DEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_) : filter2D(filter2D_) {}
-
-    virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
-    {
-        Size src_size = src.size();
-
-        // Delete those two clause below which exist before, However, the result is also correct
-        // dst.create(src_size, src.type());
-        // dst = Scalar(0.0);
-
-        normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
-
-        oclMat srcROI = src(roi);
-        oclMat dstROI = dst(roi);
-
-        (*filter2D)(srcROI, dstROI);
-    }
-
-    Ptr<BaseFilter_GPU> filter2D;
-};
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)
-{
-    return makePtr<Filter2DEngine_GPU>(filter2D);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Box Filter
-namespace
-{
-typedef void (*FilterBox_t)(const oclMat & , oclMat & , Size &, const Point, const int);
-
-class GPUBoxFilter : public BaseFilter_GPU
-{
-public:
-    GPUBoxFilter(const Size &ksize_, const Point &anchor_, const int borderType_, FilterBox_t func_) :
-        BaseFilter_GPU(ksize_, anchor_, borderType_), func(func_) {}
-
-    virtual void operator()(const oclMat &src, oclMat &dst)
-    {
-        func(src, dst, ksize, anchor, borderType);
-    }
-
-    FilterBox_t func;
-
-};
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Morphology Filter
-
-namespace
-{
-typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel);
-
-class MorphFilter_GPU : public BaseFilter_GPU
-{
-public:
-    MorphFilter_GPU(const Size &ksize_, const Point &anchor_, const Mat &kernel_, GPUMorfFilter_t func_) :
-        BaseFilter_GPU(ksize_, anchor_, BORDER_CONSTANT), kernel(kernel_), func(func_), rectKernel(false) {}
-
-    virtual void operator()(const oclMat &src, oclMat &dst)
-    {
-        func(src, dst, kernel, ksize, anchor, rectKernel) ;
-    }
-
-    oclMat kernel;
-    GPUMorfFilter_t func;
-    bool rectKernel;
-};
-}
-
-/*
-**We should be able to support any data types here.
-**Extend this if necessary later.
-**Note that the kernel need to be further refined.
-*/
-static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
-                         Size &ksize, const Point anchor, bool rectKernel)
-{
-    //Normalize the result by default
-    //float alpha = ksize.height * ksize.width;
-    CV_Assert(src.clCxt == dst.clCxt);
-    CV_Assert((src.cols == dst.cols) &&
-              (src.rows == dst.rows));
-    CV_Assert((src.oclchannels() == dst.oclchannels()));
-
-    int srcStep = src.step / src.elemSize();
-    int dstStep = dst.step / dst.elemSize();
-    int srcOffset = src.offset / src.elemSize();
-    int dstOffset = dst.offset / dst.elemSize();
-
-    int srcOffset_x = srcOffset % srcStep;
-    int srcOffset_y = srcOffset / srcStep;
-    Context *clCxt = src.clCxt;
-    String kernelName;
-#ifdef ANDROID
-    size_t localThreads[3] = {16, 8, 1};
-#else
-    size_t localThreads[3] = {16, 16, 1};
-#endif
-    size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
-
-    if (src.type() == CV_8UC1)
-    {
-        kernelName = "morph_C1_D0";
-        globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-        CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
-    }
-    else
-    {
-        kernelName = "morph";
-        CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
-    }
-
-    char s[64];
-
-    switch (src.type())
-    {
-    case CV_8UC1:
-        sprintf(s, "-D VAL=255");
-        break;
-    case CV_8UC3:
-    case CV_8UC4:
-        sprintf(s, "-D VAL=255 -D GENTYPE=uchar4");
-        break;
-    case CV_32FC1:
-        sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float");
-        break;
-    case CV_32FC3:
-    case CV_32FC4:
-        sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
-        break;
-    default:
-        CV_Error(Error::StsUnsupportedFormat, "unsupported type");
-    }
-
-    char compile_option[128];
-    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
-        anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
-        rectKernel?"-D RECTKERNEL":"",
-        s);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_y));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstOffset));
-
-    openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
-}
-
-
-//! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
-static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
-                          Size &ksize, const Point anchor, bool rectKernel)
-{
-    //Normalize the result by default
-    //float alpha = ksize.height * ksize.width;
-    CV_Assert(src.clCxt == dst.clCxt);
-    CV_Assert((src.cols == dst.cols) &&
-              (src.rows == dst.rows));
-    CV_Assert((src.oclchannels() == dst.oclchannels()));
-
-    int srcStep = src.step1() / src.oclchannels();
-    int dstStep = dst.step1() / dst.oclchannels();
-    int srcOffset = src.offset /  src.elemSize();
-    int dstOffset = dst.offset /  dst.elemSize();
-
-    int srcOffset_x = srcOffset % srcStep;
-    int srcOffset_y = srcOffset / srcStep;
-    Context *clCxt = src.clCxt;
-    String kernelName;
-#ifdef ANDROID
-    size_t localThreads[3] = {16, 10, 1};
-#else
-    size_t localThreads[3] = {16, 16, 1};
-#endif
-    size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
-                               (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
-
-    if (src.type() == CV_8UC1)
-    {
-        kernelName = "morph_C1_D0";
-        globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-        CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
-    }
-    else
-    {
-        kernelName = "morph";
-        CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
-    }
-
-    char s[64];
-
-    switch (src.type())
-    {
-    case CV_8UC1:
-        sprintf(s, "-D VAL=0");
-        break;
-    case CV_8UC3:
-    case CV_8UC4:
-        sprintf(s, "-D VAL=0 -D GENTYPE=uchar4");
-        break;
-    case CV_32FC1:
-        sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float");
-        break;
-    case CV_32FC3:
-    case CV_32FC4:
-        sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
-        break;
-    default:
-        CV_Error(Error::StsUnsupportedFormat, "unsupported type");
-    }
-
-    char compile_option[128];
-    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
-        anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
-        s, rectKernel?"-D RECTKERNEL":"");
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_y));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstOffset));
-    openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
-}
-
-Ptr<BaseFilter_GPU> cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat &_kernel, const Size &ksize, Point anchor)
-{
-    CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
-    CV_Assert(type == CV_8UC1 || type == CV_8UC3 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC3 || type == CV_32FC4);
-
-    normalizeAnchor(anchor, ksize);
-    Mat kernel8U;
-    _kernel.convertTo(kernel8U, CV_8U);
-    Mat kernel = kernel8U.reshape(1, 1);
-
-    bool noZero = true;
-    for(int i = 0; i < kernel.rows * kernel.cols; ++i)
-        if(kernel.at<uchar>(i) != 1)
-            noZero = false;
-
-    MorphFilter_GPU* mfgpu = new MorphFilter_GPU(ksize, anchor, kernel, op == MORPH_ERODE ? GPUErode : GPUDilate);
-    if(noZero)
-        mfgpu->rectKernel = true;
-
-    return Ptr<BaseFilter_GPU>(mfgpu);
-}
-
-namespace
-{
-class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU
-{
-public:
-    MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_, int iters_) :
-        Filter2DEngine_GPU(filter2D_), iters(iters_) {}
-
-    virtual void apply(const oclMat &src, oclMat &dst)
-    {
-        Filter2DEngine_GPU::apply(src, dst);
-
-        for (int i = 1; i < iters; ++i)
-        {
-            Size wholesize;
-            Point ofs;
-            dst.locateROI(wholesize, ofs);
-            int rows = dst.rows, cols = dst.cols;
-            dst.adjustROI(ofs.y, -ofs.y - rows + dst.wholerows, ofs.x, -ofs.x - cols + dst.wholecols);
-            dst.copyTo(morfBuf);
-            dst.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols);
-            morfBuf.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols);
-            Filter2DEngine_GPU::apply(morfBuf, dst);
-        }
-    }
-
-    int iters;
-    oclMat morfBuf;
-};
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Point &anchor, int iterations)
-{
-    CV_Assert(iterations > 0);
-
-    Size ksize = kernel.size();
-
-    Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);
-
-    return makePtr<MorphologyFilterEngine_GPU>(filter2D, iterations);
-}
-
-namespace
-{
-void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue)
-{
-    if ((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue()))
-    {
-        CV_Error(Error::StsBadArg, "unsupported border type");
-    }
-
-    Mat kernel;
-    Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
-
-    normalizeAnchor(anchor, ksize);
-
-    if (iterations == 0 || _kernel.rows *_kernel.cols == 1)
-    {
-        src.copyTo(dst);
-        return;
-    }
-
-    dst.create(src.size(), src.type());
-
-    if (!_kernel.data)
-    {
-        kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2));
-        anchor = Point(iterations, iterations);
-        iterations = 1;
-    }
-    else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols)
-    {
-        anchor = Point(anchor.x * iterations, anchor.y * iterations);
-        kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + (iterations - 1) * (ksize.width - 1),
-                                       ksize.height + (iterations - 1) * (ksize.height - 1)), anchor);
-        iterations = 1;
-    }
-    else
-        kernel = _kernel;
-
-    Ptr<MorphologyFilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations)
-            .staticCast<MorphologyFilterEngine_GPU>();
-
-    f->apply(src, dst);
-}
-}
-
-void cv::ocl::erode(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
-                    int borderType, const Scalar &borderValue)
-{
-    bool allZero = true;
-
-    for (int i = 0; i < kernel.rows * kernel.cols; ++i)
-        if (kernel.data[i] != 0)
-            allZero = false;
-
-    if (allZero)
-        kernel.data[0] = 1;
-
-    morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
-}
-
-void cv::ocl::dilate(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
-                     int borderType, const Scalar &borderValue)
-{
-    morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue);
-}
-
-void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
-                           int borderType, const Scalar &borderValue)
-{
-    oclMat temp;
-
-    switch (op)
-    {
-    case MORPH_ERODE:
-        erode(src, dst, kernel, anchor, iterations, borderType, borderValue);
-        break;
-    case MORPH_DILATE:
-        dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
-        break;
-    case MORPH_OPEN:
-        erode(src, temp, kernel, anchor, iterations, borderType, borderValue);
-        dilate(temp, dst, kernel, anchor, iterations, borderType, borderValue);
-        break;
-    case MORPH_CLOSE:
-        dilate(src, temp, kernel, anchor, iterations, borderType, borderValue);
-        erode(temp, dst, kernel, anchor, iterations, borderType, borderValue);
-        break;
-    case MORPH_GRADIENT:
-        erode(src, temp, kernel, anchor, iterations, borderType, borderValue);
-        dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
-        subtract(dst, temp, dst);
-        break;
-    case MORPH_TOPHAT:
-        erode(src, dst, kernel, anchor, iterations, borderType, borderValue);
-        dilate(dst, temp, kernel, anchor, iterations, borderType, borderValue);
-        subtract(src, temp, dst);
-        break;
-    case MORPH_BLACKHAT:
-        dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
-        erode(dst, temp, kernel, anchor, iterations, borderType, borderValue);
-        subtract(temp, src, dst);
-        break;
-    default:
-        CV_Error(Error::StsBadArg, "unknown morphological operation");
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Linear Filter
-
-namespace
-{
-typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , const Mat & , const Size &, const Point&, const int);
-
-class LinearFilter_GPU : public BaseFilter_GPU
-{
-public:
-    LinearFilter_GPU(const Size &ksize_, const Point &anchor_, const Mat &kernel_, GPUFilter2D_t func_,
-                     int borderType_) :
-        BaseFilter_GPU(ksize_, anchor_, borderType_), kernel(kernel_), func(func_) {}
-
-    virtual void operator()(const oclMat &src, oclMat &dst)
-    {
-        func(src, dst, kernel, ksize, anchor, borderType) ;
-    }
-
-    Mat kernel;
-    GPUFilter2D_t func;
-};
-}
-
-// prepare kernel: transpose and make double rows (+align). Returns size of aligned row
-// Samples:
-//        a b c
-// Input: d e f
-//        g h i
-// Output, last two zeros is the alignment:
-// a d g a d g 0 0
-// b e h b e h 0 0
-// c f i c f i 0 0
-template <typename T>
-static int _prepareKernelFilter2D(std::vector<T>& data, const Mat &kernel)
-{
-    Mat _kernel; kernel.convertTo(_kernel, DataDepth<T>::value);
-    int size_y_aligned = roundUp(kernel.rows * 2, 4);
-    data.clear(); data.resize(size_y_aligned * kernel.cols, 0);
-    for (int x = 0; x < kernel.cols; x++)
-    {
-        for (int y = 0; y < kernel.rows; y++)
-        {
-            data[x * size_y_aligned + y] = _kernel.at<T>(y, x);
-            data[x * size_y_aligned + y + kernel.rows] = _kernel.at<T>(y, x);
-        }
-    }
-    return size_y_aligned;
-}
-
-static void GPUFilter2D(const oclMat &src, oclMat &dst, const Mat &kernel,
-    const Size &ksize, const Point& anchor, const int borderType)
-{
-    CV_Assert(src.clCxt == dst.clCxt);
-    CV_Assert((src.cols == dst.cols) &&
-              (src.rows == dst.rows));
-    CV_Assert(src.oclchannels() == dst.oclchannels());
-
-    CV_Assert(kernel.cols == ksize.width && kernel.rows == ksize.height);
-    CV_Assert(kernel.channels() == 1);
-
-    CV_Assert(anchor.x >= 0 && anchor.x < kernel.cols);
-    CV_Assert(anchor.y >= 0 && anchor.y < kernel.rows);
-
-    bool useDouble = src.depth() == CV_64F;
-
-    std::vector<float> kernelDataFloat;
-    std::vector<double> kernelDataDouble;
-    int kernel_size_y2_aligned = useDouble ?
-            _prepareKernelFilter2D<double>(kernelDataDouble, kernel)
-            : _prepareKernelFilter2D<float>(kernelDataFloat, kernel);
-    oclMat oclKernelParameter;
-    if (useDouble)
-    {
-        oclKernelParameter.createEx(1, kernelDataDouble.size(), CV_64FC1, DEVICE_MEM_R_ONLY, DEVICE_MEM_DEFAULT);
-        openCLMemcpy2D(src.clCxt, oclKernelParameter.data, kernelDataDouble.size()*sizeof(double),
-                &kernelDataDouble[0], kernelDataDouble.size()*sizeof(double),
-                kernelDataDouble.size()*sizeof(double), 1, clMemcpyHostToDevice);
-    }
-    else
-    {
-        oclKernelParameter.createEx(1, kernelDataFloat.size(), CV_32FC1, DEVICE_MEM_R_ONLY, DEVICE_MEM_DEFAULT);
-        openCLMemcpy2D(src.clCxt, oclKernelParameter.data, kernelDataFloat.size()*sizeof(float),
-                &kernelDataFloat[0], kernelDataFloat.size()*sizeof(float),
-                kernelDataFloat.size()*sizeof(float), 1, clMemcpyHostToDevice);
-    }
-
-    size_t tryWorkItems = src.clCxt->getDeviceInfo().maxWorkItemSizes[0];
-    do {
-        size_t BLOCK_SIZE = tryWorkItems;
-        while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)src.cols * 2)
-            BLOCK_SIZE /= 2;
-#if 1 // TODO Mode with several blocks requires a much more VGPRs, so this optimization is not actual for the current devices
-        size_t BLOCK_SIZE_Y = 1;
-#else
-        size_t BLOCK_SIZE_Y = 8; // TODO Check heuristic value on devices
-        while (BLOCK_SIZE_Y < BLOCK_SIZE / 8 && BLOCK_SIZE_Y * src.clCxt->getDeviceInfo().maxComputeUnits * 32 < (size_t)src.rows)
-            BLOCK_SIZE_Y *= 2;
-#endif
-
-        CV_Assert((size_t)ksize.width <= BLOCK_SIZE);
-
-        bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
-
-        std::vector<std::pair<size_t , const void *> > args;
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-        cl_uint stepBytes = src.step;
-        args.push_back( std::make_pair( sizeof(cl_uint), (void *)&stepBytes));
-        int offsetXBytes = src.offset % src.step;
-        int offsetX = offsetXBytes / src.elemSize();
-        CV_Assert((int)(offsetX * src.elemSize()) == offsetXBytes);
-        int offsetY = src.offset / src.step;
-        int endX = (offsetX + src.cols);
-        int endY = (offsetY + src.rows);
-        cl_int rect[4] = {offsetX, offsetY, endX, endY};
-        if (!isIsolatedBorder)
-        {
-            rect[2] = src.wholecols;
-            rect[3] = src.wholerows;
-        }
-        args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-        cl_uint _stepBytes = dst.step;
-        args.push_back( std::make_pair( sizeof(cl_uint), (void *)&_stepBytes));
-        int _offsetXBytes = dst.offset % dst.step;
-        int _offsetX = _offsetXBytes / dst.elemSize();
-        CV_Assert((int)(_offsetX * dst.elemSize()) == _offsetXBytes);
-        int _offsetY = dst.offset / dst.step;
-        int _endX = (_offsetX + dst.cols);
-        int _endY = (_offsetY + dst.rows);
-        cl_int _rect[4] = {_offsetX, _offsetY, _endX, _endY};
-        args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
-
-        float borderValue[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
-        double borderValueDouble[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
-        if ((borderType & ~BORDER_ISOLATED) == BORDER_CONSTANT)
-        {
-            if (useDouble)
-                args.push_back( std::make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
-            else
-                args.push_back( std::make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
-        }
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&oclKernelParameter.data));
-
-        const char* btype = NULL;
-
-        switch (borderType & ~BORDER_ISOLATED)
-        {
-        case BORDER_CONSTANT:
-            btype = "BORDER_CONSTANT";
-            break;
-        case BORDER_REPLICATE:
-            btype = "BORDER_REPLICATE";
-            break;
-        case BORDER_REFLECT:
-            btype = "BORDER_REFLECT";
-            break;
-        case BORDER_WRAP:
-            CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
-            return;
-        case BORDER_REFLECT101:
-            btype = "BORDER_REFLECT_101";
-            break;
-        }
-
-        int requiredTop = anchor.y;
-        int requiredLeft = BLOCK_SIZE; // not this: anchor.x;
-        int requiredBottom = ksize.height - 1 - anchor.y;
-        int requiredRight = BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
-        int h = isIsolatedBorder ? src.rows : src.wholerows;
-        int w = isIsolatedBorder ? src.cols : src.wholecols;
-        bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
-
-        char build_options[1024];
-        sprintf(build_options, "-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
-                "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
-                "-D %s -D %s -D %s",
-                (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y,
-                src.depth(), src.oclchannels(), useDouble ? 1 : 0,
-                anchor.x, anchor.y, ksize.width, ksize.height, kernel_size_y2_aligned,
-                btype,
-                extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
-                isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
-
-        size_t lt[3] = {BLOCK_SIZE, 1, 1};
-        size_t gt[3] = {divUp(dst.cols, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE, divUp(dst.rows, BLOCK_SIZE_Y), 1};
-
-        cl_kernel kernel = openCLGetKernelFromSource(src.clCxt, &filtering_filter2D, "filter2D", -1, -1, build_options);
-
-        size_t kernelWorkGroupSize;
-        openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(src.clCxt),
-                                                CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
-        if (lt[0] > kernelWorkGroupSize)
-        {
-            clReleaseKernel(kernel);
-            CV_Assert(BLOCK_SIZE > kernelWorkGroupSize);
-            tryWorkItems = kernelWorkGroupSize;
-            continue;
-        }
-
-        openCLExecuteKernel(src.clCxt, kernel, gt, lt, args); // kernel will be released here
-    } while (false);
-}
-
-Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int /*srcType*/, int /*dstType*/, const Mat &kernel, const Size &ksize,
-        const Point &anchor, int borderType)
-{
-    Point norm_archor = anchor;
-    normalizeAnchor(norm_archor, ksize);
-
-    return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, norm_archor, kernel, GPUFilter2D,
-                               borderType));
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor,
-        int borderType)
-{
-    Size ksize = kernel.size(); // TODO remove duplicated parameter
-    Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor, borderType);
-
-    return createFilter2D_GPU(linearFilter);
-}
-
-void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor, double delta, int borderType)
-{
-    CV_Assert(delta == 0);
-
-    if (ddepth < 0)
-        ddepth = src.depth();
-
-    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
-
-    Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor, borderType);
-    f->apply(src, dst);
-}
-
-const int optimizedSepFilterLocalSize = 16;
-static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
-                                   const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
-{
-    size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
-    size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
-
-    unsigned int src_pitch = src.step;
-    unsigned int dst_pitch = dst.step;
-
-    int src_offset_x = (src.offset % src.step) / src.elemSize();
-    int src_offset_y = src.offset / src.step;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
-
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src_offset_x ));
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src_offset_y ));
-
-    args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.offset ));
-    args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
-
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholecols ));
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholerows ));
-
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.rows ));
-
-    String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
-        row_kernel.rows / 2, col_kernel.rows / 2 );
-
-    option += " -D KERNEL_MATRIX_X=";
-    for(int i=0; i<row_kernel.rows; i++)
-        option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
-    option += "0x0";
-
-    option += " -D KERNEL_MATRIX_Y=";
-    for(int i=0; i<col_kernel.rows; i++)
-        option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
-    option += "0x0";
-
-    switch(src.type())
-    {
-    case CV_8UC1:
-        option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
-        break;
-    case CV_32FC1:
-        option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
-        break;
-    case CV_8UC2:
-        option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
-        break;
-    case CV_32FC2:
-        option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
-        break;
-    case CV_8UC3:
-        option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
-        break;
-    case CV_32FC3:
-        option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
-        break;
-    case CV_8UC4:
-        option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
-        break;
-    case CV_32FC4:
-        option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
-        break;
-    default:
-        CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
-        break;
-    }
-    switch(dst.type())
-    {
-    case CV_8UC1:
-        option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
-        break;
-    case CV_8UC2:
-        option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
-        break;
-    case CV_8UC3:
-        option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
-        break;
-    case CV_8UC4:
-        option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
-        break;
-    case CV_32FC1:
-        option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
-        break;
-    case CV_32FC2:
-        option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
-        break;
-    case CV_32FC3:
-        option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
-        break;
-    case CV_32FC4:
-        option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
-        break;
-    default:
-        CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
-        break;
-    }
-    switch(bordertype)
-    {
-    case cv::BORDER_CONSTANT:
-        option += " -D BORDER_CONSTANT";
-        break;
-    case cv::BORDER_REPLICATE:
-        option += " -D BORDER_REPLICATE";
-        break;
-    case cv::BORDER_REFLECT:
-        option += " -D BORDER_REFLECT";
-        break;
-    case cv::BORDER_REFLECT101:
-        option += " -D BORDER_REFLECT_101";
-        break;
-    case cv::BORDER_WRAP:
-        option += " -D BORDER_WRAP";
-        break;
-    default:
-        CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
-        break;
-    }
-
-    openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
-        -1, -1, option.c_str() );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// SeparableFilter
-
-namespace
-{
-class SeparableFilterEngine_GPU : public FilterEngine_GPU
-{
-public:
-    SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter_,
-                              const Ptr<BaseColumnFilter_GPU> &columnFilter_) :
-        rowFilter(rowFilter_), columnFilter(columnFilter_)
-    {
-        ksize = Size(rowFilter->ksize, columnFilter->ksize);
-        anchor = Point(rowFilter->anchor, columnFilter->anchor);
-    }
-
-    virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
-    {
-        Size src_size = src.size();
-
-        int cn = src.oclchannels();
-        dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn));
-
-        normalizeROI(roi, ksize, anchor, src_size);
-
-        srcROI = src(roi);
-        dstROI = dst(roi);
-
-        (*rowFilter)(srcROI, dstBuf);
-        (*columnFilter)(dstBuf, dstROI);
-    }
-
-    Ptr<BaseRowFilter_GPU> rowFilter;
-    Ptr<BaseColumnFilter_GPU> columnFilter;
-    Size ksize;
-    Point anchor;
-    oclMat dstBuf;
-    oclMat srcROI;
-    oclMat dstROI;
-    oclMat dstBufROI;
-};
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
-        const Ptr<BaseColumnFilter_GPU> &columnFilter)
-{
-    return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
-}
-
-namespace
-{
-class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
-{
-public:
-    SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
-    {
-        bordertype = btype;
-        rowKernel = rowKernel_;
-        columnKernel = columnKernel_;
-    }
-
-    virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
-    {
-        normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
-
-        oclMat srcROI = src(roi);
-        oclMat dstROI = dst(roi);
-
-        sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
-    }
-
-    Mat rowKernel;
-    Mat columnKernel;
-    int bordertype;
-};
-}
-
-
-static void GPUFilterBox(const oclMat &src, oclMat &dst,
-                         Size &ksize, const Point anchor, const int borderType)
-{
-    //Normalize the result by default
-    float alpha = 1.0f / (ksize.height * ksize.width);
-
-    CV_Assert(src.clCxt == dst.clCxt);
-    CV_Assert((src.cols == dst.cols) &&
-              (src.rows == dst.rows));
-    CV_Assert(src.oclchannels() == dst.oclchannels());
-
-    size_t tryWorkItems = src.clCxt->getDeviceInfo().maxWorkItemSizes[0];
-    do {
-        size_t BLOCK_SIZE = tryWorkItems;
-        while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)src.cols * 2)
-            BLOCK_SIZE /= 2;
-        size_t BLOCK_SIZE_Y = 8; // TODO Check heuristic value on devices
-        while (BLOCK_SIZE_Y < BLOCK_SIZE / 8 && BLOCK_SIZE_Y * src.clCxt->getDeviceInfo().maxComputeUnits * 32 < (size_t)src.rows)
-            BLOCK_SIZE_Y *= 2;
-
-        CV_Assert((size_t)ksize.width <= BLOCK_SIZE);
-
-        bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
-
-        std::vector<std::pair<size_t , const void *> > args;
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-        cl_uint stepBytes = src.step;
-        args.push_back( std::make_pair( sizeof(cl_uint), (void *)&stepBytes));
-        int offsetXBytes = src.offset % src.step;
-        int offsetX = offsetXBytes / src.elemSize();
-        CV_Assert((int)(offsetX * src.elemSize()) == offsetXBytes);
-        int offsetY = src.offset / src.step;
-        int endX = (offsetX + src.cols);
-        int endY = (offsetY + src.rows);
-        cl_int rect[4] = {offsetX, offsetY, endX, endY};
-        if (!isIsolatedBorder)
-        {
-            rect[2] = src.wholecols;
-            rect[3] = src.wholerows;
-        }
-        args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
-
-        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-        cl_uint _stepBytes = dst.step;
-        args.push_back( std::make_pair( sizeof(cl_uint), (void *)&_stepBytes));
-        int _offsetXBytes = dst.offset % dst.step;
-        int _offsetX = _offsetXBytes / dst.elemSize();
-        CV_Assert((int)(_offsetX * dst.elemSize()) == _offsetXBytes);
-        int _offsetY = dst.offset / dst.step;
-        int _endX = (_offsetX + dst.cols);
-        int _endY = (_offsetY + dst.rows);
-        cl_int _rect[4] = {_offsetX, _offsetY, _endX, _endY};
-        args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
-
-        bool useDouble = src.depth() == CV_64F;
-
-        float borderValue[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
-        double borderValueDouble[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
-        if ((borderType & ~BORDER_ISOLATED) == BORDER_CONSTANT)
-        {
-            if (useDouble)
-                args.push_back( std::make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
-            else
-                args.push_back( std::make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
-        }
-
-        double alphaDouble = alpha; // DON'T move into 'if' body
-        if (useDouble)
-            args.push_back( std::make_pair( sizeof(double), (void *)&alphaDouble));
-        else
-            args.push_back( std::make_pair( sizeof(float), (void *)&alpha));
-
-        const char* btype = NULL;
-
-        switch (borderType & ~BORDER_ISOLATED)
-        {
-        case BORDER_CONSTANT:
-            btype = "BORDER_CONSTANT";
-            break;
-        case BORDER_REPLICATE:
-            btype = "BORDER_REPLICATE";
-            break;
-        case BORDER_REFLECT:
-            btype = "BORDER_REFLECT";
-            break;
-        case BORDER_WRAP:
-            CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
-            return;
-        case BORDER_REFLECT101:
-            btype = "BORDER_REFLECT_101";
-            break;
-        }
-
-        int requiredTop = anchor.y;
-        int requiredLeft = BLOCK_SIZE; // not this: anchor.x;
-        int requiredBottom = ksize.height - 1 - anchor.y;
-        int requiredRight = BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
-        int h = isIsolatedBorder ? src.rows : src.wholerows;
-        int w = isIsolatedBorder ? src.cols : src.wholecols;
-        bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
-
-        CV_Assert(w >= ksize.width && h >= ksize.height); // TODO Other cases are not tested well
-
-        char build_options[1024];
-        sprintf(build_options, "-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s",
-                (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y,
-                src.depth(), src.oclchannels(), useDouble ? 1 : 0,
-                anchor.x, anchor.y, ksize.width, ksize.height,
-                btype,
-                extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
-                isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
-
-        size_t lt[3] = {BLOCK_SIZE, 1, 1};
-        size_t gt[3] = {divUp(dst.cols, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE, divUp(dst.rows, BLOCK_SIZE_Y), 1};
-
-        cl_kernel kernel = openCLGetKernelFromSource(src.clCxt, &filtering_boxFilter, "boxFilter", -1, -1, build_options);
-
-        size_t kernelWorkGroupSize;
-        openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(src.clCxt),
-                                                CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
-        if (lt[0] > kernelWorkGroupSize)
-        {
-            clReleaseKernel(kernel);
-            CV_Assert(BLOCK_SIZE > kernelWorkGroupSize);
-            tryWorkItems = kernelWorkGroupSize;
-            continue;
-        }
-
-        openCLExecuteKernel(src.clCxt, kernel, gt, lt, args); // kernel will be released here
-    } while (false);
-}
-
-Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int /*srcType*/, int /*dstType*/,
-        const Size &ksize, Point anchor, int borderType)
-{
-    normalizeAnchor(anchor, ksize);
-
-    return Ptr<BaseFilter_GPU>(new GPUBoxFilter(ksize, anchor,
-                               borderType, GPUFilterBox));
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createBoxFilter_GPU(int srcType, int dstType,
-        const Size &ksize, const Point &anchor, int borderType)
-{
-    Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor, borderType);
-    return createFilter2D_GPU(boxFilter);
-}
-
-void cv::ocl::boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
-                        Point anchor, int borderType)
-{
-    int sdepth = src.depth(), cn = src.channels();
-
-    if (ddepth < 0)
-    {
-        ddepth = sdepth;
-    }
-
-    dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
-
-    Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(),
-                              dst.type(), ksize, anchor, borderType);
-    f->apply(src, dst);
-}
-
-namespace
-{
-typedef void (*gpuFilter1D_t)(const oclMat &src, const oclMat &dst, oclMat kernel, int ksize, int anchor, int bordertype);
-
-class GpuLinearRowFilter : public BaseRowFilter_GPU
-{
-public:
-    GpuLinearRowFilter(int ksize_, int anchor_, const oclMat &kernel_, gpuFilter1D_t func_, int bordertype_) :
-        BaseRowFilter_GPU(ksize_, anchor_, bordertype_), kernel(kernel_), func(func_) {}
-
-    virtual void operator()(const oclMat &src, oclMat &dst)
-    {
-        func(src, dst, kernel, ksize, anchor, bordertype);
-    }
-
-    oclMat kernel;
-    gpuFilter1D_t func;
-};
-}
-
-template <typename T> struct index_and_sizeof;
-template <> struct index_and_sizeof<uchar>
-{
-    enum { index = 1 };
-};
-template <> struct index_and_sizeof<char>
-{
-    enum { index = 2 };
-};
-template <> struct index_and_sizeof<ushort>
-{
-    enum { index = 3 };
-};
-template <> struct index_and_sizeof<short>
-{
-    enum { index = 4 };
-};
-template <> struct index_and_sizeof<int>
-{
-    enum { index = 5 };
-};
-template <> struct index_and_sizeof<float>
-{
-    enum { index = 6 };
-};
-
-template <typename T>
-void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
-{
-    CV_Assert(bordertype <= BORDER_REFLECT_101);
-    CV_Assert(ksize == (anchor << 1) + 1);
-    int channels = src.oclchannels();
-
-#ifdef ANDROID
-    size_t localThreads[3] = { 16, 10, 1 };
-#else
-    size_t localThreads[3] = { 16, 16, 1 };
-#endif
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
-    std::string buildOptions = format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s",
-            anchor, (int)localThreads[0], (int)localThreads[1], channels, borderMap[bordertype]);
-
-    if (src.depth() == CV_8U)
-    {
-        switch (channels)
-        {
-        case 1:
-            globalThreads[0] = (dst.cols + 3) >> 2;
-            break;
-        case 2:
-            globalThreads[0] = (dst.cols + 1) >> 1;
-            break;
-        case 4:
-            globalThreads[0] = dst.cols;
-            break;
-        }
-    }
-
-    int src_pix_per_row = src.step / src.elemSize();
-    int src_offset_x = (src.offset % src.step) / src.elemSize();
-    int src_offset_y = src.offset / src.step;
-    int dst_pix_per_row = dst.step / dst.elemSize();
-    int ridusy = (dst.rows - src.rows) >> 1;
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_pix_per_row));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_x));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_y));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_pix_per_row));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ridusy));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-
-    openCLExecuteKernel(src.clCxt, &filter_sep_row, "row_filter", globalThreads, localThreads,
-                        args, channels, src.depth(), buildOptions.c_str());
-}
-
-Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
-{
-    static const gpuFilter1D_t gpuFilter1D_callers[6] =
-    {
-        linearRowFilter_gpu<uchar>,
-        linearRowFilter_gpu<char>,
-        linearRowFilter_gpu<ushort>,
-        linearRowFilter_gpu<short>,
-        linearRowFilter_gpu<int>,
-        linearRowFilter_gpu<float>
-    };
-
-    Mat temp = rowKernel.reshape(1, 1);
-    oclMat mat_kernel(temp);
-
-
-    int ksize = temp.cols;
-
-    //CV_Assert(ksize < 16);
-
-    normalizeAnchor(anchor, ksize);
-
-    return makePtr<GpuLinearRowFilter>(ksize, anchor, mat_kernel,
-        gpuFilter1D_callers[CV_MAT_DEPTH(srcType)], bordertype);
-}
-
-namespace
-{
-class GpuLinearColumnFilter : public BaseColumnFilter_GPU
-{
-public:
-    GpuLinearColumnFilter(int ksize_, int anchor_, const oclMat &kernel_, gpuFilter1D_t func_, int bordertype_) :
-        BaseColumnFilter_GPU(ksize_, anchor_, bordertype_), kernel(kernel_), func(func_) {}
-
-    virtual void operator()(const oclMat &src, oclMat &dst)
-    {
-        func(src, dst, kernel, ksize, anchor, bordertype);
-    }
-
-    oclMat kernel;
-    gpuFilter1D_t func;
-};
-}
-
-template <typename T>
-void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
-{
-    Context *clCxt = src.clCxt;
-    int channels = src.oclchannels();
-
-#ifdef ANDROID
-    size_t localThreads[3] = {16, 10, 1};
-#else
-    size_t localThreads[3] = {16, 16, 1};
-#endif
-    String kernelName = "col_filter";
-
-    char btype[30];
-
-    switch (bordertype)
-    {
-    case 0:
-        sprintf(btype, "BORDER_CONSTANT");
-        break;
-    case 1:
-        sprintf(btype, "BORDER_REPLICATE");
-        break;
-    case 2:
-        sprintf(btype, "BORDER_REFLECT");
-        break;
-    case 3:
-        sprintf(btype, "BORDER_WRAP");
-        break;
-    case 4:
-        sprintf(btype, "BORDER_REFLECT_101");
-        break;
-    }
-
-    char compile_option[256];
-
-
-    size_t globalThreads[3];
-    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
-    globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
-
-    if (dst.depth() == CV_8U)
-    {
-        switch (channels)
-        {
-        case 1:
-            globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat");
-            break;
-        case 2:
-            globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat");
-            break;
-        case 3:
-        case 4:
-            globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat");
-            break;
-        }
-    }
-    else
-    {
-        globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-
-        switch (dst.type())
-        {
-        case CV_32SC1:
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "int", "convert_int_sat");
-            break;
-        case CV_32SC3:
-        case CV_32SC4:
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat");
-            break;
-        case CV_32FC1:
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "float", "");
-            break;
-        case CV_32FC3:
-        case CV_32FC4:
-            sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
-                    anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "float4", "");
-            break;
-        }
-    }
-
-    //sanity checks
-    CV_Assert(clCxt == dst.clCxt);
-    CV_Assert(src.cols == dst.cols);
-    CV_Assert(src.oclchannels() == dst.oclchannels());
-    CV_Assert(ksize == (anchor << 1) + 1);
-    int src_pix_per_row, dst_pix_per_row;
-    int dst_offset_in_pixel;
-    src_pix_per_row = src.step / src.elemSize();
-    dst_pix_per_row = dst.step / dst.elemSize();
-    dst_offset_in_pixel = dst.offset / dst.elemSize();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_pix_per_row));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_pix_per_row));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_offset_in_pixel));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-
-    openCLExecuteKernel(clCxt, &filter_sep_col, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
-}
-
-Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, int dstType, const Mat &columnKernel, int anchor, int bordertype, double /*delta*/)
-{
-    static const gpuFilter1D_t gpuFilter1D_callers[6] =
-    {
-        linearColumnFilter_gpu<uchar>,
-        linearColumnFilter_gpu<char>,
-        linearColumnFilter_gpu<ushort>,
-        linearColumnFilter_gpu<short>,
-        linearColumnFilter_gpu<int>,
-        linearColumnFilter_gpu<float>
-    };
-
-    Mat temp = columnKernel.reshape(1, 1);
-    oclMat mat_kernel(temp);
-
-    int ksize = temp.cols;
-    normalizeAnchor(anchor, ksize);
-
-    return makePtr<GpuLinearColumnFilter>(ksize, anchor, mat_kernel,
-        gpuFilter1D_callers[CV_MAT_DEPTH(dstType)], bordertype);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
-        const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
-{
-    int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
-    int cn = CV_MAT_CN(srcType);
-    int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
-    int bufType = CV_MAKETYPE(bdepth, cn);
-    Context* clCxt = Context::getContext();
-
-    //if image size is non-degenerate and large enough
-    //and if filter support is reasonable to satisfy larger local memory requirements,
-    //then we can use single pass routine to avoid extra runtime calls overhead
-    if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
-        rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
-        (rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
-        imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
-        imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
-    {
-        return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
-    }
-    else
-    {
-        Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
-        Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
-
-        return createSeparableFilter_GPU(rowFilter, columnFilter);
-    }
-}
-
-void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
-{
-    if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
-    {
-        if ((bordertype & cv::BORDER_ISOLATED) != 0)
-        {
-            bordertype &= ~cv::BORDER_ISOLATED;
-
-            if ((bordertype != cv::BORDER_CONSTANT) &&
-                    (bordertype != cv::BORDER_REPLICATE))
-            {
-                CV_Error(Error::StsBadArg, "unsupported border type");
-            }
-        }
-    }
-
-    if (ddepth < 0)
-        ddepth = src.depth();
-
-    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
-
-    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
-    f->apply(src, dst);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
-{
-    Mat kx, ky;
-    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
-    return createSeparableLinearFilter_GPU(srcType, dstType,
-                                           kx, ky, Point(-1, -1), 0, borderType, imgSize);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Deriv Filter
-void cv::ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType)
-{
-    Mat kx, ky;
-    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
-
-    if (scale != 1)
-    {
-        // usually the smoothing part is the slowest to compute,
-        // so try to scale it instead of the faster differenciating part
-        if (dx == 0)
-            kx *= scale;
-        else
-            ky *= scale;
-    }
-
-    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType);
-}
-
-void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale, double delta , int bordertype)
-{
-    Mat kx, ky;
-    getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);
-
-    if (scale != 1)
-    {
-        // usually the smoothing part is the slowest to compute,
-        // so try to scale it instead of the faster differenciating part
-        if (dx == 0)
-            kx *= scale;
-        else
-            ky *= scale;
-    }
-
-    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, bordertype);
-}
-
-void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale,
-        double delta, int borderType)
-{
-    CV_Assert(delta == 0);
-
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_Assert(ksize == 1 || ksize == 3);
-
-    double K[2][9] =
-    {
-        {0, 1, 0, 1, -4, 1, 0, 1, 0},
-        {2, 0, 2, 0, -8, 0, 2, 0, 2}
-    };
-    Mat kernel(3, 3, CV_64F, (void *)K[ksize == 3 ? 1 : 0]);
-
-    if (scale != 1)
-        kernel *= scale;
-
-    filter2D(src, dst, ddepth, kernel, Point(-1, -1), 0, borderType);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Gaussian Filter
-
-Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
-{
-    int depth = CV_MAT_DEPTH(type);
-
-    if (sigma2 <= 0)
-        sigma2 = sigma1;
-
-    // automatic detection of kernel size from sigma
-    if (ksize.width <= 0 && sigma1 > 0)
-        ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
-
-    if (ksize.height <= 0 && sigma2 > 0)
-        ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
-
-    CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
-
-    sigma1 = std::max(sigma1, 0.0);
-    sigma2 = std::max(sigma2, 0.0);
-
-    Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
-    Mat ky;
-
-    if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
-        ky = kx;
-    else
-        ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
-
-    return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
-}
-
-void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
-{
-    if (bordertype != BORDER_CONSTANT)
-    {
-        if (src.rows == 1)
-            ksize.height = 1;
-
-        if (src.cols == 1)
-            ksize.width = 1;
-    }
-
-    if (ksize.width == 1 && ksize.height == 1)
-    {
-        src.copyTo(dst);
-        return;
-    }
-
-    if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
-    {
-        if ((bordertype & cv::BORDER_ISOLATED) != 0)
-        {
-            bordertype &= ~cv::BORDER_ISOLATED;
-
-            if ((bordertype != cv::BORDER_CONSTANT) &&
-                    (bordertype != cv::BORDER_REPLICATE))
-            {
-                CV_Error(Error::StsBadArg, "unsupported border type");
-            }
-        }
-    }
-
-    dst.create(src.size(), src.type());
-
-    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
-    f->apply(src, dst);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Adaptive Bilateral Filter
-
-void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType)
-{
-    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8bit RGB image
-    if( sigmaSpace <= 0 )
-        sigmaSpace = 1;
-    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
-    double sigma2 = sigmaSpace * sigmaSpace;
-    int idx = 0;
-    int w = ksize.width / 2;
-    int h = ksize.height / 2;
-
-    int ABF_GAUSSIAN_ocl = 1;
-
-    if(ABF_GAUSSIAN_ocl)
-    {
-        for(int y=-h; y<=h; y++)
-            for(int x=-w; x<=w; x++)
-        {
-            lut.at<float>(idx++) = expf( (float)(-0.5 * (x * x + y * y)/sigma2));
-        }
-    }
-    else
-    {
-        for(int y=-h; y<=h; y++)
-            for(int x=-w; x<=w; x++)
-        {
-            lut.at<float>(idx++) = (float) (sigma2 / (sigma2 + x * x + y * y));
-        }
-    }
-
-    oclMat dlut(lut);
-    int depth = src.depth();
-    int cn = src.oclchannels();
-
-    normalizeAnchor(anchor, ksize);
-    const static String kernelName = "adaptiveBilateralFilter";
-
-    dst.create(src.size(), src.type());
-
-    char btype[30];
-    switch(borderType)
-    {
-    case BORDER_CONSTANT:
-        sprintf(btype, "BORDER_CONSTANT");
-        break;
-    case BORDER_REPLICATE:
-        sprintf(btype, "BORDER_REPLICATE");
-        break;
-    case BORDER_REFLECT:
-        sprintf(btype, "BORDER_REFLECT");
-        break;
-    case BORDER_WRAP:
-        sprintf(btype, "BORDER_WRAP");
-        break;
-    case BORDER_REFLECT101:
-        sprintf(btype, "BORDER_REFLECT_101");
-        break;
-    default:
-        CV_Error(Error::StsBadArg, "This border type is not supported");
-        break;
-    }
-
-    //the following constants may be adjusted for performance concerns
-    const static size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
-
-    //Normalize the result by default
-    const float alpha = ksize.height * ksize.width;
-
-    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
-    const size_t globalSizeX = (src.cols) % gSize == 0 ?
-        src.cols / gSize * blockSizeX :
-        (src.cols / gSize + 1) * blockSizeX;
-    const size_t rows_per_thread = 1 + EXTRA;
-    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
-        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
-        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
-
-    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
-    size_t localThreads[3]  = { blockSizeX, blockSizeY, 1};
-
-    char build_options[250];
-
-    //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
-    sprintf(build_options,
-        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d -D MAX_VAR_VAL=%f -D ABF_GAUSSIAN=%d"
-        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
-        static_cast<int>(EXTRA), static_cast<float>(maxSigmaColor*maxSigmaColor), static_cast<int>(ABF_GAUSSIAN_ocl),
-        static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
-    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
-    int lut_step = dlut.step1();
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
-
-    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
-        globalThreads, localThreads, args, cn, depth, build_options);
-}
diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
deleted file mode 100644
index 50a2fdc..0000000
--- a/modules/ocl/src/gemm.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace ocl {
-
-// used for clAmdBlas library to avoid redundant setup/teardown
-void clBlasSetup();
-void clBlasTeardown();
-
-}} /* namespace cv { namespace ocl */
-
-
-#if !defined HAVE_CLAMDBLAS
-void cv::ocl::gemm(const oclMat&, const oclMat&, double,
-                   const oclMat&, double, oclMat&, int)
-{
-    CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
-}
-
-void cv::ocl::clBlasSetup()
-{
-    CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
-}
-
-void cv::ocl::clBlasTeardown()
-{
-    //intentionally do nothing
-}
-
-#else
-#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
-using namespace cv;
-
-static bool clBlasInitialized = false;
-
-void cv::ocl::clBlasSetup()
-{
-    if(!clBlasInitialized)
-    {
-        AutoLock lock(getInitializationMutex());
-        if(!clBlasInitialized)
-        {
-            openCLSafeCall(clAmdBlasSetup());
-            clBlasInitialized = true;
-        }
-    }
-}
-
-void cv::ocl::clBlasTeardown()
-{
-    AutoLock lock(getInitializationMutex());
-    if(clBlasInitialized)
-    {
-        clAmdBlasTeardown();
-        clBlasInitialized = false;
-    }
-}
-
-void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
-                   const oclMat &src3, double beta, oclMat &dst, int flags)
-{
-    CV_Assert(src1.cols == src2.rows &&
-              (src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols)));
-    CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
-    if(!src3.empty())
-    {
-        src3.copyTo(dst);
-    }
-    else
-    {
-        dst.create(src1.rows, src2.cols, src1.type());
-        dst.setTo(Scalar::all(0));
-    }
-
-    clBlasSetup();
-
-    const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
-    const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
-    const clAmdBlasOrder     order  = clAmdBlasRowMajor;
-
-    const int M = src1.rows;
-    const int N = src2.cols;
-    const int K = src1.cols;
-    int lda     = src1.step;
-    int ldb     = src2.step;
-    int ldc     = dst.step;
-    int offa    = src1.offset;
-    int offb    = src2.offset;
-    int offc    = dst.offset;
-
-    cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr();
-    switch(src1.type())
-    {
-    case CV_32FC1:
-        lda  /= sizeof(float);
-        ldb  /= sizeof(float);
-        ldc  /= sizeof(float);
-        offa /= sizeof(float);
-        offb /= sizeof(float);
-        offc /= sizeof(float);
-
-        openCLSafeCall
-        (
-            clAmdBlasSgemmEx(order, transA, transB, M, N, K,
-                             alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
-        );
-        break;
-    case CV_64FC1:
-        lda  /= sizeof(double);
-        ldb  /= sizeof(double);
-        ldc  /= sizeof(double);
-        offa /= sizeof(double);
-        offb /= sizeof(double);
-        offc /= sizeof(double);
-        openCLSafeCall
-        (
-            clAmdBlasDgemmEx(order, transA, transB, M, N, K,
-                             alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
-        );
-        break;
-    case CV_32FC2:
-    {
-        lda  /= (2*sizeof(float));
-        ldb  /= (2*sizeof(float));
-        ldc  /= (2*sizeof(float));
-        offa /= (2*sizeof(float));
-        offb /= (2*sizeof(float));
-        offc /= (2*sizeof(float));
-        cl_float2 alpha_2 = {{alpha, 0}};
-        cl_float2 beta_2  = {{beta, 0}};
-        openCLSafeCall
-        (
-            clAmdBlasCgemmEx(order, transA, transB, M, N, K,
-                             alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
-        );
-    }
-    break;
-    case CV_64FC2:
-    {
-        lda  /= (2*sizeof(double));
-        ldb  /= (2*sizeof(double));
-        ldc  /= (2*sizeof(double));
-        offa /= (2*sizeof(double));
-        offb /= (2*sizeof(double));
-        offc /= (2*sizeof(double));
-        cl_double2 alpha_2 = {{alpha, 0}};
-        cl_double2 beta_2  = {{beta, 0}};
-        openCLSafeCall
-        (
-            clAmdBlasZgemmEx(order, transA, transB, M, N, K,
-                             alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
-        );
-    }
-    break;
-    }
-}
-#endif
diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp
deleted file mode 100644
index 2523136..0000000
--- a/modules/ocl/src/gftt.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-// compact structure for corners
-struct DefCorner
-{
-    float eig;  //eigenvalue of corner
-    short x;    //x coordinate of corner point
-    short y;    //y coordinate of corner point
-};
-
-// compare procedure for corner
-//it is used for sort on the host side
-struct DefCornerCompare :
-        public std::binary_function<DefCorner, DefCorner, bool>
-{
-    bool operator()(const DefCorner a, const DefCorner b) const
-    {
-        return a.eig > b.eig;
-    }
-};
-
-// find corners on matrix and put it into array
-static void findCorners_caller(
-    const oclMat&   eig_mat,        //input matrix worth eigenvalues
-    oclMat&         eigMinMax,      //input with min and max values of eigenvalues
-    const float     qualityLevel,
-    const oclMat&   mask,
-    oclMat&         corners,        //output array with detected corners
-    oclMat&         counter)        //output value with number of detected corners, have to be 0 before call
-{
-    String  opt;
-    std::vector<int> k;
-    Context * cxt = Context::getContext();
-
-    std::vector< std::pair<size_t, const void*> > args;
-
-    const int mask_strip = mask.step / mask.elemSize1();
-
-    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&(eig_mat.data)));
-
-    int src_pitch = (int)eig_mat.step;
-    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&src_pitch ));
-    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&mask.data ));
-    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&corners.data ));
-    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&mask_strip));
-    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&eigMinMax.data ));
-    args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
-    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.rows ));
-    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.cols ));
-    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&corners.cols ));
-    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&counter.data ));
-
-    size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    if(!mask.empty())
-        opt += " -D WITH_MASK=1";
-
-     openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
-}
-
-
-static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
-{
-    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
-    CV_Assert(groupnum != 0);
-
-    int dbsize = groupnum * 2 * src.elemSize();
-
-    ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
-
-    cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
-
-    int all_cols = src.step / src.elemSize();
-    int pre_cols = (src.offset % src.step) / src.elemSize();
-    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
-    int invalid_cols = pre_cols + sec_cols;
-    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
-    int offset = src.offset / src.elemSize();
-
-    {
-        // first parallel pass
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
-        size_t globalThreads[3] = {groupnum * 256, 1, 1};
-        size_t localThreads[3] = {256, 1, 1};
-        openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
-                            args, -1, -1, "-D T=float -D DEPTH_5");
-    }
-
-    {
-        // run final "serial" kernel to find accumulate results from threads and reset corner counter
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
-        size_t globalThreads[3] = {1, 1, 1};
-        size_t localThreads[3] = {1, 1, 1};
-        openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
-                            args, -1, -1);
-    }
-}
-
-void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
-{
-    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
-
-    ensureSizeIsEnough(image.size(), CV_32F, eig_);
-
-    if (useHarrisDetector)
-        cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
-    else
-        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
-
-    ensureSizeIsEnough(1,1, CV_32SC1, counter_);
-
-    // find max eigenvalue and reset detected counters
-    minMaxEig_caller(eig_, eig_minmax_, counter_);
-
-    // allocate buffer for kernels
-    int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
-    ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
-
-    int total = tmpCorners_.cols; // by default the number of corner is full array
-    std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
-
-    // find points with high eigenvalue and put it into the output array
-    findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);
-
-    // send non-blocking request to read real non-zero number of corners to sort it on the HOST side
-    openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0, sizeof(int), &total, 0, NULL, NULL));
-
-    if (total == 0)
-    {
-        // check for trivial case
-        corners.release();
-        return;
-    }
-
-    // blocking read whole corners array (sorted or not sorted)
-    openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmpCorners_.cols * sizeof(DefCorner));
-
-    // sort detected corners on cpu side.
-    tmp.resize(total);
-    std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
-
-    // estimate maximal size of final output array
-    int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
-    int D2 = (int)ceil(minDistance * minDistance);
-
-    // allocate output buffer
-    std::vector<Point2f> tmp2;
-    tmp2.reserve(total_max);
-
-
-    if (minDistance < 1)
-    {
-        // we have not distance restriction. then just copy with conversion maximal allowed points into output array
-        for (int i = 0; i < total_max; ++i)
-            tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
-    }
-    else
-    {
-        // we have distance restriction. then start coping to output array from the first element and check distance for each next one
-        const int cell_size = cvRound(minDistance);
-        const int grid_width = (image.cols + cell_size - 1) / cell_size;
-        const int grid_height = (image.rows + cell_size - 1) / cell_size;
-
-        std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
-
-        for (int i = 0; i < total ; ++i)
-        {
-            DefCorner p = tmp[i];
-            bool good = true;
-
-            int x_cell = static_cast<int>(p.x / cell_size);
-            int y_cell = static_cast<int>(p.y / cell_size);
-
-            int x1 = x_cell - 1;
-            int y1 = y_cell - 1;
-            int x2 = x_cell + 1;
-            int y2 = y_cell + 1;
-
-            // boundary check
-            x1 = std::max(0, x1);
-            y1 = std::max(0, y1);
-            x2 = std::min(grid_width - 1, x2);
-            y2 = std::min(grid_height - 1, y2);
-
-            for (int yy = y1; yy <= y2; yy++)
-            {
-                for (int xx = x1; xx <= x2; xx++)
-                {
-                    std::vector<Point2i>& m = grid[yy * grid_width + xx];
-                    if (m.empty())
-                        continue;
-                    for(size_t j = 0; j < m.size(); j++)
-                    {
-                        int dx = p.x - m[j].x;
-                        int dy = p.y - m[j].y;
-
-                        if (dx * dx + dy * dy < D2)
-                        {
-                            good = false;
-                            goto break_out_;
-                        }
-                    }
-                }
-            }
-
-            break_out_:
-
-            if(good)
-            {
-                grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
-                tmp2.push_back(Point2f(p.x, p.y));
-
-                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
-                    break;
-            }
-        }
-
-    }
-
-    int final_size = static_cast<int>(tmp2.size());
-    if (final_size > 0)
-        corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
-    else
-        corners.release();
-}
-
-void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
-{
-    CV_DbgAssert(points.type() == CV_32FC2);
-    points_v.resize(points.cols);
-    openCLSafeCall(clEnqueueReadBuffer(
-        *(cl_command_queue*)getClCommandQueuePtr(),
-        reinterpret_cast<cl_mem>(points.data),
-        CL_TRUE,
-        0,
-        points.cols * sizeof(Point2f),
-        &points_v[0],
-        0,
-        NULL,
-        NULL));
-}
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
deleted file mode 100644
index d38b3ba..0000000
--- a/modules/ocl/src/haar.cpp
+++ /dev/null
@@ -1,1224 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Wang Weiyan, wangweiyanster@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Wu Xinglong, wxl370@126.com
-//    Wang Yao, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-/* these settings affect the quality of detection: change with care */
-#define CV_ADJUST_FEATURES  1
-#define CV_ADJUST_WEIGHTS   0
-#define CV_HAAR_FEATURE_MAX 3
-typedef int sumtype;
-typedef double sqsumtype;
-
-typedef struct CvHidHaarFeature
-{
-    struct
-    {
-        sumtype *p0, *p1, *p2, *p3;
-        float weight;
-    }
-    rect[CV_HAAR_FEATURE_MAX];
-}
-CvHidHaarFeature;
-
-
-typedef struct CvHidHaarTreeNode
-{
-    CvHidHaarFeature feature;
-    float threshold;
-    int left;
-    int right;
-}
-CvHidHaarTreeNode;
-
-
-typedef struct CvHidHaarClassifier
-{
-    int count;
-    //CvHaarFeature* orig_feature;
-    CvHidHaarTreeNode *node;
-    float *alpha;
-}
-CvHidHaarClassifier;
-
-
-typedef struct CvHidHaarStageClassifier
-{
-    int  count;
-    float threshold;
-    CvHidHaarClassifier *classifier;
-    int two_rects;
-
-    struct CvHidHaarStageClassifier *next;
-    struct CvHidHaarStageClassifier *child;
-    struct CvHidHaarStageClassifier *parent;
-}
-CvHidHaarStageClassifier;
-
-
-struct CvHidHaarClassifierCascade
-{
-    int  count;
-    int  is_stump_based;
-    int  has_tilted_features;
-    int  is_tree;
-    double inv_window_area;
-    CvMat sum, sqsum, tilted;
-    CvHidHaarStageClassifier *stage_classifier;
-    sqsumtype *pq0, *pq1, *pq2, *pq3;
-    sumtype *p0, *p1, *p2, *p3;
-
-    void **ipp_stages;
-};
-typedef struct
-{
-    int width_height;
-    int grpnumperline_totalgrp;
-    int imgoff;
-    float factor;
-} detect_piramid_info;
-#ifdef _MSC_VER
-#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
-
-typedef _ALIGNED_ON(128) struct  GpuHidHaarTreeNode
-{
-    _ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];
-    float weight[CV_HAAR_FEATURE_MAX] ;
-    float threshold ;
-    _ALIGNED_ON(16) float alpha[3] ;
-    _ALIGNED_ON(4) int left ;
-    _ALIGNED_ON(4) int right ;
-}
-GpuHidHaarTreeNode;
-
-
-typedef  _ALIGNED_ON(32) struct  GpuHidHaarClassifier
-{
-    _ALIGNED_ON(4) int count;
-    _ALIGNED_ON(8) GpuHidHaarTreeNode *node ;
-    _ALIGNED_ON(8) float *alpha ;
-}
-GpuHidHaarClassifier;
-
-
-typedef _ALIGNED_ON(64) struct   GpuHidHaarStageClassifier
-{
-    _ALIGNED_ON(4) int  count ;
-    _ALIGNED_ON(4) float threshold ;
-    _ALIGNED_ON(4) int two_rects ;
-    _ALIGNED_ON(8) GpuHidHaarClassifier *classifier ;
-    _ALIGNED_ON(8) struct GpuHidHaarStageClassifier *next;
-    _ALIGNED_ON(8) struct GpuHidHaarStageClassifier *child ;
-    _ALIGNED_ON(8) struct GpuHidHaarStageClassifier *parent ;
-}
-GpuHidHaarStageClassifier;
-
-
-typedef _ALIGNED_ON(64) struct  GpuHidHaarClassifierCascade
-{
-    _ALIGNED_ON(4) int  count ;
-    _ALIGNED_ON(4) int  is_stump_based ;
-    _ALIGNED_ON(4) int  has_tilted_features ;
-    _ALIGNED_ON(4) int  is_tree ;
-    _ALIGNED_ON(4) int pq0 ;
-    _ALIGNED_ON(4) int pq1 ;
-    _ALIGNED_ON(4) int pq2 ;
-    _ALIGNED_ON(4) int pq3 ;
-    _ALIGNED_ON(4) int p0 ;
-    _ALIGNED_ON(4) int p1 ;
-    _ALIGNED_ON(4) int p2 ;
-    _ALIGNED_ON(4) int p3 ;
-    _ALIGNED_ON(4) float inv_window_area ;
-} GpuHidHaarClassifierCascade;
-#else
-#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
-
-typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
-{
-    int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64);
-    float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16);
-    float threshold;// _ALIGNED_ON(4);
-    float alpha[3] _ALIGNED_ON(16);
-    int left _ALIGNED_ON(4);
-    int right _ALIGNED_ON(4);
-}
-GpuHidHaarTreeNode;
-
-typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier
-{
-    int count _ALIGNED_ON(4);
-    GpuHidHaarTreeNode *node _ALIGNED_ON(8);
-    float *alpha _ALIGNED_ON(8);
-}
-GpuHidHaarClassifier;
-
-
-typedef struct _ALIGNED_ON(64) GpuHidHaarStageClassifier
-{
-    int  count _ALIGNED_ON(4);
-    float threshold _ALIGNED_ON(4);
-    int two_rects _ALIGNED_ON(4);
-    GpuHidHaarClassifier *classifier _ALIGNED_ON(8);
-    struct GpuHidHaarStageClassifier *next _ALIGNED_ON(8);
-    struct GpuHidHaarStageClassifier *child _ALIGNED_ON(8);
-    struct GpuHidHaarStageClassifier *parent _ALIGNED_ON(8);
-}
-GpuHidHaarStageClassifier;
-
-
-typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
-{
-    int  count _ALIGNED_ON(4);
-    int  is_stump_based _ALIGNED_ON(4);
-    int  has_tilted_features _ALIGNED_ON(4);
-    int  is_tree _ALIGNED_ON(4);
-    int pq0 _ALIGNED_ON(4);
-    int pq1 _ALIGNED_ON(4);
-    int pq2 _ALIGNED_ON(4);
-    int pq3 _ALIGNED_ON(4);
-    int p0 _ALIGNED_ON(4);
-    int p1 _ALIGNED_ON(4);
-    int p2 _ALIGNED_ON(4);
-    int p3 _ALIGNED_ON(4);
-    float inv_window_area _ALIGNED_ON(4);
-} GpuHidHaarClassifierCascade;
-#endif
-
-const int icv_object_win_border = 1;
-const float icv_stage_threshold_bias = 0.0001f;
-double globaltime = 0;
-
-/* create more efficient internal representation of haar classifier cascade */
-static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
-{
-    GpuHidHaarClassifierCascade *out = 0;
-
-    int i, j, k, l;
-    int datasize;
-    int total_classifiers = 0;
-    int total_nodes = 0;
-    char errorstr[256];
-
-    GpuHidHaarStageClassifier *stage_classifier_ptr;
-    GpuHidHaarClassifier *haar_classifier_ptr;
-    GpuHidHaarTreeNode *haar_node_ptr;
-
-    CvSize orig_window_size;
-    int has_tilted_features = 0;
-
-    if( !CV_IS_HAAR_CLASSIFIER(cascade) )
-        CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
-
-    if( cascade->hid_cascade )
-        CV_Error( CV_StsError, "hid_cascade has been already created" );
-
-    if( !cascade->stage_classifier )
-        CV_Error( CV_StsNullPtr, "" );
-
-    if( cascade->count <= 0 )
-        CV_Error( CV_StsOutOfRange, "Negative number of cascade stages" );
-
-    orig_window_size = cascade->orig_window_size;
-
-    /* check input structure correctness and calculate total memory size needed for
-    internal representation of the classifier cascade */
-    for( i = 0; i < cascade->count; i++ )
-    {
-        CvHaarStageClassifier *stage_classifier = cascade->stage_classifier + i;
-
-        if( !stage_classifier->classifier ||
-                stage_classifier->count <= 0 )
-        {
-            sprintf( errorstr, "header of the stage classifier #%d is invalid "
-                     "(has null pointers or non-positive classfier count)", i );
-            CV_Error( CV_StsError, errorstr );
-        }
-
-        total_classifiers += stage_classifier->count;
-
-        for( j = 0; j < stage_classifier->count; j++ )
-        {
-            CvHaarClassifier *classifier = stage_classifier->classifier + j;
-
-            total_nodes += classifier->count;
-            for( l = 0; l < classifier->count; l++ )
-            {
-                for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
-                {
-                    if( classifier->haar_feature[l].rect[k].r.width )
-                    {
-                        CvRect r = classifier->haar_feature[l].rect[k].r;
-                        int tilted = classifier->haar_feature[l].tilted;
-                        has_tilted_features |= tilted != 0;
-                        if( r.width < 0 || r.height < 0 || r.y < 0 ||
-                                r.x + r.width > orig_window_size.width
-                                ||
-                                (!tilted &&
-                                 (r.x < 0 || r.y + r.height > orig_window_size.height))
-                                ||
-                                (tilted && (r.x - r.height < 0 ||
-                                            r.y + r.width + r.height > orig_window_size.height)))
-                        {
-                            sprintf( errorstr, "rectangle #%d of the classifier #%d of "
-                                     "the stage classifier #%d is not inside "
-                                     "the reference (original) cascade window", k, j, i );
-                            CV_Error( CV_StsNullPtr, errorstr );
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    // this is an upper boundary for the whole hidden cascade size
-    datasize = sizeof(GpuHidHaarClassifierCascade)                   +
-               sizeof(GpuHidHaarStageClassifier) * cascade->count    +
-               sizeof(GpuHidHaarClassifier)      * total_classifiers +
-               sizeof(GpuHidHaarTreeNode)        * total_nodes;
-
-    *totalclassifier = total_classifiers;
-    *size = datasize;
-    out = (GpuHidHaarClassifierCascade *)cvAlloc( datasize );
-    memset( out, 0, sizeof(*out) );
-
-    /* init header */
-    out->count = cascade->count;
-    stage_classifier_ptr = (GpuHidHaarStageClassifier *)(out + 1);
-    haar_classifier_ptr = (GpuHidHaarClassifier *)(stage_classifier_ptr + cascade->count);
-    haar_node_ptr = (GpuHidHaarTreeNode *)(haar_classifier_ptr + total_classifiers);
-
-    out->is_stump_based = 1;
-    out->has_tilted_features = has_tilted_features;
-    out->is_tree = 0;
-
-    /* initialize internal representation */
-    for( i = 0; i < cascade->count; i++ )
-    {
-        CvHaarStageClassifier *stage_classifier = cascade->stage_classifier + i;
-        GpuHidHaarStageClassifier *hid_stage_classifier = stage_classifier_ptr + i;
-
-        hid_stage_classifier->count = stage_classifier->count;
-        hid_stage_classifier->threshold = stage_classifier->threshold - icv_stage_threshold_bias;
-        hid_stage_classifier->classifier = haar_classifier_ptr;
-        hid_stage_classifier->two_rects = 1;
-        haar_classifier_ptr += stage_classifier->count;
-
-        for( j = 0; j < stage_classifier->count; j++ )
-        {
-            CvHaarClassifier *classifier         = stage_classifier->classifier + j;
-            GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j;
-            int node_count = classifier->count;
-
-            float *alpha_ptr = &haar_node_ptr->alpha[0];
-
-            hid_classifier->count = node_count;
-            hid_classifier->node = haar_node_ptr;
-            hid_classifier->alpha = alpha_ptr;
-
-            for( l = 0; l < node_count; l++ )
-            {
-                GpuHidHaarTreeNode *node     = hid_classifier->node + l;
-                CvHaarFeature      *feature = classifier->haar_feature + l;
-
-                memset( node, -1, sizeof(*node) );
-                node->threshold = classifier->threshold[l];
-                node->left      = classifier->left[l];
-                node->right     = classifier->right[l];
-
-                if( fabs(feature->rect[2].weight) < DBL_EPSILON ||
-                        feature->rect[2].r.width == 0 ||
-                        feature->rect[2].r.height == 0 )
-                {
-                    node->p[2][0] = 0;
-                    node->p[2][1] = 0;
-                    node->p[2][2] = 0;
-                    node->p[2][3] = 0;
-                    node->weight[2] = 0;
-                }
-                else
-                    hid_stage_classifier->two_rects = 0;
-
-                memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
-                haar_node_ptr = haar_node_ptr + 1;
-            }
-            out->is_stump_based &= node_count == 1;
-        }
-    }
-
-    cascade->hid_cascade = (CvHidHaarClassifierCascade *)out;
-    assert( (char *)haar_node_ptr - (char *)out <= datasize );
-
-    return out;
-}
-
-
-#define sum_elem_ptr(sum,row,col)  \
-    ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
-
-#define sqsum_elem_ptr(sqsum,row,col)  \
-    ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
-
-#define calc_sum(rect,offset) \
-    ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
-
-
-static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade,
-                                      double scale,
-                                      int step)
-{
-    GpuHidHaarClassifierCascade *cascade;
-    int coi0 = 0, coi1 = 0;
-    int i;
-    int datasize;
-    int total;
-    CvRect equRect;
-    double weight_scale;
-    GpuHidHaarStageClassifier *stage_classifier;
-
-    if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
-        CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
-
-    if( scale <= 0 )
-        CV_Error( CV_StsOutOfRange, "Scale must be positive" );
-
-    if( coi0 || coi1 )
-        CV_Error( CV_BadCOI, "COI is not supported" );
-
-    if( !_cascade->hid_cascade )
-        gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
-
-    cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
-    stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1);
-
-    _cascade->scale = scale;
-    _cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
-    _cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
-
-    equRect.x = equRect.y = cvRound(scale);
-    equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale);
-    equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale);
-    weight_scale = 1. / (equRect.width * equRect.height);
-    cascade->inv_window_area = weight_scale;
-
-    cascade->pq0 = equRect.x;
-    cascade->pq1 = equRect.y;
-    cascade->pq2 = equRect.x + equRect.width;
-    cascade->pq3 = equRect.y + equRect.height;
-
-    cascade->p0 = equRect.x;
-    cascade->p1 = equRect.y;
-    cascade->p2 = equRect.x + equRect.width;
-    cascade->p3 = equRect.y + equRect.height;
-
-
-    /* init pointers in haar features according to real window size and
-    given image pointers */
-    for( i = 0; i < _cascade->count; i++ )
-    {
-        int j, k, l;
-        for( j = 0; j < stage_classifier[i].count; j++ )
-        {
-            for( l = 0; l < stage_classifier[i].classifier[j].count; l++ )
-            {
-                CvHaarFeature *feature =
-                    &_cascade->stage_classifier[i].classifier[j].haar_feature[l];
-                GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
-                double sum0 = 0, area0 = 0;
-                CvRect r[3];
-
-                int base_w = -1, base_h = -1;
-                int new_base_w = 0, new_base_h = 0;
-                int kx, ky;
-                int flagx = 0, flagy = 0;
-                int x0 = 0, y0 = 0;
-                int nr;
-
-                /* align blocks */
-                for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
-                {
-                    if(!hidnode->p[k][0])
-                        break;
-                    r[k] = feature->rect[k].r;
-                    base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width - 1) );
-                    base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x - 1) );
-                    base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height - 1) );
-                    base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y - 1) );
-                }
-
-                nr = k;
-                base_w += 1;
-                base_h += 1;
-                if(base_w == 0)
-                    base_w = 1;
-                kx = r[0].width / base_w;
-                if(base_h == 0)
-                    base_h = 1;
-                ky = r[0].height / base_h;
-
-                if( kx <= 0 )
-                {
-                    flagx = 1;
-                    new_base_w = cvRound( r[0].width * scale ) / kx;
-                    x0 = cvRound( r[0].x * scale );
-                }
-
-                if( ky <= 0 )
-                {
-                    flagy = 1;
-                    new_base_h = cvRound( r[0].height * scale ) / ky;
-                    y0 = cvRound( r[0].y * scale );
-                }
-
-                for( k = 0; k < nr; k++ )
-                {
-                    CvRect tr;
-                    double correction_ratio;
-
-                    if( flagx )
-                    {
-                        tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0;
-                        tr.width = r[k].width * new_base_w / base_w;
-                    }
-                    else
-                    {
-                        tr.x = cvRound( r[k].x * scale );
-                        tr.width = cvRound( r[k].width * scale );
-                    }
-
-                    if( flagy )
-                    {
-                        tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0;
-                        tr.height = r[k].height * new_base_h / base_h;
-                    }
-                    else
-                    {
-                        tr.y = cvRound( r[k].y * scale );
-                        tr.height = cvRound( r[k].height * scale );
-                    }
-
-#if CV_ADJUST_WEIGHTS
-                    {
-                        // RAINER START
-                        const float orig_feature_size =  (float)(feature->rect[k].r.width) * feature->rect[k].r.height;
-                        const float orig_norm_size = (float)(_cascade->orig_window_size.width) * (_cascade->orig_window_size.height);
-                        const float feature_size = float(tr.width * tr.height);
-                        //const float normSize    = float(equRect.width*equRect.height);
-                        float target_ratio = orig_feature_size / orig_norm_size;
-                        //float isRatio = featureSize / normSize;
-                        //correctionRatio = targetRatio / isRatio / normSize;
-                        correction_ratio = target_ratio / feature_size;
-                        // RAINER END
-                    }
-#else
-                    correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5);
-#endif
-
-                    if( !feature->tilted )
-                    {
-                        hidnode->p[k][0] = tr.x;
-                        hidnode->p[k][1] = tr.y;
-                        hidnode->p[k][2] = tr.x + tr.width;
-                        hidnode->p[k][3] = tr.y + tr.height;
-                    }
-                    else
-                    {
-                        hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width;
-                        hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height;
-                        hidnode->p[k][0] = tr.y * step + tr.x;
-                        hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height;
-                    }
-                    hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
-                    if( k == 0 )
-                        area0 = tr.width * tr.height;
-                    else
-                        sum0 += hidnode->weight[k] * tr.width * tr.height;
-                }
-                hidnode->weight[0] = (float)(-sum0 / area0);
-            } /* l */
-        } /* j */
-    }
-}
-
-static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
-{
-    GpuHidHaarClassifierCascade *cascade;
-    int i;
-    int datasize;
-    int total;
-    CvRect equRect;
-    double weight_scale;
-    GpuHidHaarStageClassifier *stage_classifier;
-
-    if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
-        CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
-
-    if( !_cascade->hid_cascade )
-        gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
-
-    cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
-    stage_classifier = (GpuHidHaarStageClassifier *) cascade + 1;
-
-    _cascade->scale = 1.0;
-    _cascade->real_window_size.width =  _cascade->orig_window_size.width ;
-    _cascade->real_window_size.height = _cascade->orig_window_size.height;
-
-    equRect.x = equRect.y = 1;
-    equRect.width = _cascade->orig_window_size.width - 2;
-    equRect.height = _cascade->orig_window_size.height - 2;
-    weight_scale = 1;
-    cascade->inv_window_area = weight_scale;
-
-    cascade->p0 = equRect.x;
-    cascade->p1 = equRect.y;
-    cascade->p2 = equRect.height;
-    cascade->p3 = equRect.width ;
-    for( i = 0; i < _cascade->count; i++ )
-    {
-        int j, l;
-        for( j = 0; j < stage_classifier[i].count; j++ )
-        {
-            for( l = 0; l < stage_classifier[i].classifier[j].count; l++ )
-            {
-                const CvHaarFeature *feature =
-                    &_cascade->stage_classifier[i].classifier[j].haar_feature[l];
-                GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
-
-                for( int k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
-                {
-                    const CvRect tr = feature->rect[k].r;
-                    if (tr.width == 0)
-                        break;
-                    double correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5);
-                    hidnode->p[k][0] = tr.x;
-                    hidnode->p[k][1] = tr.y;
-                    hidnode->p[k][2] = tr.width;
-                    hidnode->p[k][3] = tr.height;
-                    hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
-                }
-            } /* l */
-        } /* j */
-    }
-}
-void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
-                                            double scaleFactor, int minNeighbors, int flags,
-                                            Size minSize, Size maxSize)
-//CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemStorage *storage, double scaleFactor,
-//        int minNeighbors, int flags, CvSize minSize, CvSize maxSize)
-{
-    CvHaarClassifierCascade *cascade = (CvHaarClassifierCascade*)getOldCascade();
-
-    const double GROUP_EPS = 0.2;
-
-    cv::ConcurrentRectVector allCandidates;
-    std::vector<cv::Rect> rectList;
-    std::vector<int> rweights;
-    double factor;
-    int datasize=0;
-    int totalclassifier=0;
-
-    GpuHidHaarClassifierCascade *gcascade;
-    GpuHidHaarStageClassifier    *stage;
-    GpuHidHaarClassifier         *classifier;
-    GpuHidHaarTreeNode           *node;
-
-    int *candidate;
-    cl_int status;
-
-    bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
-
-    if( maxSize.height == 0 || maxSize.width == 0 )
-    {
-        maxSize.height = gimg.rows;
-        maxSize.width = gimg.cols;
-    }
-
-    if( !CV_IS_HAAR_CLASSIFIER(cascade) )
-        CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
-
-    //if( !storage )
-    //    CV_Error( CV_StsNullPtr, "Null storage pointer" );
-
-    if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
-        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
-
-    if( scaleFactor <= 1 )
-        CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
-
-    if( findBiggestObject )
-        flags &= ~CV_HAAR_SCALE_IMAGE;
-
-    if( !cascade->hid_cascade )
-        gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
-
-    //result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
-
-    if( CV_MAT_CN(gimg.type()) > 1 )
-    {
-        oclMat gtemp;
-        cvtColor( gimg, gtemp, COLOR_BGR2GRAY );
-        gimg = gtemp;
-    }
-
-    if( findBiggestObject )
-        flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
-
-    if( gimg.cols < minSize.width || gimg.rows < minSize.height )
-        CV_Error(CV_StsError, "Image too small");
-
-    cl_command_queue qu = getClCommandQueue(Context::getContext());
-    if( (flags & CV_HAAR_SCALE_IMAGE) )
-    {
-        CvSize winSize0 = cascade->orig_window_size;
-        int totalheight = 0;
-        int indexy = 0;
-        CvSize sz;
-        std::vector<CvSize> sizev;
-        std::vector<float> scalev;
-        for(factor = 1.f;; factor *= scaleFactor)
-        {
-            CvSize winSize( cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) );
-            sz.width     = cvRound( gimg.cols / factor ) + 1;
-            sz.height    = cvRound( gimg.rows / factor ) + 1;
-            CvSize sz1( sz.width - winSize0.width - 1,      sz.height - winSize0.height - 1 );
-
-            if( sz1.width <= 0 || sz1.height <= 0 )
-                break;
-            if( winSize.width > maxSize.width || winSize.height > maxSize.height )
-                break;
-            if( winSize.width < minSize.width || winSize.height < minSize.height )
-                continue;
-
-            totalheight += sz.height;
-            sizev.push_back(sz);
-            scalev.push_back(factor);
-        }
-
-        oclMat gimg1(gimg.rows, gimg.cols, CV_8UC1);
-        oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
-        oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
-
-        int sdepth = 0;
-        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-            sdepth = CV_64FC1;
-        else
-            sdepth = CV_32FC1;
-        sdepth = CV_MAT_DEPTH(sdepth);
-        int type = CV_MAKE_TYPE(sdepth, 1);
-        oclMat gsqsum_t(totalheight + 4, gimg.cols + 1, type);
-
-        cl_mem stagebuffer;
-        cl_mem nodebuffer;
-        cl_mem candidatebuffer;
-        cl_mem scaleinfobuffer;
-        cv::Rect roi, roi2;
-        cv::Mat imgroi, imgroisq;
-        cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
-
-        int grp_per_CU = 12;
-
-        size_t blocksize = 8;
-        size_t localThreads[3] = { blocksize, blocksize , 1 };
-        size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->getDeviceInfo().maxComputeUnits) *localThreads[0],
-                                    localThreads[1], 1
-                                  };
-        int outputsz = 256 * globalThreads[0] / localThreads[0];
-        int loopcount = sizev.size();
-        detect_piramid_info *scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
-
-        for( int i = 0; i < loopcount; i++ )
-        {
-            sz = sizev[i];
-            factor = scalev[i];
-            roi = Rect(0, indexy, sz.width, sz.height);
-            roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
-            resizeroi = gimg1(roi2);
-            gimgroi = gsum(roi);
-            gimgroisq = gsqsum_t(roi);
-            int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
-            int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
-            scaleinfo[i].width_height = (width << 16) | height;
-
-
-            int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
-            int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
-
-            scaleinfo[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
-            scaleinfo[i].imgoff = gimgroi.offset >> 2;
-            scaleinfo[i].factor = factor;
-            cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
-            cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
-
-            indexy += sz.height;
-        }
-        if(gsqsum_t.depth() == CV_64F)
-            gsqsum_t.convertTo(gsqsum, CV_32FC1);
-        else
-            gsqsum = gsqsum_t;
-
-        gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-
-        int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
-                       sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
-
-        candidate = (int *)malloc(4 * sizeof(int) * outputsz);
-
-        gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
-
-        stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
-
-        nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
-
-        openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode),
-                                            node, 0, NULL, NULL));
-        candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
-
-        scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
-
-        int startstage = 0;
-        int endstage = gcascade->count;
-        int startnode = 0;
-        int pixelstep = gsum.step / 4;
-        int splitstage = 3;
-        int splitnode = stage[0].count + stage[1].count + stage[2].count;
-        cl_int4 p, pq;
-        p.s[0] = gcascade->p0;
-        p.s[1] = gcascade->p1;
-        p.s[2] = gcascade->p2;
-        p.s[3] = gcascade->p3;
-        pq.s[0] = gcascade->pq0;
-        pq.s[1] = gcascade->pq1;
-        pq.s[2] = gcascade->pq2;
-        pq.s[3] = gcascade->pq3;
-        float correction = gcascade->inv_window_area;
-
-        std::vector<std::pair<size_t, const void *> > args;
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&pixelstep ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
-        args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&p ));
-        args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq ));
-        args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction ));
-
-        if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
-        {
-            //setup local group size for "pixel step" = 1
-            localThreads[0] = 16;
-            localThreads[1] = 32;
-            localThreads[2] = 1;
-
-            //calc maximal number of workgroups
-            int WGNumX = 1+(sizev[0].width /(localThreads[0]));
-            int WGNumY = 1+(sizev[0].height/(localThreads[1]));
-            int WGNumZ = loopcount;
-            int WGNumTotal = 0; //accurate number of non-empty workgroups
-            int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
-            oclMat      oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
-            {
-                cl_int4*    pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
-                openCLVerifyCall(status);
-                for(int z=0;z<WGNumZ;++z)
-                {
-                    int     Width  = (scaleinfo[z].width_height >> 16)&0xFFFF;
-                    int     Height = (scaleinfo[z].width_height >> 0 )& 0xFFFF;
-                    for(int y=0;y<WGNumY;++y)
-                    {
-                        int     gy = y*localThreads[1];
-                        if(gy>=(Height-cascade->orig_window_size.height))
-                            continue; // no data to process
-                        for(int x=0;x<WGNumX;++x)
-                        {
-                            int     gx = x*localThreads[0];
-                            if(gx>=(Width-cascade->orig_window_size.width))
-                                continue; // no data to process
-
-                            if(scaleinfo[z].factor<=2)
-                            {
-                                WGNumSampled++;
-                            }
-                            // save no-empty workgroup info into array
-                            pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
-                            pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
-                            pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
-                            memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
-                            WGNumTotal++;
-                        }
-                    }
-                }
-                openCLSafeCall(clEnqueueUnmapMemObject(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,pWGInfo,0,0,0));
-                pWGInfo = NULL;
-            }
-
-#define NODE_SIZE 12
-            // pack node info to have less memory loads on the device side
-            oclMat  oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
-            {
-                cl_int  status;
-                cl_int* pNodesPK = (cl_int*)clEnqueueMapBuffer(getClCommandQueue(oclNodesPK.clCxt),(cl_mem)oclNodesPK.datastart,true,CL_MAP_WRITE, 0, oclNodesPK.step, 0,0,0,&status);
-                openCLVerifyCall(status);
-                //use known local data stride to precalulate indexes
-                int DATA_SIZE_X = (localThreads[0]+cascade->orig_window_size.width);
-                // check that maximal value is less than maximal unsigned short
-                assert(DATA_SIZE_X*cascade->orig_window_size.height+cascade->orig_window_size.width < (int)USHRT_MAX);
-                for(int i = 0;i<nodenum;++i)
-                {//process each node from classifier
-                    struct NodePK
-                    {
-                        unsigned short  slm_index[3][4];
-                        float           weight[3];
-                        float           threshold;
-                        float           alpha[2];
-                    };
-                    struct NodePK * pOut = (struct NodePK *)(pNodesPK + NODE_SIZE*i);
-                    for(int k=0;k<3;++k)
-                    {// calc 4 short indexes in shared local mem for each rectangle instead of 2 (x,y) pair.
-                        int* p = &(node[i].p[k][0]);
-                        pOut->slm_index[k][0] = (unsigned short)(p[1]*DATA_SIZE_X+p[0]);
-                        pOut->slm_index[k][1] = (unsigned short)(p[1]*DATA_SIZE_X+p[2]);
-                        pOut->slm_index[k][2] = (unsigned short)(p[3]*DATA_SIZE_X+p[0]);
-                        pOut->slm_index[k][3] = (unsigned short)(p[3]*DATA_SIZE_X+p[2]);
-                    }
-                    //store used float point values for each node
-                    pOut->weight[0] = node[i].weight[0];
-                    pOut->weight[1] = node[i].weight[1];
-                    pOut->weight[2] = node[i].weight[2];
-                    pOut->threshold = node[i].threshold;
-                    pOut->alpha[0] = node[i].alpha[0];
-                   pOut->alpha[1] = node[i].alpha[1];
-                }
-                openCLSafeCall(clEnqueueUnmapMemObject(getClCommandQueue(oclNodesPK.clCxt),(cl_mem)oclNodesPK.datastart,pNodesPK,0,0,0));
-                pNodesPK = NULL;
-            }
-            // add 2 additional buffers (WGinfo and packed nodes) as 2 last args
-            args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&oclNodesPK.datastart ));
-            args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&oclWGInfo.datastart ));
-
-            //form build options for kernel
-            String  options = "-D PACKED_CLASSIFIER";
-            options += format(" -D NODE_SIZE=%d",NODE_SIZE);
-            options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
-            options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
-            options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
-            options += format(" -D SPLITNODE=%d",splitnode);
-            options += format(" -D SPLITSTAGE=%d",splitstage);
-            options += format(" -D OUTPUTSZ=%d",outputsz);
-
-            // init candiate global count by 0
-            int pattern = 0;
-            openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
-
-            if(WGNumTotal>WGNumSampled)
-            {// small images and each pixel is processed
-                // setup global sizes to have linear array of workgroups with WGNum size
-                int     pixelstep = 1;
-                size_t  LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
-                globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
-                globalThreads[1] = LS[1];
-                globalThreads[2] = 1;
-                String options1 = options;
-                options1 += format(" -D PIXEL_STEP=%d",pixelstep);
-                options1 += format(" -D WGSTART=%d",WGNumSampled);
-                options1 += format(" -D LSx=%d",LS[0]);
-                options1 += format(" -D LSy=%d",LS[1]);
-                // execute face detector
-                openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
-            }
-            if(WGNumSampled>0)
-            {// large images each 4th pixel is processed
-                // setup global sizes to have linear array of workgroups with WGNum size
-                int     pixelstep = 2;
-                size_t  LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
-                globalThreads[0] = LS[0]*WGNumSampled;
-                globalThreads[1] = LS[1];
-                globalThreads[2] = 1;
-                String options2 = options;
-                options2 += format(" -D PIXEL_STEP=%d",pixelstep);
-                options2 += format(" -D WGSTART=%d",0);
-                options2 += format(" -D LSx=%d",LS[0]);
-                options2 += format(" -D LSy=%d",LS[1]);
-                // execute face detector
-                openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
-            }
-            //read candidate buffer back and put it into host list
-            openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
-            assert(candidate[0]<outputsz);
-            //printf("candidate[0]=%d\n",candidate[0]);
-            for(int i = 1; i <= candidate[0]; i++)
-            {
-                allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],candidate[4 * i + 2], candidate[4 * i + 3]));
-            }
-        }
-        else
-        {
-            const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
-
-            openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
-
-            openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
-
-            for(int i = 0; i < outputsz; i++)
-                if(candidate[4 * i + 2] != 0)
-                    allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
-                    candidate[4 * i + 2], candidate[4 * i + 3]));
-        }
-
-        free(scaleinfo);
-        free(candidate);
-        openCLSafeCall(clReleaseMemObject(stagebuffer));
-        openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
-        openCLSafeCall(clReleaseMemObject(nodebuffer));
-        openCLSafeCall(clReleaseMemObject(candidatebuffer));
-
-    }
-    else
-    {
-        CvSize winsize0 = cascade->orig_window_size;
-        int n_factors = 0;
-        oclMat gsum;
-        oclMat gsqsum;
-        oclMat gsqsum_t;
-        cv::ocl::integral(gimg, gsum, gsqsum_t);
-        if(gsqsum_t.depth() == CV_64F)
-            gsqsum_t.convertTo(gsqsum, CV_32FC1);
-        else
-            gsqsum = gsqsum_t;
-        CvSize sz;
-        std::vector<CvSize> sizev;
-        std::vector<float> scalev;
-        gpuSetHaarClassifierCascade(cascade);
-        gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-        cl_mem stagebuffer;
-        cl_mem nodebuffer;
-        cl_mem candidatebuffer;
-        cl_mem scaleinfobuffer;
-        cl_mem pbuffer;
-        cl_mem correctionbuffer;
-        for( n_factors = 0, factor = 1;
-                cvRound(factor * winsize0.width) < gimg.cols - 10 &&
-                cvRound(factor * winsize0.height) < gimg.rows - 10;
-                n_factors++, factor *= scaleFactor )
-        {
-            CvSize winSize( cvRound( winsize0.width * factor ), cvRound( winsize0.height * factor ) );
-            if( winSize.width < minSize.width || winSize.height < minSize.height )
-            {
-                continue;
-            }
-            sizev.push_back(winSize);
-            scalev.push_back(factor);
-        }
-        int loopcount = scalev.size();
-        if(loopcount == 0)
-        {
-            loopcount = 1;
-            n_factors = 1;
-            sizev.push_back(minSize);
-            scalev.push_back( std::min(cvRound(minSize.width / winsize0.width), cvRound(minSize.height / winsize0.height)) );
-        }
-        detect_piramid_info *scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
-        cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * loopcount);
-        float *correction = (float *)malloc(sizeof(float) * loopcount);
-        int grp_per_CU = 12;
-        size_t blocksize = 8;
-        size_t localThreads[3] = { blocksize, blocksize , 1 };
-        size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->getDeviceInfo().maxComputeUnits *localThreads[0],
-                                    localThreads[1], 1 };
-        int outputsz = 256 * globalThreads[0] / localThreads[0];
-        int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
-                       sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
-        nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
-                                        nodenum * sizeof(GpuHidHaarTreeNode));
-        openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
-                                            nodenum * sizeof(GpuHidHaarTreeNode),
-                                            node, 0, NULL, NULL));
-        cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
-                               loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
-        int startstage = 0;
-        int endstage = gcascade->count;
-        for(int i = 0; i < loopcount; i++)
-        {
-            sz = sizev[i];
-            factor = scalev[i];
-            double ystep = std::max(2., factor);
-            int equRect_x = cvRound(factor * gcascade->p0);
-            int equRect_y = cvRound(factor * gcascade->p1);
-            int equRect_w = cvRound(factor * gcascade->p3);
-            int equRect_h = cvRound(factor * gcascade->p2);
-            p[i].s[0] = equRect_x;
-            p[i].s[1] = equRect_y;
-            p[i].s[2] = equRect_x + equRect_w;
-            p[i].s[3] = equRect_y + equRect_h;
-            correction[i] = 1. / (equRect_w * equRect_h);
-            int width = (gsum.cols - 1 - sz.width  + ystep - 1) / ystep;
-            int height = (gsum.rows - 1 - sz.height + ystep - 1) / ystep;
-            int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
-            int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
-
-            scaleinfo[i].width_height = (width << 16) | height;
-            scaleinfo[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
-            scaleinfo[i].imgoff = 0;
-            scaleinfo[i].factor = factor;
-            int startnodenum = nodenum * i;
-            float factor2 = (float)factor;
-
-            std::vector<std::pair<size_t, const void *> > args1;
-            args1.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
-            args1.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
-            args1.push_back ( std::make_pair(sizeof(cl_float) , (void *)&factor2 ));
-            args1.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction[i] ));
-            args1.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
-            size_t globalThreads2[3] = {nodenum, 1, 1};
-            openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
-        }
-
-        int step = gsum.step / 4;
-        int startnode = 0;
-        int splitstage = 3;
-        stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
-        candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
-        scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
-        pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
-        correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(qu, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
-
-        std::vector<std::pair<size_t, const void *> > args;
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&step ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
-        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
-        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
-
-        candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
-
-        for(int i = 0; i < outputsz; i++)
-        {
-            if(candidate[4 * i + 2] != 0)
-                allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
-        }
-
-        free(scaleinfo);
-        free(p);
-        free(correction);
-        clEnqueueUnmapMemObject(qu, candidatebuffer, candidate, 0, 0, 0);
-        openCLSafeCall(clReleaseMemObject(stagebuffer));
-        openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
-        openCLSafeCall(clReleaseMemObject(nodebuffer));
-        openCLSafeCall(clReleaseMemObject(newnodebuffer));
-        openCLSafeCall(clReleaseMemObject(candidatebuffer));
-        openCLSafeCall(clReleaseMemObject(pbuffer));
-        openCLSafeCall(clReleaseMemObject(correctionbuffer));
-    }
-
-    cvFree(&cascade->hid_cascade);
-    rectList.resize(allCandidates.size());
-    if(!allCandidates.empty())
-        std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
-
-    if( minNeighbors != 0 || findBiggestObject )
-        groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
-    else
-        rweights.resize(rectList.size(), 0);
-
-    faces.clear();
-    if( findBiggestObject && rectList.size() )
-    {
-        Rect result_comp(0, 0, 0, 0);
-        for( size_t i = 0; i < rectList.size(); i++ )
-        {
-            cv::Rect r = rectList[i];
-            if( r.area() > result_comp.area() )
-            {
-                result_comp = r;
-            }
-        }
-        faces.push_back(result_comp);
-    }
-    else
-    {
-        faces = rectList;
-    }
-}
diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp
deleted file mode 100644
index 3bdb382..0000000
--- a/modules/ocl/src/hog.cpp
+++ /dev/null
@@ -1,1962 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//     Wenju He, wenju@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-#define CELL_WIDTH 8
-#define CELL_HEIGHT 8
-#define CELLS_PER_BLOCK_X 2
-#define CELLS_PER_BLOCK_Y 2
-#define NTHREADS 256
-
-static oclMat gauss_w_lut;
-static bool hog_device_cpu;
-
-namespace cv
-{
-    namespace ocl
-    {
-        namespace device
-        {
-            namespace hog
-            {
-                int cnbins;
-                int cblock_stride_x;
-                int cblock_stride_y;
-                int cnblocks_win_x;
-                int cnblocks_win_y;
-                int cblock_hist_size;
-                int cdescr_size;
-                int cdescr_width;
-                int cdescr_height;
-
-                // A shift value and type that allows qangle to be different
-                // sizes on different hardware
-                int qangle_step_shift;
-                int qangle_type;
-
-                void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
-                                      int nblocks_win_x, int nblocks_win_y);
-
-                void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
-                                   int height, int width, const cv::ocl::oclMat &grad,
-                                   const cv::ocl::oclMat &qangle,
-                                   const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists);
-
-                void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
-                                     int height, int width, cv::ocl::oclMat &block_hists,
-                                     float threshold);
-
-                void classify_hists(int win_height, int win_width, int block_stride_y,
-                                    int block_stride_x, int win_stride_y, int win_stride_x,
-                                    int height, int width, const cv::ocl::oclMat &block_hists,
-                                    const cv::ocl::oclMat &coefs, float free_coef,
-                                    float threshold, cv::ocl::oclMat &labels);
-
-                void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
-                                            int block_stride_x, int win_stride_y, int win_stride_x,
-                                            int height, int width, const cv::ocl::oclMat &block_hists,
-                                            cv::ocl::oclMat &descriptors);
-                void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
-                                            int block_stride_x, int win_stride_y, int win_stride_x,
-                                            int height, int width, const cv::ocl::oclMat &block_hists,
-                                            cv::ocl::oclMat &descriptors);
-
-                void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
-                                            float angle_scale, cv::ocl::oclMat &grad,
-                                            cv::ocl::oclMat &qangle, bool correct_gamma);
-                void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
-                                            float angle_scale, cv::ocl::oclMat &grad,
-                                            cv::ocl::oclMat &qangle, bool correct_gamma);
-            }
-        }
-    }
-}
-
-using namespace ::cv::ocl::device;
-
-cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
-                                      Size cell_size_, int nbins_, double win_sigma_,
-                                      double threshold_L2hys_, bool gamma_correction_, int nlevels_)
-    : win_size(win_size_),
-      block_size(block_size_),
-      block_stride(block_stride_),
-      cell_size(cell_size_),
-      nbins(nbins_),
-      win_sigma(win_sigma_),
-      threshold_L2hys(threshold_L2hys_),
-      gamma_correction(gamma_correction_),
-      nlevels(nlevels_)
-{
-    CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 &&
-              (win_size.height - block_size.height) % block_stride.height == 0);
-
-    CV_Assert(block_size.width % cell_size.width == 0 &&
-        block_size.height % cell_size.height == 0);
-
-    CV_Assert(block_stride == cell_size);
-
-    CV_Assert(cell_size == Size(8, 8));
-
-    Size cells_per_block(block_size.width / cell_size.width,
-        block_size.height / cell_size.height);
-    CV_Assert(cells_per_block == Size(2, 2));
-
-    cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    hog::set_up_constants(nbins, block_stride.width, block_stride.height,
-        blocks_per_win.width, blocks_per_win.height);
-
-    effect_size = Size(0, 0);
-
-    if (isCpuDevice())
-        hog_device_cpu = true;
-    else
-        hog_device_cpu = false;
-
-}
-
-size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
-{
-    return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize();
-}
-
-size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
-{
-    Size cells_per_block = Size(block_size.width / cell_size.width,
-        block_size.height / cell_size.height);
-    return (size_t)(nbins * cells_per_block.area());
-}
-
-double cv::ocl::HOGDescriptor::getWinSigma() const
-{
-    return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;
-}
-
-bool cv::ocl::HOGDescriptor::checkDetectorSize() const
-{
-    size_t detector_size = detector.rows * detector.cols;
-    size_t descriptor_size = getDescriptorSize();
-    return detector_size == 0 || detector_size == descriptor_size ||
-        detector_size == descriptor_size + 1;
-}
-
-void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector)
-{
-    std::vector<float> detector_reordered(_detector.size());
-
-    size_t block_hist_size = getBlockHistogramSize();
-    cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride);
-
-    for (int i = 0; i < blocks_per_img.height; ++i)
-        for (int j = 0; j < blocks_per_img.width; ++j)
-        {
-            const float *src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
-            float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
-            for (size_t k = 0; k < block_hist_size; ++k)
-                dst[k] = src[k];
-        }
-
-    this->detector.upload(Mat(detector_reordered).reshape(1, 1));
-
-    size_t descriptor_size = getDescriptorSize();
-    free_coef = _detector.size() > descriptor_size ? _detector[descriptor_size] : 0;
-
-    CV_Assert(checkDetectorSize());
-}
-
-void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
-{
-    if (!image_scale.empty())
-        return;
-
-    if (effect_size == Size(0, 0))
-        effect_size = img.size();
-
-    grad.create(img.size(), CV_32FC2);
-    qangle.create(img.size(), hog::qangle_type);
-
-    const size_t block_hist_size = getBlockHistogramSize();
-    const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
-    block_hists.create(1,
-        static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F);
-
-    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
-    labels.create(1, wins_per_img.area(), CV_8U);
-
-    float sigma = getWinSigma();
-    float scale = 1.f / (2.f * sigma * sigma);
-    Mat gaussian_lut(1, 512, CV_32FC1);
-    int idx = 0;
-    for(int i=-8; i<8; i++)
-        for(int j=-8; j<8; j++)
-            gaussian_lut.at<float>(idx++) = std::exp(-(j * j + i * i) * scale);
-    for(int i=-8; i<8; i++)
-        for(int j=-8; j<8; j++)
-            gaussian_lut.at<float>(idx++) = (8.f - fabs(j + 0.5f)) * (8.f - fabs(i + 0.5f)) / 64.f;
-
-    gauss_w_lut.upload(gaussian_lut);
-}
-
-void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
-{
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-
-    float angleScale = (float)(nbins / CV_PI);
-    switch (img.type())
-    {
-    case CV_8UC1:
-        hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
-            angleScale, grad, qangle, gamma_correction);
-        break;
-    case CV_8UC4:
-        hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
-            angleScale, grad, qangle, gamma_correction);
-        break;
-    }
-}
-
-
-void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
-{
-    computeGradient(img, this->grad, this->qangle);
-
-    hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
-        effect_size.width, grad, qangle, gauss_w_lut, block_hists);
-
-    hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
-        effect_size.width, block_hists, (float)threshold_L2hys);
-}
-
-
-void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
-                                            oclMat &descriptors, int descr_format)
-{
-    CV_Assert(win_stride.width % block_stride.width == 0 &&
-        win_stride.height % block_stride.height == 0);
-
-    init_buffer(img, win_stride);
-
-    computeBlockHistograms(img);
-
-    const size_t block_hist_size = getBlockHistogramSize();
-    Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    Size wins_per_img   = numPartsWithin(effect_size, win_size, win_stride);
-
-    descriptors.create(wins_per_img.area(),
-        static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
-
-    switch (descr_format)
-    {
-    case DESCR_FORMAT_ROW_BY_ROW:
-        hog::extract_descrs_by_rows(win_size.height, win_size.width,
-            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
-            effect_size.height, effect_size.width, block_hists, descriptors);
-        break;
-    case DESCR_FORMAT_COL_BY_COL:
-        hog::extract_descrs_by_cols(win_size.height, win_size.width,
-            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
-            effect_size.height, effect_size.width, block_hists, descriptors);
-        break;
-    default:
-        CV_Error(Error::StsBadArg, "Unknown descriptor format");
-    }
-}
-
-
-void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
-                                    double hit_threshold, Size win_stride, Size padding)
-{
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-    CV_Assert(padding == Size(0, 0));
-
-    hits.clear();
-    if (detector.empty())
-        return;
-
-    if (win_stride == Size())
-        win_stride = block_stride;
-    else
-        CV_Assert(win_stride.width % block_stride.width == 0 &&
-            win_stride.height % block_stride.height == 0);
-    init_buffer(img, win_stride);
-
-    computeBlockHistograms(img);
-
-    hog::classify_hists(win_size.height, win_size.width, block_stride.height,
-        block_stride.width, win_stride.height, win_stride.width,
-        effect_size.height, effect_size.width, block_hists, detector,
-        (float)free_coef, (float)hit_threshold, labels);
-
-    labels.download(labels_host);
-    unsigned char *vec = labels_host.ptr();
-    Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
-    for (int i = 0; i < wins_per_img.area(); i++)
-    {
-        int y = i / wins_per_img.width;
-        int x = i - wins_per_img.width * y;
-        if (vec[i])
-            hits.push_back(Point(x * win_stride.width, y * win_stride.height));
-    }
-}
-
-
-
-void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
-                                              double hit_threshold, Size win_stride, Size padding,
-                                              double scale0, int group_threshold)
-{
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-    CV_Assert(scale0 > 1);
-
-    std::vector<double> level_scale;
-    double scale = 1.;
-    int levels = 0;
-
-    for (levels = 0; levels < nlevels; levels++)
-    {
-        level_scale.push_back(scale);
-        if (cvRound(img.cols / scale) < win_size.width ||
-                cvRound(img.rows / scale) < win_size.height || scale0 <= 1)
-            break;
-        scale *= scale0;
-    }
-    levels = std::max(levels, 1);
-    level_scale.resize(levels);
-
-    std::vector<Rect> all_candidates;
-    std::vector<Point> locations;
-
-    if (win_stride == Size())
-        win_stride = block_stride;
-    else
-        CV_Assert(win_stride.width % block_stride.width == 0 &&
-            win_stride.height % block_stride.height == 0);
-    init_buffer(img, win_stride);
-    image_scale.create(img.size(), img.type());
-
-    for (size_t i = 0; i < level_scale.size(); i++)
-    {
-        scale = level_scale[i];
-        effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
-        if (effect_size == img.size())
-        {
-            detect(img, locations, hit_threshold, win_stride, padding);
-        }
-        else
-        {
-            resize(img, image_scale, effect_size);
-            detect(image_scale, locations, hit_threshold, win_stride, padding);
-        }
-        Size scaled_win_size(cvRound(win_size.width * scale),
-            cvRound(win_size.height * scale));
-        for (size_t j = 0; j < locations.size(); j++)
-            all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
-    }
-
-    found_locations.assign(all_candidates.begin(), all_candidates.end());
-    groupRectangles(found_locations, group_threshold, 0.2);
-}
-
-int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
-{
-    return (size - part_size + stride) / stride;
-}
-
-cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
-                                                cv::Size stride)
-{
-    return Size(numPartsWithin(size.width, part_size.width, stride.width),
-        numPartsWithin(size.height, part_size.height, stride.height));
-}
-
-std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
-{
-    return getPeopleDetector64x128();
-}
-
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
-{
-    static const float detector[] =
-    {
-        0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
-        0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
-        0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
-        0.254676f, -0.069235f, 0.082566f, 0.147260f, 0.326969f, 0.148888f,
-        0.055270f, -0.087985f, 0.261720f, 0.143442f, 0.026812f, 0.238212f,
-        0.194020f, 0.056341f, -0.025854f, -0.034444f, -0.156631f, 0.205174f,
-        0.089008f, -0.139811f, -0.100147f, -0.037830f, -0.029230f, -0.055641f,
-        0.033248f, -0.016512f, 0.155244f, 0.247315f, -0.124694f, -0.048414f,
-        -0.062219f, 0.193683f, 0.004574f, 0.055089f, 0.093565f, 0.167712f,
-        0.167581f, 0.018895f, 0.215258f, 0.122609f, 0.090520f, -0.067219f,
-        -0.049029f, -0.099615f, 0.241804f, -0.094893f, -0.176248f, 0.001727f,
-        -0.134473f, 0.104442f, 0.050942f, 0.081165f, 0.072156f, 0.121646f,
-        0.002656f, -0.297974f, -0.133587f, -0.060121f, -0.092515f, -0.048974f,
-        -0.084754f, -0.180111f, -0.038590f, 0.086283f, -0.134636f, -0.107249f,
-        0.132890f, 0.141556f, 0.249425f, 0.130273f, -0.030031f, 0.073212f,
-        -0.008155f, 0.019931f, 0.071688f, 0.000300f, -0.019525f, -0.021725f,
-        -0.040993f, -0.086841f, 0.070124f, 0.240033f, 0.265350f, 0.043208f,
-        0.166754f, 0.091453f, 0.060916f, -0.036972f, -0.091043f, 0.079873f,
-        0.219781f, 0.158102f, -0.140618f, -0.043016f, 0.124802f, 0.093668f,
-        0.103208f, 0.094872f, 0.080541f, 0.137711f, 0.160566f, -0.169231f,
-        0.013983f, 0.309508f, -0.004217f, -0.057200f, -0.064489f, 0.014066f,
-        0.361009f, 0.251328f, -0.080983f, -0.044183f, 0.061436f, -0.037381f,
-        -0.078786f, 0.030993f, 0.066314f, 0.037683f, 0.152325f, -0.091683f,
-        0.070203f, 0.217856f, 0.036435f, -0.076462f, 0.006254f, -0.094431f,
-        0.154829f, -0.023038f, -0.196961f, -0.024594f, 0.178465f, -0.050139f,
-        -0.045932f, -0.000965f, 0.109112f, 0.046165f, -0.159373f, -0.008713f,
-        0.041307f, 0.097129f, -0.057211f, -0.064599f, 0.077165f, 0.176167f,
-        0.138322f, 0.065753f, -0.104950f, 0.017933f, 0.136255f, -0.011598f,
-        0.047007f, 0.080550f, 0.068619f, 0.084661f, -0.035493f, -0.091314f,
-        -0.041411f, 0.060971f, -0.101912f, -0.079870f, -0.085977f, -0.022686f,
-        0.079788f, -0.098064f, -0.054603f, 0.040383f, 0.300794f, 0.128603f,
-        0.094844f, 0.047407f, 0.101825f, 0.061832f, -0.162160f, -0.204553f,
-        -0.035165f, 0.101450f, -0.016641f, -0.027140f, -0.134392f, -0.008743f,
-        0.102331f, 0.114853f, 0.009644f, 0.062823f, 0.237339f, 0.167843f,
-        0.053066f, -0.012592f, 0.043158f, 0.002305f, 0.065001f, -0.038929f,
-        -0.020356f, 0.152343f, 0.043469f, -0.029967f, -0.042948f, 0.032481f,
-        0.068488f, -0.110840f, -0.111083f, 0.111980f, -0.002072f, -0.005562f,
-        0.082926f, 0.006635f, -0.108153f, 0.024242f, -0.086464f, -0.189884f,
-        -0.017492f, 0.191456f, -0.007683f, -0.128769f, -0.038017f, -0.132380f,
-        0.091926f, 0.079696f, -0.106728f, -0.007656f, 0.172744f, 0.011576f,
-        0.009883f, 0.083258f, -0.026516f, 0.145534f, 0.153924f, -0.130290f,
-        -0.108945f, 0.124490f, -0.003186f, -0.100485f, 0.015024f, -0.060512f,
-        0.026288f, -0.086713f, -0.169012f, 0.076517f, 0.215778f, 0.043701f,
-        -0.131642f, -0.012585f, -0.045181f, -0.118183f, -0.241544f, -0.167293f,
-        -0.020107f, -0.019917f, -0.101827f, -0.107096f, -0.010503f, 0.044938f,
-        0.189680f, 0.217119f, -0.046086f, 0.044508f, 0.199716f, -0.036004f,
-        -0.148927f, 0.013355f, -0.078279f, 0.030451f, 0.056301f, -0.024609f,
-        0.083224f, 0.099533f, -0.039432f, -0.138880f, 0.005482f, -0.024120f,
-        -0.140468f, -0.066381f, -0.017057f, 0.009260f, -0.058004f, -0.028486f,
-        -0.061610f, 0.007483f, -0.158309f, -0.150687f, -0.044595f, -0.105121f,
-        -0.045763f, -0.006618f, -0.024419f, -0.117713f, -0.119366f, -0.175941f,
-        -0.071542f, 0.119027f, 0.111362f, 0.043080f, 0.034889f, 0.093003f,
-        0.007842f, 0.057368f, -0.108834f, -0.079968f, 0.230959f, 0.020205f,
-        0.011470f, 0.098877f, 0.101310f, -0.030215f, -0.018018f, -0.059552f,
-        -0.106157f, 0.021866f, -0.036471f, 0.080051f, 0.041165f, -0.082101f,
-        0.117726f, 0.030961f, -0.054763f, -0.084102f, -0.185778f, -0.061305f,
-        -0.038089f, -0.110728f, -0.264010f, 0.076675f, -0.077111f, -0.137644f,
-        0.036232f, 0.277995f, 0.019116f, 0.107738f, 0.144003f, 0.080304f,
-        0.215036f, 0.228897f, 0.072713f, 0.077773f, 0.120168f, 0.075324f,
-        0.062730f, 0.122478f, -0.049008f, 0.164912f, 0.162450f, 0.041246f,
-        0.009891f, -0.097827f, -0.038700f, -0.023027f, -0.120020f, 0.203364f,
-        0.248474f, 0.149810f, -0.036276f, -0.082814f, -0.090343f, -0.027143f,
-        -0.075689f, -0.320310f, -0.000500f, -0.143334f, -0.065077f, -0.186936f,
-        0.129372f, 0.116431f, 0.181699f, 0.170436f, 0.418854f, 0.460045f,
-        0.333719f, 0.230515f, 0.047822f, -0.044954f, -0.068086f, 0.140179f,
-        -0.044821f, 0.085550f, 0.092483f, -0.107296f, -0.130670f, -0.206629f,
-        0.114601f, -0.317869f, -0.076663f, 0.038680f, 0.212753f, -0.016059f,
-        -0.126526f, -0.163602f, 0.210154f, 0.099887f, -0.126366f, 0.118453f,
-        0.019309f, -0.021611f, -0.096499f, -0.111809f, -0.200489f, 0.142854f,
-        0.228840f, -0.353346f, -0.179151f, 0.116834f, 0.252389f, -0.031728f,
-        -0.188135f, -0.158998f, 0.386523f, 0.122315f, 0.209944f, 0.394023f,
-        0.359030f, 0.260717f, 0.170335f, 0.013683f, -0.142596f, -0.026138f,
-        -0.011878f, -0.150519f, 0.047159f, -0.107062f, -0.147347f, -0.187689f,
-        -0.186027f, -0.208048f, 0.058468f, -0.073026f, -0.236556f, -0.079788f,
-        -0.146216f, -0.058563f, -0.101361f, -0.071294f, -0.071093f, 0.116919f,
-        0.234304f, 0.306781f, 0.321866f, 0.240000f, 0.073261f, -0.012173f,
-        0.026479f, 0.050173f, 0.166127f, 0.228955f, 0.061905f, 0.156460f,
-        0.205990f, 0.120672f, 0.037350f, 0.167884f, 0.290099f, 0.420900f,
-        -0.012601f, 0.189839f, 0.306378f, 0.118383f, -0.095598f, -0.072360f,
-        -0.132496f, -0.224259f, -0.126021f, 0.022714f, 0.284039f, 0.051369f,
-        -0.000927f, -0.058735f, -0.083354f, -0.141254f, -0.187578f, -0.202669f,
-        0.048902f, 0.246597f, 0.441863f, 0.342519f, 0.066979f, 0.215286f,
-        0.188191f, -0.072240f, -0.208142f, -0.030196f, 0.178141f, 0.136985f,
-        -0.043374f, -0.181098f, 0.091815f, 0.116177f, -0.126690f, -0.386625f,
-        0.368165f, 0.269149f, -0.088042f, -0.028823f, 0.092961f, 0.024099f,
-        0.046112f, 0.176756f, 0.135849f, 0.124955f, 0.195467f, -0.037218f,
-        0.167217f, 0.188938f, 0.053528f, -0.066561f, 0.133721f, -0.070565f,
-        0.115898f, 0.152435f, -0.116993f, -0.110592f, -0.179005f, 0.026668f,
-        0.080530f, 0.075084f, -0.070401f, 0.012497f, 0.021849f, -0.139764f,
-        -0.022020f, -0.096301f, -0.064954f, -0.127446f, -0.013806f, -0.108315f,
-        0.156285f, 0.149867f, -0.011382f, 0.064532f, 0.029168f, 0.027393f,
-        0.069716f, 0.153735f, 0.038459f, 0.230714f, 0.253840f, 0.059522f,
-        -0.045053f, 0.014083f, 0.071103f, 0.068747f, 0.095887f, 0.005832f,
-        0.144887f, 0.026357f, -0.067359f, -0.044151f, -0.123283f, -0.019911f,
-        0.005318f, 0.109208f, -0.003201f, -0.021734f, 0.142025f, -0.066907f,
-        -0.120070f, -0.188639f, 0.012472f, -0.048704f, -0.012366f, -0.184828f,
-        0.168591f, 0.267166f, 0.058208f, -0.044101f, 0.033500f, 0.178558f,
-        0.104550f, 0.122418f, 0.080177f, 0.173246f, 0.298537f, 0.064173f,
-        0.053397f, 0.174341f, 0.230984f, 0.117025f, 0.166242f, 0.227781f,
-        0.120623f, 0.176952f, -0.011393f, -0.086483f, -0.008270f, 0.051700f,
-        -0.153369f, -0.058837f, -0.057639f, -0.060115f, 0.026349f, -0.160745f,
-        -0.037894f, -0.048575f, 0.041052f, -0.022112f, 0.060365f, 0.051906f,
-        0.162657f, 0.138519f, -0.050185f, -0.005938f, 0.071301f, 0.127686f,
-        0.062342f, 0.144400f, 0.072600f, 0.198436f, 0.246219f, -0.078185f,
-        -0.036169f, 0.075934f, 0.047328f, -0.013601f, 0.087205f, 0.019900f,
-        0.022606f, -0.015365f, -0.092506f, 0.075275f, -0.116375f, 0.050500f,
-        0.045118f, 0.166567f, 0.072073f, 0.060371f, 0.131747f, -0.169863f,
-        -0.039352f, -0.047486f, -0.039797f, -0.204312f, 0.021710f, 0.129443f,
-        -0.021173f, 0.173416f, -0.070794f, -0.063986f, 0.069689f, -0.064099f,
-        -0.123201f, -0.017372f, -0.206870f, 0.065863f, 0.113226f, 0.024707f,
-        -0.071341f, -0.066964f, -0.098278f, -0.062927f, 0.075840f, 0.014716f,
-        0.019378f, 0.132699f, -0.074191f, -0.089557f, -0.078446f, -0.197488f,
-        -0.173665f, 0.052583f, 0.044361f, 0.113549f, 0.098492f, 0.077379f,
-        -0.011146f, -0.192593f, -0.164435f, 0.045568f, 0.205699f, 0.049187f,
-        -0.082281f, 0.134874f, 0.185499f, 0.034968f, -0.119561f, -0.112372f,
-        -0.115091f, -0.054042f, -0.183816f, -0.078100f, 0.190695f, 0.091617f,
-        0.004257f, -0.041135f, -0.061453f, -0.141592f, -0.194809f, -0.120638f,
-        0.020168f, 0.109672f, 0.067398f, -0.015238f, -0.239145f, -0.264671f,
-        -0.185176f, 0.050472f, 0.020793f, 0.035678f, 0.022839f, -0.052055f,
-        -0.127968f, -0.113049f, -0.228416f, -0.258281f, -0.053437f, 0.076424f,
-        0.061450f, 0.237478f, 0.003618f, -0.055865f, -0.108087f, -0.028937f,
-        0.045585f, 0.052829f, -0.001471f, 0.022826f, 0.059565f, -0.104430f,
-        -0.077266f, -0.211882f, -0.212078f, 0.028074f, 0.075846f, 0.016265f,
-        0.161879f, 0.134477f, 0.008935f, -0.048041f, 0.074692f, 0.004928f,
-        -0.025156f, 0.192874f, 0.074410f, 0.308732f, 0.267400f, 0.094208f,
-        -0.005251f, 0.042041f, -0.032148f, 0.015588f, 0.252869f, 0.175302f,
-        0.022892f, 0.081673f, 0.063208f, 0.162626f, 0.194426f, 0.233890f,
-        0.262292f, 0.186930f, 0.084079f, -0.286388f, -0.213034f, -0.048867f,
-        -0.207669f, -0.170050f, 0.011673f, -0.092958f, -0.192786f, -0.273536f,
-        0.230904f, 0.266732f, 0.320519f, 0.297155f, 0.548169f, 0.304922f,
-        0.132687f, 0.247333f, 0.212488f, -0.271472f, -0.142105f, -0.002627f,
-        -0.119215f, 0.128383f, 0.100079f, -0.057490f, -0.121902f, -0.228892f,
-        0.202292f, -0.399795f, -0.371326f, -0.095836f, -0.063626f, -0.161375f,
-        -0.311180f, -0.294797f, 0.242122f, 0.011788f, 0.095573f, 0.322523f,
-        0.511840f, 0.322880f, 0.313259f, 0.173331f, 0.002542f, -0.029802f,
-        0.324766f, -0.326170f, -0.340547f, -0.138288f, -0.002963f, -0.114060f,
-        -0.377312f, -0.442570f, 0.212446f, -0.007759f, -0.011576f, 0.169711f,
-        0.308689f, 0.317348f, 0.539390f, 0.332845f, 0.057331f, -0.068180f,
-        0.101994f, 0.266995f, 0.209570f, 0.355730f, 0.091635f, 0.170238f,
-        0.125215f, 0.274154f, 0.070223f, 0.025515f, 0.049946f, -0.000550f,
-        0.043715f, -0.141843f, 0.020844f, 0.129871f, 0.256588f, 0.105015f,
-        0.148339f, 0.170682f, 0.028792f, 0.074037f, 0.160042f, 0.405137f,
-        0.246187f, 0.352160f, 0.168951f, 0.222263f, 0.264439f, 0.065945f,
-        0.021963f, -0.075084f, 0.093105f, 0.027318f, 0.098864f, 0.057566f,
-        -0.080282f, 0.185032f, 0.314419f, 0.333727f, 0.125798f, 0.294919f,
-        0.386002f, 0.217619f, -0.183517f, -0.278622f, -0.002342f, -0.027821f,
-        -0.134266f, -0.331843f, -0.008296f, 0.124564f, 0.053712f, -0.369016f,
-        -0.095036f, 0.209381f, 0.423760f, 0.371760f, 0.106397f, 0.369408f,
-        0.485608f, 0.231201f, -0.138685f, -0.349208f, -0.070083f, 0.028991f,
-        -0.081630f, -0.395992f, -0.146791f, -0.027354f, 0.063396f, -0.272484f,
-        0.058299f, 0.338207f, 0.110767f, -0.052642f, -0.233848f, -0.027448f,
-        0.030328f, 0.155572f, -0.093826f, 0.019331f, 0.120638f, 0.006292f,
-        -0.106083f, -0.236290f, -0.140933f, -0.088067f, -0.025138f, -0.208395f,
-        -0.025502f, 0.144192f, -0.048353f, -0.106144f, -0.305121f, -0.114147f,
-        0.090963f, 0.327727f, 0.035606f, -0.093779f, 0.002651f, -0.171081f,
-        -0.188131f, -0.216571f, -0.209101f, -0.054402f, 0.157147f, -0.057127f,
-        0.066584f, 0.008988f, 0.041191f, 0.034456f, -0.078255f, 0.052099f,
-        -0.022239f, 0.066981f, -0.117520f, -0.072637f, 0.062512f, 0.037570f,
-        -0.057544f, -0.312359f, 0.034357f, -0.031549f, 0.002566f, -0.207375f,
-        -0.070654f, -0.018786f, -0.044815f, -0.012814f, -0.076320f, 0.078183f,
-        0.023877f, 0.117078f, 0.022292f, -0.205424f, -0.060430f, -0.017296f,
-        -0.004827f, -0.321036f, -0.092155f, 0.038837f, 0.073190f, -0.067513f,
-        0.026521f, 0.171945f, 0.087318f, 0.034495f, -0.034089f, 0.154410f,
-        -0.061431f, 0.007435f, -0.111094f, -0.095976f, 0.014741f, -0.132324f,
-        -0.029517f, -0.192160f, 0.098667f, 0.020762f, 0.177050f, -0.064510f,
-        -0.054437f, -0.058678f, -0.001858f, 0.167602f, 0.015735f, 0.054338f,
-        0.016477f, 0.186381f, -0.010667f, 0.054692f, 0.126742f, 0.013140f,
-        0.090353f, -0.133608f, -0.018017f, -0.152619f, 0.027600f, -0.138700f,
-        -0.050274f, 0.045141f, -0.118731f, 0.094797f, -0.167605f, 0.097461f,
-        -0.009131f, 0.199920f, -0.052976f, 0.158194f, 0.178568f, -0.107600f,
-        0.009671f, -0.084072f, -0.040258f, -0.205673f, 0.102891f, 0.223511f,
-        0.042699f, 0.118548f, -0.021274f, 0.110997f, -0.155121f, 0.027696f,
-        -0.149968f, 0.051552f, -0.129219f, 0.173524f, 0.073972f, -0.189045f,
-        -0.034523f, -0.106655f, -0.011843f, -0.197381f, 0.219413f, 0.183197f,
-        -0.054920f, 0.144955f, 0.036517f, -0.085412f, -0.229070f, -0.143710f,
-        -0.049486f, 0.156634f, -0.008673f, -0.064778f, 0.082344f, 0.145673f,
-        0.002912f, -0.210121f, -0.116564f, 0.078425f, 0.220908f, -0.067594f,
-        0.048610f, 0.084912f, -0.066202f, -0.112515f, -0.217767f, -0.082640f,
-        -0.017414f, 0.230265f, -0.070735f, 0.066073f, 0.215256f, 0.071157f,
-        -0.087220f, -0.202235f, -0.011918f, 0.099562f, 0.174716f, -0.063845f,
-        -0.121055f, 0.014367f, 0.132709f, -0.005060f, -0.244606f, -0.179693f,
-        -0.134690f, 0.023239f, -0.193116f, -0.076975f, -0.021164f, -0.001938f,
-        -0.163799f, -0.111437f, -0.210362f, -0.166376f, 0.034754f, 0.010036f,
-        -0.021917f, 0.068014f, -0.086893f, -0.251746f, -0.267171f, 0.037383f,
-        0.003966f, 0.033571f, -0.151506f, 0.025437f, -0.020626f, -0.308454f,
-        -0.343143f, -0.092263f, -0.026261f, -0.028345f, 0.036036f, 0.035169f,
-        0.129470f, 0.122205f, 0.015661f, -0.070612f, -0.094333f, -0.066055f,
-        -0.041083f, 0.159146f, 0.073184f, 0.110044f, 0.174471f, 0.078069f,
-        -0.014881f, 0.008116f, 0.013209f, 0.075857f, 0.195605f, 0.062714f,
-        0.067955f, 0.056544f, -0.153908f, -0.141749f, -0.072550f, 0.033523f,
-        -0.024665f, 0.134487f, 0.079076f, 0.133562f, 0.227130f, 0.018054f,
-        0.004928f, 0.169162f, 0.065152f, 0.072160f, 0.131631f, 0.096303f,
-        0.054288f, 0.106256f, 0.114632f, 0.119038f, 0.515200f, 0.247429f,
-        0.199134f, 0.211957f, 0.127558f, -0.294684f, -0.194890f, -0.049988f,
-        -0.112247f, -0.008122f, -0.006176f, 0.037035f, -0.110881f, -0.249989f,
-        0.152434f, 0.234621f, 0.153340f, 0.349283f, 0.683049f, 0.157174f,
-        0.124844f, 0.099136f, 0.064407f, -0.248400f, -0.155323f, -0.026498f,
-        -0.023450f, 0.049051f, -0.114187f, 0.007195f, -0.176825f, -0.376926f,
-        0.366159f, -0.179938f, -0.148508f, 0.006043f, 0.170048f, 0.097866f,
-        -0.102658f, -0.260430f, 0.248868f, 0.037019f, -0.118111f, 0.078176f,
-        0.194171f, 0.211328f, 0.368612f, 0.361213f, 0.130013f, 0.094650f,
-        0.227396f, -0.178058f, -0.114782f, -0.008093f, 0.231080f, -0.011843f,
-        -0.097917f, -0.325788f, 0.141879f, 0.119738f, -0.230427f, -0.117419f,
-        -0.114153f, 0.037903f, 0.116383f, 0.218773f, -0.101884f, 0.059466f,
-        0.119255f, 0.010874f, -0.031449f, 0.045996f, 0.119931f, 0.273760f,
-        0.311700f, 0.261794f, 0.194809f, 0.339829f, 0.239449f, 0.064140f,
-        0.077597f, 0.098996f, 0.143534f, 0.184602f, 0.037507f, 0.225494f,
-        0.096142f, -0.147370f, -0.207833f, -0.174742f, -0.086391f, -0.038942f,
-        0.159577f, -0.088492f, -0.000989f, 0.108154f, -0.025890f, -0.072713f,
-        0.025997f, -0.006803f, -0.086879f, -0.011290f, -0.269200f, -0.103450f,
-        -0.124910f, -0.116340f, 0.141459f, 0.208800f, 0.042268f, 0.265034f,
-        0.516474f, 0.217591f, -0.018843f, -0.313328f, -0.168363f, 0.047129f,
-        0.090480f, -0.109852f, -0.018761f, 0.210669f, 0.281269f, -0.043591f,
-        -0.034147f, -0.237772f, -0.134843f, -0.072481f, -0.103831f, 0.038355f,
-        0.308619f, 0.148023f, -0.045867f, -0.123950f, -0.210860f, -0.064973f,
-        -0.036308f, -0.046731f, -0.022099f, 0.095776f, 0.409423f, 0.060635f,
-        -0.065196f, 0.051828f, 0.027981f, -0.009609f, -0.137681f, -0.095011f,
-        -0.019045f, 0.177278f, 0.009759f, -0.092119f, -0.016958f, -0.133860f,
-        -0.118421f, -0.032039f, -0.006214f, -0.084541f, 0.063971f, -0.073642f,
-        0.165676f, 0.110443f, 0.044131f, 0.046568f, 0.053292f, -0.055466f,
-        0.015512f, 0.371947f, 0.232102f, -0.016923f, 0.103979f, -0.091758f,
-        0.005907f, 0.209100f, 0.157433f, 0.030518f, 0.250366f, 0.062322f,
-        0.036720f, 0.094676f, 0.017306f, -0.010328f, -0.079012f, 0.016781f,
-        -0.112435f, 0.061795f, 0.042543f, -0.126799f, -0.009975f, -0.056760f,
-        0.046424f, -0.194712f, -0.139399f, -0.037731f, 0.157989f, -0.016261f,
-        0.123345f, 0.230563f, 0.083300f, -0.016392f, 0.059567f, -0.016035f,
-        -0.064767f, 0.231945f, 0.156629f, 0.034602f, 0.145628f, 0.041315f,
-        0.034535f, 0.019967f, -0.089188f, -0.012091f, 0.307857f, 0.211405f,
-        -0.025091f, -0.148249f, -0.129384f, 0.063536f, -0.068603f, -0.067941f,
-        -0.035104f, 0.210832f, 0.063810f, 0.062764f, -0.089889f, -0.030554f,
-        0.014791f, -0.053362f, -0.037818f, -0.196640f, 0.008388f, -0.082654f,
-        0.143056f, 0.064221f, 0.069795f, 0.191040f, 0.097321f, -0.028679f,
-        0.075794f, 0.313154f, 0.086240f, 0.207643f, 0.017809f, 0.122867f,
-        0.224586f, 0.167403f, -0.023884f, 0.047434f, 0.344091f, 0.187745f,
-        0.136177f, 0.141738f, 0.063799f, 0.045233f, -0.077342f, -0.003525f,
-        -0.165041f, -0.025616f, -0.073745f, 0.164439f, 0.011200f, -0.145896f,
-        -0.027954f, -0.061987f, -0.039874f, -0.142775f, 0.151042f, -0.038238f,
-        0.053152f, 0.078615f, 0.086061f, 0.100593f, 0.128046f, -0.071006f,
-        -0.116558f, 0.208445f, 0.051086f, 0.076843f, 0.023191f, -0.084781f,
-        -0.011790f, 0.147807f, -0.048554f, -0.113932f, 0.283322f, 0.190934f,
-        0.092789f, 0.033018f, -0.142428f, -0.142480f, -0.099023f, -0.041020f,
-        -0.042760f, 0.203295f, -0.053475f, 0.042424f, 0.222839f, -0.019167f,
-        -0.133176f, -0.276216f, -0.031998f, 0.117290f, 0.177827f, -0.059973f,
-        -0.064744f, -0.117040f, -0.155482f, -0.099531f, 0.164121f, -0.026682f,
-        -0.093810f, 0.238993f, -0.006506f, 0.007830f, 0.065819f, -0.203643f,
-        -0.100925f, -0.053652f, -0.130770f, 0.026277f, 0.131796f, 0.032742f,
-        0.127186f, 0.116694f, -0.161122f, -0.279773f, -0.252515f, -0.002638f,
-        0.042812f, 0.096776f, -0.123280f, 0.064858f, -0.010455f, -0.219760f,
-        -0.239331f, -0.104363f, -0.058022f, -0.053584f, 0.025611f, 0.005129f,
-        -0.100418f, -0.045712f, -0.194418f, -0.126366f, -0.030530f, 0.051168f,
-        0.215959f, 0.172402f, -0.054700f, -0.185995f, -0.278360f, -0.193693f,
-        -0.040309f, 0.003735f, -0.007770f, 0.123556f, 0.190179f, -0.077315f,
-        0.117403f, 0.212942f, 0.012160f, 0.000113f, 0.027331f, 0.040202f,
-        0.033293f, 0.219438f, 0.184174f, 0.259349f, 0.311206f, 0.082547f,
-        -0.047875f, -0.078417f, 0.010746f, 0.082620f, 0.311931f, 0.307605f,
-        0.003863f, 0.021405f, -0.026388f, -0.019572f, 0.020582f, -0.059353f,
-        0.025199f, 0.261319f, 0.086316f, 0.143614f, 0.107780f, 0.003900f,
-        -0.188397f, -0.038563f, -0.106045f, -0.125154f, -0.010509f, 0.054021f,
-        0.242130f, 0.279152f, 0.215546f, 0.346995f, 0.440856f, 0.237452f,
-        0.234154f, 0.301646f, 0.168929f, -0.208358f, -0.126848f, 0.010260f,
-        0.121018f, -0.062975f, -0.052848f, 0.050341f, -0.061103f, -0.266482f,
-        0.107186f, 0.140221f, 0.280065f, 0.287889f, 0.373198f, 0.151596f,
-        0.013593f, 0.115616f, 0.014616f, -0.281710f, -0.237597f, -0.117305f,
-        -0.000034f, -0.136739f, -0.196275f, -0.095225f, -0.125310f, -0.250514f,
-        0.236804f, -0.071805f, -0.037421f, 0.048230f, 0.321596f, 0.063632f,
-        0.024039f, -0.029133f, 0.230983f, 0.160593f, -0.154355f, -0.013086f,
-        -0.079929f, 0.094692f, 0.160391f, 0.180239f, 0.053895f, 0.100759f,
-        0.288631f, 0.038191f, 0.181692f, 0.229682f, 0.440166f, 0.063401f,
-        0.006273f, 0.020865f, 0.338695f, 0.256244f, -0.043927f, 0.115617f,
-        0.003296f, 0.173965f, 0.021318f, -0.040936f, -0.118932f, 0.182380f,
-        0.235922f, -0.053233f, -0.015053f, -0.101057f, 0.095341f, 0.051111f,
-        0.161831f, 0.032614f, 0.159496f, 0.072375f, 0.025089f, 0.023748f,
-        0.029151f, 0.161284f, -0.117717f, -0.036191f, -0.176822f, -0.162006f,
-        0.226542f, -0.078329f, 0.043079f, -0.119172f, 0.054614f, -0.101365f,
-        -0.064541f, -0.115304f, 0.135170f, 0.298872f, 0.098060f, 0.089428f,
-        -0.007497f, 0.110391f, -0.028824f, 0.020835f, -0.036804f, 0.125411f,
-        0.192105f, -0.048931f, 0.003086f, -0.010681f, 0.074698f, -0.016263f,
-        0.096063f, 0.060267f, -0.007277f, 0.139139f, -0.080635f, 0.036628f,
-        0.086058f, 0.131979f, 0.085707f, 0.025301f, 0.226094f, 0.194759f,
-        0.042193f, -0.157846f, -0.068402f, -0.141450f, -0.112659f, -0.076305f,
-        -0.069085f, -0.114332f, -0.102005f, 0.132193f, -0.067042f, 0.106643f,
-        0.198964f, 0.171616f, 0.167237f, -0.033730f, -0.026755f, 0.083621f,
-        0.149459f, -0.002799f, -0.000318f, 0.011753f, 0.065889f, -0.089375f,
-        -0.049610f, 0.224579f, 0.216548f, -0.034908f, -0.017851f, -0.088144f,
-        0.007530f, 0.240268f, 0.073270f, 0.013263f, 0.175323f, 0.012082f,
-        0.093993f, 0.015282f, 0.105854f, 0.107990f, 0.077798f, -0.096166f,
-        -0.079607f, 0.177820f, 0.142392f, 0.033337f, -0.078100f, -0.081616f,
-        -0.046993f, 0.139459f, 0.020272f, -0.123161f, 0.175269f, 0.105217f,
-        0.057328f, 0.080909f, -0.012612f, -0.097081f, 0.082060f, -0.096716f,
-        -0.063921f, 0.201884f, 0.128166f, -0.035051f, -0.032227f, -0.068139f,
-        -0.115915f, 0.095080f, -0.086007f, -0.067543f, 0.030776f, 0.032712f,
-        0.088937f, 0.054336f, -0.039329f, -0.114022f, 0.171672f, -0.112321f,
-        -0.217646f, 0.065186f, 0.060223f, 0.192174f, 0.055580f, -0.131107f,
-        -0.144338f, 0.056730f, -0.034707f, -0.081616f, -0.135298f, -0.000614f,
-        0.087189f, 0.014614f, 0.067709f, 0.107689f, 0.225780f, 0.084361f,
-        -0.008544f, 0.051649f, -0.048369f, -0.037739f, -0.060710f, 0.002654f,
-        0.016935f, 0.085563f, -0.015961f, -0.019265f, 0.111788f, 0.062376f,
-        0.202019f, 0.047713f, 0.042261f, 0.069716f, 0.242913f, 0.021052f,
-        -0.072812f, -0.155920f, -0.026436f, 0.035621f, -0.079300f, -0.028787f,
-        -0.048329f, 0.084718f, -0.060565f, -0.083750f, -0.164075f, -0.040742f,
-        -0.086219f, 0.015271f, -0.005204f, -0.016038f, 0.045816f, -0.050433f,
-        -0.077652f, 0.117109f, 0.009611f, -0.009045f, -0.008634f, -0.055373f,
-        -0.085968f, 0.028527f, -0.054736f, -0.168089f, 0.175839f, 0.071205f,
-        -0.023603f, 0.037907f, -0.004561f, -0.022634f, 0.123831f, 0.094469f,
-        -0.072920f, -0.133642f, -0.014032f, -0.142754f, -0.026999f, -0.199409f,
-        0.013268f, 0.226989f, 0.048650f, -0.170988f, -0.050141f, 0.007880f,
-        0.061880f, 0.019078f, -0.043578f, -0.038139f, 0.134814f, 0.054097f,
-        -0.081670f, 0.176838f, 0.047920f, -0.038176f, 0.050406f, -0.107181f,
-        -0.036279f, 0.027060f, 0.081594f, -0.002820f, 0.090507f, -0.033338f,
-        -0.059571f, 0.013404f, -0.099860f, 0.073371f, 0.342805f, 0.098305f,
-        -0.150910f, -0.020822f, -0.056960f, 0.046262f, -0.043413f, -0.149405f,
-        -0.129105f, -0.010899f, -0.014229f, -0.179949f, -0.113044f, -0.049468f,
-        -0.065513f, 0.090269f, -0.011919f, 0.087846f, 0.095796f, 0.146127f,
-        0.101599f, 0.078066f, -0.084348f, -0.100002f, -0.020134f, -0.050169f,
-        0.062122f, 0.014640f, 0.019143f, 0.036543f, 0.180924f, -0.013976f,
-        -0.066768f, -0.001090f, -0.070419f, -0.004839f, -0.001504f, 0.034483f,
-        -0.044954f, -0.050336f, -0.088638f, -0.174782f, -0.116082f, -0.205507f,
-        0.015587f, -0.042839f, -0.096879f, -0.144097f, -0.050268f, -0.196796f,
-        0.109639f, 0.271411f, 0.173732f, 0.108070f, 0.156437f, 0.124255f,
-        0.097242f, 0.238693f, 0.083941f, 0.109105f, 0.223940f, 0.267188f,
-        0.027385f, 0.025819f, 0.125070f, 0.093738f, 0.040353f, 0.038645f,
-        -0.012730f, 0.144063f, 0.052931f, -0.009138f, 0.084193f, 0.160272f,
-        -0.041366f, 0.011951f, -0.121446f, -0.106713f, -0.047566f, 0.047984f,
-        -0.255224f, -0.076116f, 0.098685f, -0.150845f, -0.171513f, -0.156590f,
-        0.058331f, 0.187493f, 0.413018f, 0.554265f, 0.372242f, 0.237943f,
-        0.124571f, 0.110829f, 0.010322f, -0.174477f, -0.067627f, -0.001979f,
-        0.142913f, 0.040597f, 0.019907f, 0.025963f, -0.043585f, -0.120732f,
-        0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f,
-        -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
-        -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
-        -9.063785f
-    };
-    return std::vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
-}
-
-
-
-
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
-{
-    static const float detector[] =
-    {
-        0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
-        0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
-        0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
-        0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
-        -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
-        -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
-        -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
-        0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
-        0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
-        0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
-        0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
-        0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,
-        5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f,
-        0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f,
-        0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f,
-        0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f,
-        0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f,
-        0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f,
-        0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f,
-        -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f,
-        -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f,
-        -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f,
-        0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f,
-        0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f,
-        -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f,
-        0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f,
-        -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f,
-        0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f,
-        0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f,
-        -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f,
-        -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f,
-        -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f,
-        0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f,
-        -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f,
-        0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f,
-        0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f,
-        -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f,
-        0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f,
-        3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f,
-        -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f,
-        -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f,
-        -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f,
-        -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f,
-        -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f,
-        -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f,
-        -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f,
-        0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f,
-        0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f,
-        0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f,
-        0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f,
-        0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f,
-        8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f,
-        -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f,
-        -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f,
-        -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f,
-        0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f,
-        -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f,
-        -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f,
-        0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f,
-        -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f,
-        -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f,
-        0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f,
-        0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f,
-        2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f,
-        0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f,
-        -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f,
-        6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f,
-        0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f,
-        -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f,
-        -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f,
-        -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f,
-        1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f,
-        -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f,
-        -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f,
-        -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f,
-        0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f,
-        0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f,
-        0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f,
-        -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f,
-        -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f,
-        -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f,
-        0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f,
-        -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f,
-        8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f,
-        -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f,
-        -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f,
-        -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f,
-        9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f,
-        0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f,
-        -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f,
-        2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f,
-        2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f,
-        -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f,
-        0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f,
-        -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f,
-        8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f,
-        -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f,
-        -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f,
-        8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f,
-        0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f,
-        0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f,
-        0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f,
-        -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f,
-        0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f,
-        0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f,
-        0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f,
-        -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f,
-        -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f,
-        0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f,
-        0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f,
-        -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f,
-        1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f,
-        0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f,
-        -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f,
-        -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f,
-        -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f,
-        0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f,
-        -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f,
-        0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f,
-        -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f,
-        0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f,
-        0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f,
-        -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f,
-        -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f,
-        0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f,
-        9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f,
-        -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f,
-        -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f,
-        -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f,
-        -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f,
-        0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f,
-        -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f,
-        7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f,
-        -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f,
-        7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f,
-        0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f,
-        0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f,
-        0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f,
-        -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f,
-        -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f,
-        0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f,
-        -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f,
-        0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f,
-        0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f,
-        -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f,
-        0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f,
-        -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f,
-        0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f,
-        -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f,
-        -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f,
-        -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f,
-        -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f,
-        -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f,
-        0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f,
-        0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f,
-        -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f,
-        0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f,
-        0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f,
-        0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f,
-        -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f,
-        -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f,
-        0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f,
-        -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f,
-        8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f,
-        -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f,
-        -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f,
-        -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f,
-        -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f,
-        0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f,
-        0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f,
-        0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f,
-        0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f,
-        0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f,
-        0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f,
-        -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f,
-        0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f,
-        0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f,
-        0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f,
-        0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f,
-        -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f,
-        0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f,
-        4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f,
-        -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f,
-        -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f,
-        0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f,
-        0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f,
-        0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f,
-        0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f,
-        -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f,
-        0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f,
-        -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f,
-        -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f,
-        -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f,
-        0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f,
-        -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f,
-        -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f,
-        -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f,
-        -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f,
-        -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f,
-        8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f,
-        -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f,
-        -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f,
-        -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f,
-        3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f,
-        -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f,
-        -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f,
-        -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f,
-        0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f,
-        1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f,
-        0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f,
-        -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f,
-        -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f,
-        -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f,
-        -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f,
-        0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f,
-        -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f,
-        0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f,
-        -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f,
-        -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f,
-        -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f,
-        0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f,
-        -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f,
-        0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f,
-        0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f,
-        -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f,
-        -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f,
-        0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f,
-        0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f,
-        0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f,
-        -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f,
-        -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f,
-        0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f,
-        -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f,
-        2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f,
-        0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f,
-        -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f,
-        0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f,
-        0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f,
-        0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f,
-        0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f,
-        0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f,
-        -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f,
-        -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f,
-        0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f,
-        -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f,
-        0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f,
-        -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f,
-        0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f,
-        -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f,
-        -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f,
-        0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f,
-        5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f,
-        0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f,
-        -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f,
-        0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f,
-        0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f,
-        0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f,
-        0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f,
-        0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f,
-        -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f,
-        -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f,
-        -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f,
-        -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f,
-        0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f,
-        -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f,
-        0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f,
-        7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f,
-        8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f,
-        -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f,
-        -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f,
-        -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f,
-        -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f,
-        -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f,
-        0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f,
-        0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f,
-        0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f,
-        -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f,
-        5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f,
-        -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f,
-        -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f,
-        -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f,
-        0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f,
-        0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f,
-        0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f,
-        0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f,
-        -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f,
-        0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f,
-        0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f,
-        0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f,
-        0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f,
-        -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f,
-        -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f,
-        0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f,
-        -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f,
-        -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f,
-        -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f,
-        0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f,
-        -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f,
-        -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f,
-        -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f,
-        -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f,
-        0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f,
-        0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f,
-        -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f,
-        -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f,
-        -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f,
-        -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f,
-        0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f,
-        -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f,
-        -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f,
-        -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f,
-        0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f,
-        -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f,
-        0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f,
-        -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f,
-        0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f,
-        6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f,
-        -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f,
-        -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f,
-        0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f,
-        -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f,
-        -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f,
-        -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f,
-        0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f,
-        -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f,
-        8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f,
-        -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f,
-        -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f,
-        -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f,
-        0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f,
-        0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f,
-        0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f,
-        -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f,
-        -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f,
-        -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f,
-        -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f,
-        0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f,
-        0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f,
-        -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f,
-        -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f,
-        0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f,
-        0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f,
-        -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f,
-        -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f,
-        -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f,
-        7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f,
-        -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f,
-        -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f,
-        -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f,
-        -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f,
-        0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f,
-        0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f,
-        6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f,
-        0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f,
-        0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f,
-        -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f,
-        0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f,
-        0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f,
-        0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f,
-        0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f,
-        -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f,
-        0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f,
-        -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f,
-        2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f,
-        0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f,
-        0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f,
-        0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f,
-        0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f,
-        0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f,
-        0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f,
-        0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f,
-        -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f,
-        -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f,
-        -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f,
-        0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f,
-        -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f,
-        -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f,
-        -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f,
-        8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f,
-        -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f,
-        -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f,
-        0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f,
-        -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f,
-        -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f,
-        -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f,
-        -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f,
-        0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f,
-        0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f,
-        0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f,
-        -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f,
-        0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f,
-        6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f,
-        -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f,
-        0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f,
-        -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f,
-        0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f,
-        0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f,
-        -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f,
-        0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f,
-        -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f,
-        0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f,
-        -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f,
-        0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f,
-        3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f,
-        -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f,
-        0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f,
-        -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f,
-        6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f,
-        -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f,
-        1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f,
-        0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f,
-        -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f,
-        1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f,
-        0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f,
-        0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f,
-        -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f,
-        -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f,
-        0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f,
-        -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f,
-        0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f,
-        8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f,
-        0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f,
-        -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f,
-        0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f,
-        -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f,
-        0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f,
-        -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f,
-        -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f,
-        0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f,
-        -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f,
-        0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f,
-        -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f,
-        0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f,
-        -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f,
-        -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f,
-        -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f,
-        -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f,
-        -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f,
-        8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f,
-        0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f,
-        0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f,
-        0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f,
-        0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f,
-        0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f,
-        0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f,
-        0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f,
-        0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f,
-        -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f,
-        -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f,
-        0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f,
-        0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f,
-        0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f,
-        0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f,
-        0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f,
-        -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f,
-        0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f,
-        2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f,
-        0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f,
-        -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f,
-        -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f,
-        7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f,
-        -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f,
-        -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f,
-        -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f,
-        -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f,
-        -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f,
-        -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f,
-        0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f,
-        -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f,
-        0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f,
-        0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f,
-        0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f,
-        -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f,
-        -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f,
-        -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f,
-        0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f,
-        -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f,
-        0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f,
-        0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f,
-        0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f,
-        -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f,
-        8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f,
-        0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f,
-        0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f,
-        0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f,
-        -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f,
-        0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f,
-        -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f,
-        0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f,
-        -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f,
-        0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f,
-        -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f,
-        0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f,
-        -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f,
-        -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f,
-        -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f,
-        0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f,
-        -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f,
-        0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f,
-        -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f,
-        0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f,
-        -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f,
-        -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f,
-        -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f,
-        0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f,
-        0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f,
-        0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f,
-        -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f,
-        -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f,
-        0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f,
-        0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f,
-        0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f,
-        -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f,
-        0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f,
-        -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f,
-        4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f,
-        -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f,
-        9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f,
-        -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f,
-        0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f,
-        -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f,
-        -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f,
-        0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f,
-        0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f,
-        -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f,
-        -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f,
-        0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f,
-        5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f,
-        0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f,
-        -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f,
-        -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f,
-        -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f,
-        -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f,
-        0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f,
-        -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f,
-        0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f,
-        -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f,
-        0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f,
-        -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f,
-        0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f,
-        -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f,
-        -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f,
-        -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f,
-        0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f,
-        -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f,
-        -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f,
-        0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f,
-        -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f,
-        -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f,
-        -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f,
-        0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f,
-        0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f,
-        0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f,
-        0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f,
-        -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f,
-        3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f,
-        0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f,
-        -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f,
-        0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f,
-        0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f,
-        -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f,
-        -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f,
-        0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f,
-        0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f,
-        -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f,
-        -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f,
-        -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f,
-        -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f,
-        -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f,
-        -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f,
-        -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f,
-        0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f,
-        0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f,
-        -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f,
-        0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f,
-        -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f,
-        0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f,
-        -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f,
-        -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f,
-        -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f,
-        0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f,
-        0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f,
-        0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f,
-        -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f,
-        -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f,
-        0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f,
-        0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f,
-        0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f,
-        -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f,
-        0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f,
-        0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f,
-        0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f,
-        -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f,
-        0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f,
-        0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f,
-        0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f,
-        -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f,
-        -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f,
-        0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f,
-        -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f,
-        0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f,
-        -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f,
-        -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f,
-        -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f,
-        -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f,
-        0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f,
-        0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f,
-        -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f,
-        0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f,
-        7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f,
-        0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f,
-        -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f,
-        0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f,
-        0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f,
-        -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f,
-        0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f,
-        0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f,
-        -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f,
-        -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f,
-        0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f,
-        1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f,
-        0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f,
-        5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f,
-        4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f,
-        0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f,
-        0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f,
-        -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f,
-        -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f,
-        0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f,
-        0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f,
-        0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f,
-        0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f,
-        0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f,
-        0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f,
-        -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f,
-        0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f,
-        0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f,
-        0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f,
-        -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f,
-        4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f,
-        -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f,
-        -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f,
-        6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f,
-        -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f,
-        0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f,
-        0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f,
-        0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f,
-        -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f,
-        -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f,
-        -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f,
-        -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f,
-        0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f,
-        -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f,
-        0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f,
-        2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f,
-        -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f,
-        -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f,
-        -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f,
-        2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f,
-        0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f,
-        0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f,
-        0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f,
-        -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f,
-        -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f,
-        -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f,
-        -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f,
-        0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f,
-        0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f,
-        0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f,
-        -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f,
-        -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f,
-        -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f,
-        -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f,
-        -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f,
-        0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f,
-        -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f,
-        -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f,
-        0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f,
-        -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f,
-        0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f,
-        -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f,
-        0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f,
-        1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f,
-        -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f,
-        -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f,
-        -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f,
-        -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f,
-        0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f,
-        0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f,
-        0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f,
-        0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f,
-        0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f,
-        -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f,
-        -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f,
-        0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f,
-        -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f,
-        0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f,
-        -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f,
-        0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f,
-        0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f,
-        3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f,
-        0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f,
-        0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f,
-        -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f,
-        -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f,
-        0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f,
-        0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f,
-        0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f,
-        -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f,
-        0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f,
-        0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f,
-        6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f,
-        -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f,
-        0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f,
-        -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f,
-        0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f,
-        0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f,
-        9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f,
-        -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f,
-        0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f,
-        -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f,
-        0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f,
-        -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f,
-        5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f,
-        -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f,
-        -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f,
-        -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f,
-        0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f,
-        -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f,
-        0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f,
-        -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f,
-        0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f,
-        -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f,
-        -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f,
-        0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f,
-        0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f,
-        8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f,
-        -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f,
-        -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f,
-        0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f,
-        -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f,
-        7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f,
-        0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f,
-        0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f,
-        -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f,
-        0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f,
-        0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f,
-        -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f,
-        -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f,
-        0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f,
-        8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f,
-        -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f,
-        0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f,
-        -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f,
-        0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f,
-        -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f,
-        0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f,
-        0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f,
-        -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f,
-        0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f,
-        -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f,
-        0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f,
-        0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f,
-        0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f,
-        -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f,
-        -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f,
-        0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f,
-        -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f,
-        -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f,
-        -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f,
-        -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f,
-        -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f,
-        -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f,
-        -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f,
-        0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f,
-        -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f,
-        -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f,
-        -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f,
-        0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f,
-        -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f,
-        -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f,
-        -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f,
-        0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f,
-        0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f,
-        0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f,
-        -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f,
-        -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f,
-        -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f,
-        -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f,
-        -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f,
-        2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f,
-        -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f,
-        0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f,
-        0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f,
-        0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f,
-        -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f,
-        4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f,
-        0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f,
-        0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f,
-        -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f,
-        -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f,
-        -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
-        -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
-        -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
-        -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f
-    };
-    return std::vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
-}
-
-/* Returns the nearest upper power of two, works only for
-the typical GPU thread count (pert block) values */
-static int power_2up(unsigned int n)
-{
-    if (n < 1) return 1;
-    else if (n < 2) return 2;
-    else if (n < 4) return 4;
-    else if (n < 8) return 8;
-    else if (n < 16) return 16;
-    else if (n < 32) return 32;
-    else if (n < 64) return 64;
-    else if (n < 128) return 128;
-    else if (n < 256) return 256;
-    else if (n < 512) return 512;
-    else if (n < 1024) return 1024;
-    return -1; // Input is too big
-}
-
-void cv::ocl::device::hog::set_up_constants(int nbins,
-                                            int block_stride_x, int block_stride_y,
-                                            int nblocks_win_x, int nblocks_win_y)
-{
-    cnbins = nbins;
-    cblock_stride_x = block_stride_x;
-    cblock_stride_y = block_stride_y;
-    cnblocks_win_x = nblocks_win_x;
-    cnblocks_win_y = nblocks_win_y;
-
-    int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
-    cblock_hist_size = block_hist_size;
-
-    int descr_width = nblocks_win_x * block_hist_size;
-    cdescr_width = descr_width;
-    cdescr_height = nblocks_win_y;
-
-    int descr_size = descr_width * nblocks_win_y;
-    cdescr_size = descr_size;
-
-    qangle_type = CV_8UC2;
-    qangle_step_shift = 0;
-    // Some Intel devices have low single-byte access performance,
-    // so we change the datatype here.
-    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
-    {
-        qangle_type = CV_32SC2;
-        qangle_step_shift = 2;
-    }
-}
-
-void cv::ocl::device::hog::compute_hists(int nbins,
-                                         int block_stride_x, int block_stride_y,
-                                         int height, int width,
-                                         const cv::ocl::oclMat &grad,
-                                         const cv::ocl::oclMat &qangle,
-                                         const cv::ocl::oclMat &gauss_w_lut,
-                                         cv::ocl::oclMat &block_hists)
-{
-    Context *clCxt = Context::getContext();
-    std::vector< std::pair<size_t, const void *> > args;
-    String kernelName = "compute_hists_lut_kernel";
-
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
-        / block_stride_x;
-    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
-        / block_stride_y;
-    int blocks_total = img_block_width * img_block_height;
-
-    int grad_quadstep = grad.step >> 2;
-    int qangle_step = qangle.step >> qangle_step_shift;
-
-    int blocks_in_group = 4;
-    size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
-    size_t globalThreads[3] = {
-        divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 };
-
-    int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
-    int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
-
-    int smem = (hists_size + final_hists_size) * blocks_in_group;
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&blocks_in_group));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&blocks_total));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
-    args.push_back( std::make_pair( smem, (void *)NULL));
-
-    if(hog_device_cpu)
-    {
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-            localThreads, args, -1, -1, "-D CPU");
-    }
-    else
-    {
-        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
-        size_t wave_size = queryWaveFrontSize(kernel);
-        char opt[32] = {0};
-        sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-            localThreads, args, -1, -1, opt);
-    }
-}
-
-void cv::ocl::device::hog::normalize_hists(int nbins,
-                                           int block_stride_x, int block_stride_y,
-                                           int height, int width,
-                                           cv::ocl::oclMat &block_hists,
-                                           float threshold)
-{
-    Context *clCxt = Context::getContext();
-    std::vector< std::pair<size_t, const void *> > args;
-    String kernelName;
-
-    int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
-        / block_stride_x;
-    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
-        / block_stride_y;
-    int nthreads;
-    size_t globalThreads[3] = { 1, 1, 1  };
-    size_t localThreads[3] = { 1, 1, 1  };
-
-    if ( nbins == 9 )
-    {
-        /* optimized for the case of 9 bins */
-        kernelName = "normalize_hists_36_kernel";
-        int blocks_in_group = NTHREADS / block_hist_size;
-        nthreads = blocks_in_group * block_hist_size;
-        int num_groups = divUp( img_block_width * img_block_height, blocks_in_group);
-        globalThreads[0] = nthreads * num_groups;
-        localThreads[0] = nthreads;
-    }
-    else
-    {
-        kernelName = "normalize_hists_kernel";
-        nthreads = power_2up(block_hist_size);
-        globalThreads[0] = img_block_width * nthreads;
-        globalThreads[1] = img_block_height;
-        localThreads[0] = nthreads;
-
-        if ((nthreads < 32) || (nthreads > 512) )
-            cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big",
-                "normalize_hists", __FILE__, __LINE__);
-
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
-    }
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
-    args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL));
-
-    if(hog_device_cpu)
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-                             localThreads, args, -1, -1, "-D CPU");
-    else
-    {
-        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
-        size_t wave_size = queryWaveFrontSize(kernel);
-        char opt[32] = {0};
-        sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-                             localThreads, args, -1, -1, opt);
-    }
-}
-
-void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
-                                          int block_stride_y, int block_stride_x,
-                                          int win_stride_y, int win_stride_x,
-                                          int height, int width,
-                                          const cv::ocl::oclMat &block_hists,
-                                          const cv::ocl::oclMat &coefs,
-                                          float free_coef, float threshold,
-                                          cv::ocl::oclMat &labels)
-{
-    Context *clCxt = Context::getContext();
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int nthreads;
-    String kernelName;
-    switch (cdescr_width)
-    {
-    case 180:
-        nthreads = 180;
-        kernelName = "classify_hists_180_kernel";
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
-        break;
-    case 252:
-        nthreads = 256;
-        kernelName = "classify_hists_252_kernel";
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
-        break;
-    default:
-        nthreads = 256;
-        kernelName = "classify_hists_kernel";
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
-        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
-    }
-
-    int win_block_stride_x = win_stride_x / block_stride_x;
-    int win_block_stride_y = win_stride_y / block_stride_y;
-    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
-    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
-        block_stride_x;
-
-    size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
-    size_t localThreads[3] = { nthreads, 1, 1 };
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coefs.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&free_coef));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data));
-
-    if(hog_device_cpu)
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-                             localThreads, args, -1, -1, "-D CPU");
-    else
-    {
-        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
-        size_t wave_size = queryWaveFrontSize(kernel);
-        char opt[32] = {0};
-        sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
-        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-                             localThreads, args, -1, -1, opt);
-    }
-}
-
-void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
-                                                  int block_stride_y, int block_stride_x,
-                                                  int win_stride_y, int win_stride_x,
-                                                  int height, int width,
-                                                  const cv::ocl::oclMat &block_hists,
-                                                  cv::ocl::oclMat &descriptors)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "extract_descrs_by_rows_kernel";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int win_block_stride_x = win_stride_x / block_stride_x;
-    int win_block_stride_y = win_stride_y / block_stride_y;
-    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
-    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
-        block_stride_x;
-    int descriptors_quadstep = descriptors.step >> 2;
-
-    size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
-    size_t localThreads[3] = { NTHREADS, 1, 1 };
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
-
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-        localThreads, args, -1, -1);
-}
-
-void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
-                                                  int block_stride_y, int block_stride_x,
-                                                  int win_stride_y, int win_stride_x,
-                                                  int height, int width,
-                                                  const cv::ocl::oclMat &block_hists,
-                                                  cv::ocl::oclMat &descriptors)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "extract_descrs_by_cols_kernel";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int win_block_stride_x = win_stride_x / block_stride_x;
-    int win_block_stride_y = win_stride_y / block_stride_y;
-    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
-    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
-        block_stride_x;
-    int descriptors_quadstep = descriptors.step >> 2;
-
-    size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
-    size_t localThreads[3] = { NTHREADS, 1, 1 };
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnblocks_win_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnblocks_win_y));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
-
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-        localThreads, args, -1, -1);
-}
-
-void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
-                                                  const cv::ocl::oclMat &img,
-                                                  float angle_scale,
-                                                  cv::ocl::oclMat &grad,
-                                                  cv::ocl::oclMat &qangle,
-                                                  bool correct_gamma)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "compute_gradients_8UC1_kernel";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    size_t localThreads[3] = { NTHREADS, 1, 1 };
-    size_t globalThreads[3] = { width, height, 1 };
-    char correctGamma = (correct_gamma) ? 1 : 0;
-    int img_step = img.step;
-    int grad_quadstep = grad.step >> 3;
-    int qangle_step = qangle.step >> (1 + qangle_step_shift);
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
-    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
-
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-        localThreads, args, -1, -1);
-}
-
-void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
-                                                  const cv::ocl::oclMat &img,
-                                                  float angle_scale,
-                                                  cv::ocl::oclMat &grad,
-                                                  cv::ocl::oclMat &qangle,
-                                                  bool correct_gamma)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "compute_gradients_8UC4_kernel";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    size_t localThreads[3] = { NTHREADS, 1, 1 };
-    size_t globalThreads[3] = { width, height, 1 };
-
-    char correctGamma = (correct_gamma) ? 1 : 0;
-    int img_step = img.step >> 2;
-    int grad_quadstep = grad.step >> 3;
-    int qangle_step = qangle.step >> (1 + qangle_step_shift);
-
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
-    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
-
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
-        localThreads, args, -1, -1);
-}
diff --git a/modules/ocl/src/hough.cpp b/modules/ocl/src/hough.cpp
deleted file mode 100644
index dca1d8b..0000000
--- a/modules/ocl/src/hough.cpp
+++ /dev/null
@@ -1,398 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-#if !defined (HAVE_OPENCL)
-
-void cv::ocl::HoughCircles(const oclMat&, oclMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
-void cv::ocl::HoughCircles(const oclMat&, oclMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
-void cv::ocl::HoughCirclesDownload(const oclMat&, OutputArray) { throw_nogpu(); }
-
-#else /* !defined (HAVE_OPENCL) */
-
-#define MUL_UP(a, b) ((a)/(b)+1)*(b)
-
-//////////////////////////////////////////////////////////
-// common functions
-
-namespace
-{
-    int buildPointList_gpu(const oclMat& src, oclMat& list)
-    {
-        const int PIXELS_PER_THREAD = 16;
-
-        int totalCount = 0;
-        int err = CL_SUCCESS;
-        cl_mem counter = clCreateBuffer(*(cl_context*)src.clCxt->getOpenCLContextPtr(),
-                                        CL_MEM_COPY_HOST_PTR,
-                                        sizeof(int),
-                                        &totalCount,
-                                        &err);
-        openCLSafeCall(err);
-
-        const size_t blkSizeX = 32;
-        const size_t blkSizeY = 4;
-        size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
-
-        const int PIXELS_PER_BLOCK = blkSizeX * PIXELS_PER_THREAD;
-        const size_t glbSizeX = src.cols % (PIXELS_PER_BLOCK) == 0 ? src.cols : MUL_UP(src.cols, PIXELS_PER_BLOCK);
-        const size_t glbSizeY = src.rows % blkSizeY == 0 ? src.rows : MUL_UP(src.rows, blkSizeY);
-        size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
-
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&src.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.step ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&list.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&counter ));
-
-        // WARNING: disabled until
-        openCLExecuteKernel(src.clCxt, &imgproc_hough, "buildPointList", globalThreads, localThreads, args, -1, -1);
-        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)src.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
-        openCLSafeCall(clReleaseMemObject(counter));
-
-        return totalCount;
-    }
-}
-
-//////////////////////////////////////////////////////////
-// HoughCircles
-
-namespace
-{
-    void circlesAccumCenters_gpu(const oclMat& list, int count, const oclMat& dx, const oclMat& dy, oclMat& accum, int minRadius, int maxRadius, float idp)
-    {
-        const size_t blkSizeX = 256;
-        size_t localThreads[3] = { 256, 1, 1 };
-
-        const size_t glbSizeX = count % blkSizeX == 0 ? count : MUL_UP(count, blkSizeX);
-        size_t globalThreads[3] = { glbSizeX, 1, 1 };
-
-        const int width  = accum.cols - 2;
-        const int height = accum.rows - 2;
-
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&list.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&count ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&dx.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dx.step ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&dy.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dy.step ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&accum.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&accum.step ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&width ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&height ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&minRadius));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&maxRadius));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&idp));
-
-        openCLExecuteKernel(accum.clCxt, &imgproc_hough, "circlesAccumCenters", globalThreads, localThreads, args, -1, -1);
-    }
-
-    int buildCentersList_gpu(const oclMat& accum, oclMat& centers, int threshold)
-    {
-        int totalCount = 0;
-        int err = CL_SUCCESS;
-        cl_mem counter = clCreateBuffer(*(cl_context*)accum.clCxt->getOpenCLContextPtr(),
-                                        CL_MEM_COPY_HOST_PTR,
-                                        sizeof(int),
-                                        &totalCount,
-                                        &err);
-        openCLSafeCall(err);
-
-        const size_t blkSizeX = 32;
-        const size_t blkSizeY = 8;
-        size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
-
-        const size_t glbSizeX = (accum.cols - 2) % blkSizeX == 0 ? accum.cols - 2 : MUL_UP(accum.cols - 2, blkSizeX);
-        const size_t glbSizeY = (accum.rows - 2) % blkSizeY == 0 ? accum.rows - 2 : MUL_UP(accum.rows - 2, blkSizeY);
-        size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
-
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&accum.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&accum.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&accum.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&accum.step ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&centers.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&threshold ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&counter ));
-
-        openCLExecuteKernel(accum.clCxt, &imgproc_hough, "buildCentersList", globalThreads, localThreads, args, -1, -1);
-
-        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)accum.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
-        openCLSafeCall(clReleaseMemObject(counter));
-
-        return totalCount;
-    }
-
-    int circlesAccumRadius_gpu(const oclMat& centers, int centersCount,
-                               const oclMat& list, int count,
-                               oclMat& circles, int maxCircles,
-                               float dp, int minRadius, int maxRadius, int threshold)
-    {
-        int totalCount = 0;
-        int err = CL_SUCCESS;
-        cl_mem counter = clCreateBuffer(*(cl_context*)circles.clCxt->getOpenCLContextPtr(),
-                                        CL_MEM_COPY_HOST_PTR,
-                                        sizeof(int),
-                                        &totalCount,
-                                        &err);
-        openCLSafeCall(err);
-
-        const size_t blkSizeX = circles.clCxt->getDeviceInfo().maxWorkGroupSize;
-        size_t localThreads[3] = { blkSizeX, 1, 1 };
-
-        const size_t glbSizeX = centersCount * blkSizeX;
-        size_t globalThreads[3] = { glbSizeX, 1, 1 };
-
-        const int histSize = maxRadius - minRadius + 1;
-        size_t smemSize = (histSize + 2) * sizeof(int);
-
-        std::vector<std::pair<size_t , const void *> > args;
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&centers.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&list.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&count ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&circles.data ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&maxCircles ));
-        args.push_back( std::make_pair( sizeof(cl_float), (void *)&dp ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&minRadius ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&maxRadius ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&histSize ));
-        args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&threshold ));
-        args.push_back( std::make_pair( smemSize        , (void *)NULL ));
-        args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&counter ));
-
-        CV_Assert(circles.offset == 0);
-
-        openCLExecuteKernel(circles.clCxt, &imgproc_hough, "circlesAccumRadius", globalThreads, localThreads, args, -1, -1);
-
-        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)circles.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
-
-        openCLSafeCall(clReleaseMemObject(counter));
-
-        totalCount = std::min(totalCount, maxCircles);
-
-        return totalCount;
-    }
-
-
-} // namespace
-
-
-
-void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
-{
-    HoughCirclesBuf buf;
-    HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
-}
-
-void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method,
-                           float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
-{
-    CV_Assert(src.type() == CV_8UC1);
-    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
-    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
-    CV_Assert(method == HOUGH_GRADIENT);
-    CV_Assert(dp > 0);
-    CV_Assert(minRadius > 0 && maxRadius > minRadius);
-    CV_Assert(cannyThreshold > 0);
-    CV_Assert(votesThreshold > 0);
-    CV_Assert(maxCircles > 0);
-
-    const float idp = 1.0f / dp;
-
-    cv::ocl::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);
-
-    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.srcPoints);
-    const int pointsCount = buildPointList_gpu(buf.edges, buf.srcPoints);
-    if (pointsCount == 0)
-    {
-        circles.release();
-        return;
-    }
-
-    ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
-    buf.accum.setTo(Scalar::all(0));
-
-    circlesAccumCenters_gpu(buf.srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
-
-    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.centers);
-    int centersCount = buildCentersList_gpu(buf.accum, buf.centers, votesThreshold);
-    if (centersCount == 0)
-    {
-        circles.release();
-        return;
-    }
-
-    if (minDist > 1)
-    {
-        cv::AutoBuffer<unsigned int> oldBuf_(centersCount);
-        cv::AutoBuffer<unsigned int> newBuf_(centersCount);
-        int newCount = 0;
-
-        unsigned int* oldBuf = oldBuf_;
-        unsigned int* newBuf = newBuf_;
-
-        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)buf.centers.clCxt->getOpenCLCommandQueuePtr(),
-                                           (cl_mem)buf.centers.data,
-                                           CL_TRUE,
-                                           0,
-                                           centersCount * sizeof(unsigned int),
-                                           oldBuf,
-                                           0,
-                                           NULL,
-                                           NULL));
-
-
-        const int cellSize = cvRound(minDist);
-        const int gridWidth = (src.cols + cellSize - 1) / cellSize;
-        const int gridHeight = (src.rows + cellSize - 1) / cellSize;
-
-        std::vector< std::vector<unsigned int> > grid(gridWidth * gridHeight);
-
-        const float minDist2 = minDist * minDist;
-
-        for (int i = 0; i < centersCount; ++i)
-        {
-            unsigned int p = oldBuf[i];
-            const int px = p & 0xFFFF;
-            const int py = (p >> 16) & 0xFFFF;
-
-            bool good = true;
-
-            int xCell = static_cast<int>(px / cellSize);
-            int yCell = static_cast<int>(py / cellSize);
-
-            int x1 = xCell - 1;
-            int y1 = yCell - 1;
-            int x2 = xCell + 1;
-            int y2 = yCell + 1;
-
-            // boundary check
-            x1 = std::max(0, x1);
-            y1 = std::max(0, y1);
-            x2 = std::min(gridWidth - 1, x2);
-            y2 = std::min(gridHeight - 1, y2);
-
-            for (int yy = y1; yy <= y2; ++yy)
-            {
-                for (int xx = x1; xx <= x2; ++xx)
-                {
-                    std::vector<unsigned int>& m = grid[yy * gridWidth + xx];
-
-                    for(size_t j = 0; j < m.size(); ++j)
-                    {
-                        const int val = m[j];
-                        const int jx = val & 0xFFFF;
-                        const int jy = (val >> 16) & 0xFFFF;
-
-                        float dx = (float)(px - jx);
-                        float dy = (float)(py - jy);
-
-                        if (dx * dx + dy * dy < minDist2)
-                        {
-                            good = false;
-                            goto break_out;
-                        }
-                    }
-                }
-            }
-
-            break_out:
-
-            if(good)
-            {
-                grid[yCell * gridWidth + xCell].push_back(p);
-                newBuf[newCount++] = p;
-            }
-        }
-
-        openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)buf.centers.clCxt->getOpenCLCommandQueuePtr(),
-                                            (cl_mem)buf.centers.data,
-                                            CL_TRUE,
-                                            0,
-                                            newCount * sizeof(unsigned int),
-                                            newBuf,
-                                            0,
-                                            0,
-                                            0));
-        centersCount = newCount;
-    }
-
-    ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
-
-    const int circlesCount = circlesAccumRadius_gpu(buf.centers, centersCount,
-                                                           buf.srcPoints, pointsCount,
-                                                           circles, maxCircles,
-                                                           dp, minRadius, maxRadius, votesThreshold);
-
-    if (circlesCount > 0)
-        circles.cols = circlesCount;
-    else
-        circles.release();
-}
-
-void cv::ocl::HoughCirclesDownload(const oclMat& d_circles, cv::OutputArray h_circles_)
-{
-    // FIX ME: garbage values are copied!
-    CV_Error(Error::StsNotImplemented, "HoughCirclesDownload is not implemented");
-
-    if (d_circles.empty())
-    {
-        h_circles_.release();
-        return;
-    }
-
-    CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3);
-
-    h_circles_.create(1, d_circles.cols, CV_32FC3);
-    Mat h_circles = h_circles_.getMat();
-    d_circles.download(h_circles);
-}
-
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
deleted file mode 100644
index 0ac6271..0000000
--- a/modules/ocl/src/imgproc.cpp
+++ /dev/null
@@ -1,2014 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Rock Li, Rock.Li@amd.com
-//    Zero Lin, Zero.Lin@amd.com
-//    Zhang Ying, zhangying913@gmail.com
-//    Xu Pang, pangxu010@163.com
-//    Wu Zailong, bullet@yeah.net
-//    Wenju He, wenju@multicorewareinc.com
-//    Peng Xiao, pengxiao@outlook.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        ////////////////////////////////////OpenCL call wrappers////////////////////////////
-
-        template <typename T> struct index_and_sizeof;
-        template <> struct index_and_sizeof<char>
-        {
-            enum { index = 1 };
-        };
-        template <> struct index_and_sizeof<unsigned char>
-        {
-            enum { index = 2 };
-        };
-        template <> struct index_and_sizeof<short>
-        {
-            enum { index = 3 };
-        };
-        template <> struct index_and_sizeof<unsigned short>
-        {
-            enum { index = 4 };
-        };
-        template <> struct index_and_sizeof<int>
-        {
-            enum { index = 5 };
-        };
-        template <> struct index_and_sizeof<float>
-        {
-            enum { index = 6 };
-        };
-        template <> struct index_and_sizeof<double>
-        {
-            enum { index = 7 };
-        };
-
-        /////////////////////////////////////////////////////////////////////////////////////
-        // threshold
-
-        static std::vector<uchar> scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn)
-        {
-            CV_Assert(ocn == cn || (ocn == 4 && cn == 3));
-
-            static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort),
-                                       sizeof(short), sizeof(int), sizeof(float), sizeof(double) };
-
-            int elemSize1 = sizeMap[depth];
-            int bufSize = elemSize1 * ocn;
-            std::vector<uchar> _buf(bufSize);
-            uchar * buf = &_buf[0];
-            scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn));
-            memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1);
-
-            return _buf;
-        }
-
-        static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
-        {
-            bool ival = src.depth() < CV_32F;
-            int cn = src.channels(), vecSize = 4, depth = src.depth();
-            std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
-                                                               dst.oclchannels(), dst.channels());
-            std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
-
-            const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
-                                                  "THRESH_TOZERO", "THRESH_TOZERO_INV" };
-            const char * const channelMap[] = { "", "", "2", "4", "4" };
-            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-            std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
-
-            int elemSize = src.elemSize();
-            int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
-            int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step));
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step));
-            args.push_back( std::make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
-            args.push_back( std::make_pair(maxValue.size(), (void *)&maxValue[0]));
-
-            int max_index = dst.cols, cols = dst.cols;
-            if (cn == 1 && vecSize > 1)
-            {
-                CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
-                cols = divUp(cols, vecSize);
-                buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
-                                       typeMap[depth], vecSize, vecSize, vecSize, vecSize);
-
-                int vecSizeBytes = vecSize * dst.elemSize1();
-                if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
-                    buildOptions += " -D DST_ALIGNED";
-                if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
-                    buildOptions += " -D SRC_ALIGNED";
-
-                args.push_back( std::make_pair(sizeof(cl_int), (void *)&max_index));
-            }
-
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
-
-            size_t localThreads[3] = { 16, 16, 1 };
-            size_t globalThreads[3] = { cols, dst.rows, 1 };
-
-            openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
-                                -1, -1, buildOptions.c_str());
-        }
-
-        double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
-        {
-            CV_Assert(thresholdType == THRESH_BINARY || thresholdType == THRESH_BINARY_INV || thresholdType == THRESH_TRUNC
-                      || thresholdType == THRESH_TOZERO || thresholdType == THRESH_TOZERO_INV);
-
-            dst.create(src.size(), src.type());
-            threshold_runner(src, dst, thresh, maxVal, thresholdType);
-
-            return thresh;
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////////
-        ///////////////////////////////   remap   //////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////
-
-        void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue )
-        {
-            Context *clCxt = src.clCxt;
-            bool supportsDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
-            if (!supportsDouble && src.depth() == CV_64F)
-            {
-                CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
-                return;
-            }
-
-            if (map1.empty())
-                map1.swap(map2);
-
-            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
-            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ) ||
-                      (map1.type() == CV_32FC2 && !map2.data) ||
-                      (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
-            CV_Assert(!map2.data || map2.size() == map1.size());
-            CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
-                      || borderType == BORDER_REFLECT_101 || borderType == BORDER_REFLECT);
-
-            dst.create(map1.size(), src.type());
-
-            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-            const char * const channelMap[] = { "", "", "2", "4", "4" };
-            const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
-            const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
-                                   "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
-
-            String kernelName = "remap";
-            if (map1.type() == CV_32FC2 && map2.empty())
-                kernelName += "_32FC2";
-            else if (map1.type() == CV_16SC2)
-            {
-                kernelName += "_16SC2";
-                if (!map2.empty())
-                    kernelName += "_16UC1";
-            }
-            else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
-                kernelName += "_2_32FC1";
-            else
-                CV_Error(Error::StsBadArg, "Unsupported map types");
-
-            int ocn = dst.oclchannels();
-            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-            Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
-            String buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
-                                         borderMap[borderType], typeMap[src.depth()], channelMap[ocn]);
-
-            if (interpolation != INTER_NEAREST)
-            {
-                int wdepth = std::max(CV_32F, dst.depth());
-                buildOptions = buildOptions
-                              + format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
-                                       " -D convertToWT2=convert_%s2 -D WT2=%s2",
-                                       typeMap[wdepth], channelMap[ocn],
-                                       typeMap[src.depth()], channelMap[ocn], src.depth() < CV_32F ? "_sat_rte" : "",
-                                       typeMap[wdepth], channelMap[ocn],
-                                       typeMap[wdepth], typeMap[wdepth]);
-            }
-
-            int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
-            int map1_step = map1.step / map1.elemSize(), map1_offset = map1.offset / map1.elemSize();
-            int map2_step = map2.step / map2.elemSize(), map2_offset = map2.offset / map2.elemSize();
-            int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&map1.data));
-            if (!map2.empty())
-                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&map2.data));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1_offset));
-            if (!map2.empty())
-                args.push_back( std::make_pair(sizeof(cl_int), (void *)&map2_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1_step));
-            if (!map2.empty())
-                args.push_back( std::make_pair(sizeof(cl_int), (void *)&map2_step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair(scalar.elemSize(), (void *)scalar.data));
-
-#ifdef ANDROID
-            openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, NULL, args, -1, -1, buildOptions.c_str());
-#else
-            size_t localThreads[3] = { 256, 1, 1 };
-            openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
-#endif
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////////
-        // resize
-
-        static void computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
-                                          float * const alpha_tab, int * const ofs_tab)
-        {
-            int k = 0, dx = 0;
-            for ( ; dx < dsize; dx++)
-            {
-                ofs_tab[dx] = k;
-
-                double fsx1 = dx * scale;
-                double fsx2 = fsx1 + scale;
-                double cellWidth = std::min(scale, ssize - fsx1);
-
-                int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
-
-                sx2 = std::min(sx2, ssize - 1);
-                sx1 = std::min(sx1, sx2);
-
-                if (sx1 - fsx1 > 1e-3)
-                {
-                    map_tab[k] = sx1 - 1;
-                    alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
-                }
-
-                for (int sx = sx1; sx < sx2; sx++)
-                {
-                    map_tab[k] = sx;
-                    alpha_tab[k++] = float(1.0 / cellWidth);
-                }
-
-                if (fsx2 - sx2 > 1e-3)
-                {
-                    map_tab[k] = sx2;
-                    alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
-                }
-            }
-            ofs_tab[dx] = k;
-        }
-
-        static void computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
-        {
-            for (int i = 0; i < dcols; ++i)
-                dmap_tab[i] = scale * i;
-
-            for (int i = 0, size = dcols * scale; i < size; ++i)
-                smap_tab[i] = std::min(scol - 1, i);
-        }
-
-        static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation)
-        {
-            float ifxf = (float)ifx, ifyf = (float)ify;
-            int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
-            int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-            int ocn = dst.oclchannels(), depth = dst.depth();
-
-            const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
-            std::string kernelName = std::string("resize") + interMap[interpolation];
-
-            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-            const char * const channelMap[] = { "" , "", "2", "4", "4" };
-            std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[depth], channelMap[ocn]);
-
-            int wdepth = std::max(src.depth(), CV_32F);
-
-            // check if fx, fy is integer and then we have inter area fast mode
-            int iscale_x = saturate_cast<int>(ifx);
-            int iscale_y = saturate_cast<int>(ify);
-
-            bool is_area_fast = std::abs(ifx - iscale_x) < DBL_EPSILON &&
-                std::abs(ify - iscale_y) < DBL_EPSILON;
-            if (is_area_fast)
-                wdepth = std::max(src.depth(), CV_32S);
-
-            if (interpolation != INTER_NEAREST)
-            {
-                buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s",
-                                      typeMap[wdepth], typeMap[wdepth], channelMap[ocn],
-                                      typeMap[wdepth], channelMap[ocn],
-                                      typeMap[src.depth()], channelMap[ocn], src.depth() <= CV_32S ? "_sat_rte" : "");
-            }
-
-#ifdef ANDROID
-            size_t blkSizeX = 16, blkSizeY = 8;
-#else
-            size_t blkSizeX = 16, blkSizeY = 16;
-#endif
-            size_t glbSizeX;
-            if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
-            {
-                size_t cols = (dst.cols + dst.offset % 4 + 3) / 4;
-                glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
-            }
-            else
-                glbSizeX = dst.cols;
-
-            oclMat alphaOcl, mapOcl, tabofsOcl;
-            if (interpolation == INTER_AREA)
-            {
-                if (is_area_fast)
-                {
-                    kernelName += "_FAST";
-                    int wdepth2 = std::max(CV_32F, src.depth());
-                    buildOption += format(" -D WT2V=%s%s -D convertToWT2V=convert_%s%s -D AREA_FAST -D XSCALE=%d -D YSCALE=%d -D SCALE=%f",
-                                          typeMap[wdepth2], channelMap[ocn], typeMap[wdepth2], channelMap[ocn],
-                                          iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
-
-                    int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
-                    AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
-                    int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
-                    int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
-
-                    computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
-                    computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
-
-                    tabofsOcl = oclMat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab);
-                    mapOcl = oclMat(1, smap_tab_size, CV_32SC1, (void *)smap_tab);
-                }
-                else
-                {
-                    Size ssize = src.size(), dsize = dst.size();
-                    int xytab_size = (ssize.width + ssize.height) << 1;
-                    int tabofs_size = dsize.height + dsize.width + 2;
-
-                    AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
-                    AutoBuffer<float> _xyalpha_tab(xytab_size);
-                    int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
-                    float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
-                    int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
-
-                    computeResizeAreaTabs(ssize.width, dsize.width, ifx, xmap_tab, xalpha_tab, xofs_tab);
-                    computeResizeAreaTabs(ssize.height, dsize.height, ify, ymap_tab, yalpha_tab, yofs_tab);
-
-                    // loading precomputed arrays to GPU
-                    alphaOcl = oclMat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab);
-                    mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab);
-                    tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab);
-                }
-            }
-
-            size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
-            size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-
-            if (wdepth == CV_64F)
-            {
-                args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx));
-                args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify));
-            }
-            else
-            {
-                args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifxf));
-                args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifyf));
-            }
-
-            // precomputed tabs
-            if (!tabofsOcl.empty())
-                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data));
-
-            if (!mapOcl.empty())
-                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&mapOcl.data));
-
-            if (!alphaOcl.empty())
-                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&alphaOcl.data));
-
-            ocn = interpolation == INTER_LINEAR ? ocn : -1;
-            depth = interpolation == INTER_LINEAR ? depth : -1;
-
-            openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
-                                ocn, depth, buildOption.c_str());
-        }
-
-        void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
-        {
-            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-            {
-                CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
-                return;
-            }
-
-            CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
-                      || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
-            CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));
-
-            if (dsize.area() == 0)
-            {
-                dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
-                CV_Assert(dsize.area() > 0);
-            }
-            else
-            {
-                fx = (double)dsize.width / src.cols;
-                fy = (double)dsize.height / src.rows;
-            }
-
-            double inv_fy = 1 / fy, inv_fx = 1 / fx;
-            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST ||
-                      (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1));
-
-            dst.create(dsize, src.type());
-
-            resize_gpu( src, dst, inv_fx, inv_fy, interpolation);
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // medianFilter
-
-        void medianFilter(const oclMat &src, oclMat &dst, int m)
-        {
-            CV_Assert( m % 2 == 1 && m > 1 );
-            CV_Assert( (src.depth() == CV_8U || src.depth() == CV_32F) && (src.channels() == 1 || src.channels() == 4));
-            dst.create(src.size(), src.type());
-
-            int srcStep = src.step / src.elemSize(), dstStep = dst.step / dst.elemSize();
-            int srcOffset = src.offset /  src.elemSize(), dstOffset = dst.offset / dst.elemSize();
-
-            Context *clCxt = src.clCxt;
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcOffset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstOffset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep));
-
-            size_t globalThreads[3] = {(src.cols + 18) / 16 * 16, (src.rows + 15) / 16 * 16, 1};
-            size_t localThreads[3] = {16, 16, 1};
-
-            if (m == 3)
-            {
-                String kernelName = "medianFilter3";
-                openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
-            }
-            else if (m == 5)
-            {
-                String kernelName = "medianFilter5";
-                openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
-            }
-            else
-                CV_Error(Error::StsBadArg, "Non-supported filter length");
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // copyMakeBorder
-
-        void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar)
-        {
-            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-            {
-                CV_Error(Error::OpenCLDoubleNotSupported, "Selected device does not support double");
-                return;
-            }
-
-            oclMat _src = src;
-
-            CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
-
-            if( (_src.wholecols != _src.cols || _src.wholerows != _src.rows) && (bordertype & BORDER_ISOLATED) == 0 )
-            {
-                Size wholeSize;
-                Point ofs;
-                _src.locateROI(wholeSize, ofs);
-                int dtop = std::min(ofs.y, top);
-                int dbottom = std::min(wholeSize.height - _src.rows - ofs.y, bottom);
-                int dleft = std::min(ofs.x, left);
-                int dright = std::min(wholeSize.width - _src.cols - ofs.x, right);
-                _src.adjustROI(dtop, dbottom, dleft, dright);
-                top -= dtop;
-                left -= dleft;
-                bottom -= dbottom;
-                right -= dright;
-            }
-            bordertype &= ~cv::BORDER_ISOLATED;
-
-            dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type());
-            int srcStep = _src.step / _src.elemSize(),  dstStep = dst.step / dst.elemSize();
-            int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize();
-            int depth = _src.depth(), ochannels = _src.oclchannels();
-
-            int __bordertype[] = { BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101 };
-            const char *borderstr[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
-
-            int bordertype_index = -1;
-            for (int i = 0, end = sizeof(__bordertype) / sizeof(int); i < end; i++)
-                if (__bordertype[i] == bordertype)
-                {
-                    bordertype_index = i;
-                    break;
-                }
-            if (bordertype_index < 0)
-                CV_Error(Error::StsBadArg, "Unsupported border type");
-
-            size_t localThreads[3] = { 16, 16, 1 };
-            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_src.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&_src.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&_src.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcOffset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstOffset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&top));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&left));
-
-            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-            const char * const channelMap[] = { "", "", "2", "4", "4" };
-            std::string buildOptions = format("-D GENTYPE=%s%s -D %s",
-                                              typeMap[depth], channelMap[ochannels],
-                                              borderstr[bordertype_index]);
-
-            int cn = src.channels(), ocn = src.oclchannels();
-            int bufSize = src.elemSize1() * ocn;
-            AutoBuffer<uchar> _buf(bufSize);
-            uchar * buf = (uchar *)_buf;
-            scalarToRawData(scalar, buf, dst.type());
-            memset(buf + src.elemSize1() * cn, 0, (ocn - cn) * src.elemSize1());
-
-            args.push_back( std::make_pair( bufSize , (void *)buf ));
-
-            openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, "copymakeborder", globalThreads,
-                                localThreads, args, -1, -1, buildOptions.c_str());
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // warp
-
-        namespace
-        {
-#define F double
-
-            void convert_coeffs(F *M)
-            {
-                double D = M[0] * M[4] - M[1] * M[3];
-                D = D != 0 ? 1. / D : 0;
-                double A11 = M[4] * D, A22 = M[0] * D;
-                M[0] = A11;
-                M[1] *= -D;
-                M[3] *= -D;
-                M[4] = A22;
-                double b1 = -M[0] * M[2] - M[1] * M[5];
-                double b2 = -M[3] * M[2] - M[4] * M[5];
-                M[2] = b1;
-                M[5] = b2;
-            }
-
-            double invert(double *M)
-            {
-#define Sd(y,x) (Sd[y*3+x])
-#define Dd(y,x) (Dd[y*3+x])
-#define det3(m)    (m(0,0)*(m(1,1)*m(2,2) - m(1,2)*m(2,1)) -  \
-                    m(0,1)*(m(1,0)*m(2,2) - m(1,2)*m(2,0)) +  \
-                    m(0,2)*(m(1,0)*m(2,1) - m(1,1)*m(2,0)))
-                double *Sd = M;
-                double *Dd = M;
-                double d = det3(Sd);
-                double result = 0;
-                if ( d != 0)
-                {
-                    double t[9];
-                    result = d;
-                    d = 1. / d;
-
-                    t[0] = (Sd(1, 1) * Sd(2, 2) - Sd(1, 2) * Sd(2, 1)) * d;
-                    t[1] = (Sd(0, 2) * Sd(2, 1) - Sd(0, 1) * Sd(2, 2)) * d;
-                    t[2] = (Sd(0, 1) * Sd(1, 2) - Sd(0, 2) * Sd(1, 1)) * d;
-
-                    t[3] = (Sd(1, 2) * Sd(2, 0) - Sd(1, 0) * Sd(2, 2)) * d;
-                    t[4] = (Sd(0, 0) * Sd(2, 2) - Sd(0, 2) * Sd(2, 0)) * d;
-                    t[5] = (Sd(0, 2) * Sd(1, 0) - Sd(0, 0) * Sd(1, 2)) * d;
-
-                    t[6] = (Sd(1, 0) * Sd(2, 1) - Sd(1, 1) * Sd(2, 0)) * d;
-                    t[7] = (Sd(0, 1) * Sd(2, 0) - Sd(0, 0) * Sd(2, 1)) * d;
-                    t[8] = (Sd(0, 0) * Sd(1, 1) - Sd(0, 1) * Sd(1, 0)) * d;
-
-                    Dd(0, 0) = t[0];
-                    Dd(0, 1) = t[1];
-                    Dd(0, 2) = t[2];
-                    Dd(1, 0) = t[3];
-                    Dd(1, 1) = t[4];
-                    Dd(1, 2) = t[5];
-                    Dd(2, 0) = t[6];
-                    Dd(2, 1) = t[7];
-                    Dd(2, 2) = t[8];
-                }
-                return result;
-            }
-
-            void warpAffine_gpu(const oclMat &src, oclMat &dst, F coeffs[2][3], int interpolation)
-            {
-                CV_Assert( (src.oclchannels() == dst.oclchannels()) );
-                int srcStep = src.step1();
-                int dstStep = dst.step1();
-                float float_coeffs[2][3];
-                cl_mem coeffs_cm;
-
-                Context *clCxt = src.clCxt;
-                String s[3] = {"NN", "Linear", "Cubic"};
-                String kernelName = "warpAffine" + s[interpolation];
-
-                if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
-                {
-                    cl_int st;
-                    coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
-                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0,
-                                                        sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
-                }
-                else
-                {
-                    cl_int st;
-                    for(int m = 0; m < 2; m++)
-                        for(int n = 0; n < 3; n++)
-                            float_coeffs[m][n] = coeffs[m][n];
-
-                    coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
-                    openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm,
-                                                        1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
-
-                }
-
-                //TODO: improve this kernel
-#ifdef ANDROID
-                size_t blkSizeX = 16, blkSizeY = 4;
-#else
-                size_t blkSizeX = 16, blkSizeY = 16;
-#endif
-                size_t glbSizeX;
-                size_t cols;
-
-                if (src.type() == CV_8UC1 && interpolation != 2)
-                {
-                    cols = (dst.cols + dst.offset % 4 + 3) / 4;
-                    glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
-                }
-                else
-                {
-                    cols = dst.cols;
-                    glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
-                }
-
-                size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
-                size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
-                size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
-
-                std::vector< std::pair<size_t, const void *> > args;
-
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
-
-                openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
-                openCLSafeCall(clReleaseMemObject(coeffs_cm));
-            }
-
-            void warpPerspective_gpu(const oclMat &src, oclMat &dst, double coeffs[3][3], int interpolation)
-            {
-                CV_Assert( (src.oclchannels() == dst.oclchannels()) );
-                int srcStep = src.step1();
-                int dstStep = dst.step1();
-                float float_coeffs[3][3];
-                cl_mem coeffs_cm;
-
-                Context *clCxt = src.clCxt;
-                String s[3] = {"NN", "Linear", "Cubic"};
-                String kernelName = "warpPerspective" + s[interpolation];
-
-                if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
-                {
-                    cl_int st;
-                    coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
-                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0,
-                                                        sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
-                }
-                else
-                {
-                    cl_int st;
-                    for(int m = 0; m < 3; m++)
-                        for(int n = 0; n < 3; n++)
-                            float_coeffs[m][n] = coeffs[m][n];
-
-                    coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
-                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0,
-                                                        sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
-                }
-
-                //TODO: improve this kernel
-#ifdef ANDROID
-                size_t blkSizeX = 16, blkSizeY = 8;
-#else
-                size_t blkSizeX = 16, blkSizeY = 16;
-#endif
-                size_t glbSizeX;
-                size_t cols;
-                if (src.type() == CV_8UC1 && interpolation == 0)
-                {
-                    cols = (dst.cols + dst.offset % 4 + 3) / 4;
-                    glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
-                }
-                else
-                {
-                    cols = dst.cols;
-                    glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
-                }
-
-                size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
-                size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
-                size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
-
-                std::vector< std::pair<size_t, const void *> > args;
-
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-                args.push_back(std::make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
-                args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
-
-                openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
-                openCLSafeCall(clReleaseMemObject(coeffs_cm));
-            }
-        }
-
-        void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags)
-        {
-            int interpolation = flags & INTER_MAX;
-
-            CV_Assert((src.depth() == CV_8U  || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
-            CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-
-            dst.create(dsize, src.type());
-
-            CV_Assert(M.rows == 2 && M.cols == 3);
-
-            int warpInd = (flags & WARP_INVERSE_MAP) >> 4;
-            F coeffs[2][3];
-
-            double coeffsM[2*3];
-            Mat coeffsMat(2, 3, CV_64F, (void *)coeffsM);
-            M.convertTo(coeffsMat, coeffsMat.type());
-            if (!warpInd)
-                convert_coeffs(coeffsM);
-
-            for(int i = 0; i < 2; ++i)
-                for(int j = 0; j < 3; ++j)
-                    coeffs[i][j] = coeffsM[i*3+j];
-
-            warpAffine_gpu(src, dst, coeffs, interpolation);
-        }
-
-        void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags)
-        {
-            int interpolation = flags & INTER_MAX;
-
-            CV_Assert((src.depth() == CV_8U  || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
-            CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-
-            dst.create(dsize, src.type());
-
-
-            CV_Assert(M.rows == 3 && M.cols == 3);
-
-            int warpInd = (flags & WARP_INVERSE_MAP) >> 4;
-            double coeffs[3][3];
-
-            double coeffsM[3*3];
-            Mat coeffsMat(3, 3, CV_64F, (void *)coeffsM);
-            M.convertTo(coeffsMat, coeffsMat.type());
-            if (!warpInd)
-                invert(coeffsM);
-
-            for(int i = 0; i < 3; ++i)
-                for(int j = 0; j < 3; ++j)
-                    coeffs[i][j] = coeffsM[i*3+j];
-
-            warpPerspective_gpu(src, dst, coeffs, interpolation);
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // integral
-
-        void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth)
-        {
-            CV_Assert(src.type() == CV_8UC1);
-            if (!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-            {
-                CV_Error(Error::OpenCLDoubleNotSupported, "Select device doesn't support double");
-                return;
-            }
-
-            if( sdepth <= 0 )
-                sdepth = CV_32S;
-            sdepth = CV_MAT_DEPTH(sdepth);
-            int type = CV_MAKE_TYPE(sdepth, 1);
-
-            int vlen = 4;
-            int offset = src.offset / vlen;
-            int pre_invalid = src.offset % vlen;
-            int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
-
-            oclMat t_sum , t_sqsum;
-            int w = src.cols + 1, h = src.rows + 1;
-
-            char build_option[250];
-            if(Context::getContext()->supportsFeature(ocl::FEATURE_CL_DOUBLE))
-            {
-                t_sqsum.create(src.cols, src.rows, CV_64FC1);
-                sqsum.create(h, w, CV_64FC1);
-                sprintf(build_option, "-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4");
-            }
-            else
-            {
-                t_sqsum.create(src.cols, src.rows, CV_32FC1);
-                sqsum.create(h, w, CV_32FC1);
-                sprintf(build_option, "-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4");
-            }
-
-            t_sum.create(src.cols, src.rows, type);
-            sum.create(h, w, type);
-
-            int sum_offset = sum.offset / sum.elemSize();
-            int sqsum_offset = sqsum.offset / sqsum.elemSize();
-
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
-            size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, sdepth, build_option);
-
-            args.clear();
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
-            size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, sdepth, build_option);
-        }
-
-        void integral(const oclMat &src, oclMat &sum, int sdepth)
-        {
-            CV_Assert(src.type() == CV_8UC1);
-            int vlen = 4;
-            int offset = src.offset / vlen;
-            int pre_invalid = src.offset % vlen;
-            int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
-
-            if( sdepth <= 0 )
-                sdepth = CV_32S;
-            sdepth = CV_MAT_DEPTH(sdepth);
-            int type = CV_MAKE_TYPE(sdepth, 1);
-
-            oclMat t_sum;
-            int w = src.cols + 1, h = src.rows + 1;
-
-            t_sum.create(src.cols, src.rows, type);
-            sum.create(h, w, type);
-
-            int sum_offset = sum.offset / vlen;
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
-            size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, sdepth);
-
-            args.clear();
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
-            size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, sdepth);
-        }
-
-        /////////////////////// corner //////////////////////////////
-
-        static void extractCovData(const oclMat &src, oclMat &Dx, oclMat &Dy,
-                            int blockSize, int ksize, int borderType)
-        {
-            CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1);
-            double scale = static_cast<double>(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
-            if (ksize < 0)
-                scale *= 2.;
-
-            if (src.depth() == CV_8U)
-            {
-                scale *= 255.;
-                scale = 1. / scale;
-            }
-            else
-                scale = 1. / scale;
-
-            const int sobel_lsz = 16;
-            if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
-                (ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
-                src.wholerows > sobel_lsz + (ksize>>1) &&
-                src.wholecols > sobel_lsz + (ksize>>1))
-            {
-                Dx.create(src.size(), CV_32FC1);
-                Dy.create(src.size(), CV_32FC1);
-
-                CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
-
-                size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
-                size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
-
-                unsigned int src_pitch = src.step;
-                unsigned int Dx_pitch = Dx.step;
-                unsigned int Dy_pitch = Dy.step;
-
-                int src_offset_x = (src.offset % src.step) / src.elemSize();
-                int src_offset_y = src.offset / src.step;
-
-                float _scale = scale;
-
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&src.data ));
-                args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
-
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
-
-                args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&Dx.data ));
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.offset ));
-                args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
-                args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&Dy.data ));
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dy.offset ));
-                args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
-
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholecols ));
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholerows ));
-
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.cols ));
-                args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.rows ));
-
-                args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
-
-                String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
-                switch(src.type())
-                {
-                case CV_8UC1:
-                    option += " -D SRCTYPE=uchar";
-                    break;
-                case CV_32FC1:
-                    option += " -D SRCTYPE=float";
-                    break;
-                }
-                switch(borderType)
-                {
-                case cv::BORDER_CONSTANT:
-                    option += " -D BORDER_CONSTANT";
-                    break;
-                case cv::BORDER_REPLICATE:
-                    option += " -D BORDER_REPLICATE";
-                    break;
-                case cv::BORDER_REFLECT:
-                    option += " -D BORDER_REFLECT";
-                    break;
-                case cv::BORDER_REFLECT101:
-                    option += " -D BORDER_REFLECT_101";
-                    break;
-                case cv::BORDER_WRAP:
-                    option += " -D BORDER_WRAP";
-                    break;
-                default:
-                    CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
-                    break;
-                }
-
-                String kernel_name;
-                switch(ksize)
-                {
-                case -1:
-                    option += " -D SCHARR";
-                    kernel_name = "sobel3";
-                    break;
-                case 3:
-                    kernel_name = "sobel3";
-                    break;
-                case 5:
-                    kernel_name = "sobel5";
-                    break;
-                case 7:
-                    kernel_name = "sobel7";
-                    break;
-                default:
-                    CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
-                    break;
-                }
-                openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
-            }
-            else
-            {
-                if (ksize > 0)
-                {
-                    Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
-                    Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
-                }
-                else
-                {
-                    Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
-                    Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
-                }
-            }
-            CV_Assert(Dx.offset == 0 && Dy.offset == 0);
-        }
-
-        static void corner_ocl(const cv::ocl::ProgramEntry* source, String kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy,
-                        oclMat &dst, int border_type)
-        {
-            char borderType[30];
-            switch (border_type)
-            {
-            case cv::BORDER_CONSTANT:
-                sprintf(borderType, "BORDER_CONSTANT");
-                break;
-            case cv::BORDER_REFLECT101:
-                sprintf(borderType, "BORDER_REFLECT101");
-                break;
-            case cv::BORDER_REFLECT:
-                sprintf(borderType, "BORDER_REFLECT");
-                break;
-            case cv::BORDER_REPLICATE:
-                sprintf(borderType, "BORDER_REPLICATE");
-                break;
-            default:
-                CV_Error(Error::StsBadFlag, "BORDER type is not supported!");
-            }
-
-            std::string buildOptions = format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
-                    block_size / 2, block_size / 2, block_size, block_size, borderType);
-
-            size_t blockSizeX = 256, blockSizeY = 1;
-            size_t gSize = blockSizeX - block_size / 2 * 2;
-            size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX;
-            size_t rows_per_thread = 2;
-            size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
-                                 ((Dx.rows + rows_per_thread - 1) / rows_per_thread) :
-                                 (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
-
-            size_t gt[3] = { globalSizeX, globalSizeY, 1 };
-            size_t lt[3]  = { blockSizeX, blockSizeY, 1 };
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.wholerows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.wholecols ));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&Dx.step));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.wholerows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.wholecols ));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&Dy.step));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
-            args.push_back( std::make_pair( sizeof(cl_float) , (void *)&k));
-
-            openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, buildOptions.c_str());
-        }
-
-        void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
-                          double k, int borderType)
-        {
-            oclMat dx, dy;
-            cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
-        }
-
-        void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
-                          double k, int borderType)
-        {
-            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-            {
-                CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-                return;
-            }
-
-            CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE
-                      || borderType == cv::BORDER_REFLECT);
-
-            extractCovData(src, dx, dy, blockSize, ksize, borderType);
-            dst.create(src.size(), CV_32FC1);
-            corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
-        }
-
-        void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
-        {
-            oclMat dx, dy;
-            cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
-        }
-
-        void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
-        {
-            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
-            {
-                CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-                return;
-            }
-
-            CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 ||
-                      borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-
-            extractCovData(src, dx, dy, blockSize, ksize, borderType);
-            dst.create(src.size(), CV_32F);
-
-            corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
-        }
-
-        /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
-
-        static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
-        {
-            CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) );
-            CV_Assert( !(dst.step & 0x3) );
-
-            //Arrange the NDRange
-            int col = src.cols, row = src.rows;
-            int ltx = 16, lty = 8;
-            if (src.cols % ltx != 0)
-                col = (col / ltx + 1) * ltx;
-            if (src.rows % lty != 0)
-                row = (row / lty + 1) * lty;
-
-            size_t globalThreads[3] = {col, row, 1};
-            size_t localThreads[3]  = {ltx, lty, 1};
-
-            //set args
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sp ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sr ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxIter ));
-            args.push_back( std::make_pair( sizeof(cl_float) , (void *)&eps ));
-
-            openCLExecuteKernel(src.clCxt, &meanShift, "meanshift_kernel", globalThreads, localThreads, args, -1, -1);
-        }
-
-        void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, TermCriteria criteria)
-        {
-            if (src.empty())
-                CV_Error(Error::StsBadArg, "The input image is empty");
-
-            if ( src.depth() != CV_8U || src.oclchannels() != 4 )
-                CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
-
-            dst.create( src.size(), CV_8UC4 );
-
-            if ( !(criteria.type & TermCriteria::MAX_ITER) )
-                criteria.maxCount = 5;
-
-            int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
-
-            float eps;
-            if ( !(criteria.type & TermCriteria::EPS) )
-                eps = 1.f;
-            eps = (float)std::max(criteria.epsilon, 0.0);
-
-            meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
-        }
-
-        static void meanShiftProc_gpu(const oclMat &src, oclMat dstr, oclMat dstsp, int sp, int sr, int maxIter, float eps)
-        {
-            //sanity checks
-            CV_Assert( (src.cols == dstr.cols) && (src.rows == dstr.rows) &&
-                       (src.rows == dstsp.rows) && (src.cols == dstsp.cols));
-            CV_Assert( !(dstsp.step & 0x3) );
-
-            //Arrange the NDRange
-            int col = src.cols, row = src.rows;
-            int ltx = 16, lty = 8;
-            if (src.cols % ltx != 0)
-                col = (col / ltx + 1) * ltx;
-            if (src.rows % lty != 0)
-                row = (row / lty + 1) * lty;
-
-            size_t globalThreads[3] = {col, row, 1};
-            size_t localThreads[3]  = {ltx, lty, 1};
-
-            //set args
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dstr.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dstsp.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstr.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstsp.step ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstr.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstsp.offset ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstr.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstr.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sp ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sr ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxIter ));
-            args.push_back( std::make_pair( sizeof(cl_float) , (void *)&eps ));
-
-            openCLExecuteKernel(src.clCxt, &meanShift, "meanshiftproc_kernel", globalThreads, localThreads, args, -1, -1);
-        }
-
-        void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, TermCriteria criteria)
-        {
-            if (src.empty())
-                CV_Error(Error::StsBadArg, "The input image is empty");
-
-            if ( src.depth() != CV_8U || src.oclchannels() != 4 )
-                CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
-
-//            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
-//            {
-//                CV_Error(Error::OpenCLDoubleNotSupportedNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
-//                return;
-//            }
-
-            dstr.create( src.size(), CV_8UC4 );
-            dstsp.create( src.size(), CV_16SC2 );
-
-            if ( !(criteria.type & TermCriteria::MAX_ITER) )
-                criteria.maxCount = 5;
-
-            int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
-
-            float eps;
-            if ( !(criteria.type & TermCriteria::EPS) )
-                eps = 1.f;
-            eps = (float)std::max(criteria.epsilon, 0.0);
-
-            meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
-        }
-
-        ///////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////hist///////////////////////////////////////////////
-        /////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        namespace histograms
-        {
-            const int PARTIAL_HISTOGRAM256_COUNT = 256;
-            const int HISTOGRAM256_BIN_COUNT = 256;
-        }
-        ///////////////////////////////calcHist/////////////////////////////////////////////////////////////////
-        static void calc_sub_hist(const oclMat &mat_src, const oclMat &mat_sub_hist)
-        {
-            using namespace histograms;
-
-            int depth = mat_src.depth();
-
-            size_t localThreads[3]  = { HISTOGRAM256_BIN_COUNT, 1, 1 };
-            size_t globalThreads[3] = { PARTIAL_HISTOGRAM256_COUNT *localThreads[0], 1, 1};
-
-            int dataWidth = 16;
-            int dataWidth_bits = 4;
-            int mask = dataWidth - 1;
-
-            int cols = mat_src.cols * mat_src.oclchannels();
-            int src_offset = mat_src.offset;
-            int hist_step = mat_sub_hist.step >> 2;
-            int left_col = 0, right_col = 0;
-
-            if (cols >= dataWidth * 2 - 1)
-            {
-                left_col = dataWidth - (src_offset & mask);
-                left_col &= mask;
-                src_offset += left_col;
-                cols -= left_col;
-                right_col = cols & mask;
-                cols -= right_col;
-            }
-            else
-            {
-                left_col = cols;
-                right_col = 0;
-                cols = 0;
-                globalThreads[0] = 0;
-            }
-
-            std::vector<std::pair<size_t , const void *> > args;
-            if (globalThreads[0] != 0)
-            {
-                int tempcols = cols >> dataWidth_bits;
-                int inc_x = globalThreads[0] % tempcols;
-                int inc_y = globalThreads[0] / tempcols;
-                src_offset >>= dataWidth_bits;
-                int src_step = mat_src.step >> dataWidth_bits;
-                int datacount = tempcols * mat_src.rows;
-
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&datacount));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&tempcols));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&inc_x));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&inc_y));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&hist_step));
-
-                openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calc_sub_hist", globalThreads, localThreads, args, -1, depth);
-            }
-
-            if (left_col != 0 || right_col != 0)
-            {
-                src_offset = mat_src.offset;
-                localThreads[0] = 1;
-                localThreads[1] = 256;
-                globalThreads[0] = left_col + right_col;
-                globalThreads[1] = mat_src.rows;
-
-                args.clear();
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&left_col));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.rows));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&hist_step));
-
-                openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calc_sub_hist_border", globalThreads, localThreads, args, -1, depth);
-            }
-        }
-
-        static void merge_sub_hist(const oclMat &sub_hist, oclMat &mat_hist)
-        {
-            using namespace histograms;
-
-            size_t localThreads[3]  = { 256, 1, 1 };
-            size_t globalThreads[3] = { HISTOGRAM256_BIN_COUNT *localThreads[0], 1, 1};
-            int src_step = sub_hist.step >> 2;
-
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sub_hist.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_hist.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step));
-
-            openCLExecuteKernel(sub_hist.clCxt, &imgproc_histogram, "merge_hist", globalThreads, localThreads, args, -1, -1);
-        }
-
-        void calcHist(const oclMat &mat_src, oclMat &mat_hist)
-        {
-            using namespace histograms;
-            CV_Assert(mat_src.type() == CV_8UC1);
-            mat_hist.create(1, 256, CV_32SC1);
-
-            oclMat buf(PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_BIN_COUNT, CV_32SC1);
-            buf.setTo(0);
-
-            calc_sub_hist(mat_src, buf);
-            merge_sub_hist(buf, mat_hist);
-        }
-
-        ///////////////////////////////////equalizeHist/////////////////////////////////////////////////////
-        void equalizeHist(const oclMat &mat_src, oclMat &mat_dst)
-        {
-            mat_dst.create(mat_src.rows, mat_src.cols, CV_8UC1);
-
-            oclMat mat_hist(1, 256, CV_32SC1);
-
-            calcHist(mat_src, mat_hist);
-
-            size_t localThreads[3] = { 256, 1, 1};
-            size_t globalThreads[3] = { 256, 1, 1};
-            oclMat lut(1, 256, CV_8UC1);
-            int total = mat_src.rows * mat_src.cols;
-
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_hist.data));
-            args.push_back( std::make_pair( sizeof(int), (void *)&total));
-
-            openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calLUT", globalThreads, localThreads, args, -1, -1);
-            LUT(mat_src, lut, mat_dst);
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // CLAHE
-        namespace clahe
-        {
-            static void calcLut(const oclMat &src, oclMat &dst,
-                const int tilesX, const int tilesY, const cv::Size tileSize,
-                const int clipLimit, const float lutScale)
-            {
-                cl_int2 tile_size;
-                tile_size.s[0] = tileSize.width;
-                tile_size.s[1] = tileSize.height;
-
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
-                args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
-
-                String kernelName = "calcLut";
-                size_t localThreads[3]  = { 32, 8, 1 };
-                size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
-                bool is_cpu = isCpuDevice();
-                if (is_cpu)
-                    openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU");
-                else
-                {
-                    cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
-                    int wave_size = (int)queryWaveFrontSize(kernel);
-                    openCLSafeCall(clReleaseKernel(kernel));
-
-                    std::string opt = format("-D WAVE_SIZE=%d", wave_size);
-                    openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str());
-                }
-            }
-
-            static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
-                const int tilesX, const int tilesY, const Size & tileSize)
-            {
-                cl_int2 tile_size;
-                tile_size.s[0] = tileSize.width;
-                tile_size.s[1] = tileSize.height;
-
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.step ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.offset ));
-
-                size_t localThreads[3]  = { 32, 8, 1 };
-                size_t globalThreads[3] = { src.cols, src.rows, 1 };
-
-                openCLExecuteKernel(Context::getContext(), &imgproc_clahe, "transform", globalThreads, localThreads, args, -1, -1);
-            }
-        }
-
-        namespace
-        {
-            class CLAHE_Impl : public cv::CLAHE
-            {
-            public:
-                CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
-
-                cv::AlgorithmInfo* info() const;
-
-                void apply(cv::InputArray src, cv::OutputArray dst);
-
-                void setClipLimit(double clipLimit);
-                double getClipLimit() const;
-
-                void setTilesGridSize(cv::Size tileGridSize);
-                cv::Size getTilesGridSize() const;
-
-                void collectGarbage();
-
-            private:
-                double clipLimit_;
-                int tilesX_;
-                int tilesY_;
-
-                oclMat srcExt_;
-                oclMat lut_;
-            };
-
-            CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
-                clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
-            {
-            }
-
-            CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE_OCL",
-                obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
-                obj.info()->addParam(obj, "tilesX", obj.tilesX_);
-                obj.info()->addParam(obj, "tilesY", obj.tilesY_))
-
-            void CLAHE_Impl::apply(cv::InputArray src_raw, cv::OutputArray dst_raw)
-            {
-                oclMat& src = getOclMatRef(src_raw);
-                oclMat& dst = getOclMatRef(dst_raw);
-                CV_Assert( src.type() == CV_8UC1 );
-
-                dst.create( src.size(), src.type() );
-
-                const int histSize = 256;
-
-                ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_);
-
-                cv::Size tileSize;
-                oclMat srcForLut;
-
-                if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
-                {
-                    tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
-                    srcForLut = src;
-                }
-                else
-                {
-                    ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0,
-                                            tilesX_ - (src.cols % tilesX_), BORDER_REFLECT_101, Scalar::all(0));
-
-                    tileSize = Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
-                    srcForLut = srcExt_;
-                }
-
-                const int tileSizeTotal = tileSize.area();
-                const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
-
-                int clipLimit = 0;
-                if (clipLimit_ > 0.0)
-                {
-                    clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
-                    clipLimit = std::max(clipLimit, 1);
-                }
-
-                clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale);
-                clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize);
-            }
-
-            void CLAHE_Impl::setClipLimit(double clipLimit)
-            {
-                clipLimit_ = clipLimit;
-            }
-
-            double CLAHE_Impl::getClipLimit() const
-            {
-                return clipLimit_;
-            }
-
-            void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
-            {
-                tilesX_ = tileGridSize.width;
-                tilesY_ = tileGridSize.height;
-            }
-
-            cv::Size CLAHE_Impl::getTilesGridSize() const
-            {
-                return cv::Size(tilesX_, tilesY_);
-            }
-
-            void CLAHE_Impl::collectGarbage()
-            {
-                srcExt_.release();
-                lut_.release();
-            }
-        }
-
-        cv::Ptr<cv::CLAHE> createCLAHE(double clipLimit, cv::Size tileGridSize)
-        {
-            return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
-        }
-
-        //////////////////////////////////bilateralFilter////////////////////////////////////////////////////
-
-        static void oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d,
-                               double sigma_color, double sigma_space,
-                               int borderType )
-        {
-            int cn = src.channels();
-            int i, j, maxk, radius;
-
-            CV_Assert( (src.channels() == 1 || src.channels() == 3) &&
-                       src.type() == dst.type() && src.size() == dst.size() &&
-                       src.data != dst.data );
-
-            if ( sigma_color <= 0 )
-                sigma_color = 1;
-            if ( sigma_space <= 0 )
-                sigma_space = 1;
-
-            double gauss_color_coeff = -0.5 / (sigma_color * sigma_color);
-            double gauss_space_coeff = -0.5 / (sigma_space * sigma_space);
-
-            if ( d <= 0 )
-                radius = cvRound(sigma_space * 1.5);
-            else
-                radius = d / 2;
-            radius = MAX(radius, 1);
-            d = radius * 2 + 1;
-
-            oclMat temp;
-            copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
-
-            std::vector<float> _color_weight(cn * 256);
-            std::vector<float> _space_weight(d * d);
-            std::vector<int> _space_ofs(d * d);
-            float *color_weight = &_color_weight[0];
-            float *space_weight = &_space_weight[0];
-            int *space_ofs = &_space_ofs[0];
-
-            int dst_step_in_pixel = dst.step / dst.elemSize();
-            int dst_offset_in_pixel = dst.offset / dst.elemSize();
-            int temp_step_in_pixel = temp.step / temp.elemSize();
-
-            // initialize color-related bilateral filter coefficients
-            for( i = 0; i < 256 * cn; i++ )
-                color_weight[i] = (float)std::exp(i * i * gauss_color_coeff);
-
-            // initialize space-related bilateral filter coefficients
-            for( i = -radius, maxk = 0; i <= radius; i++ )
-                for( j = -radius; j <= radius; j++ )
-                {
-                    double r = std::sqrt((double)i * i + (double)j * j);
-                    if ( r > radius )
-                        continue;
-                    space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff);
-                    space_ofs[maxk++] = (int)(i * temp_step_in_pixel + j);
-                }
-
-            oclMat oclcolor_weight(1, cn * 256, CV_32FC1, color_weight);
-            oclMat oclspace_weight(1, d * d, CV_32FC1, space_weight);
-            oclMat oclspace_ofs(1, d * d, CV_32SC1, space_ofs);
-
-            String kernelName = "bilateral";
-#ifdef ANDROID
-            size_t localThreads[3]  = { 16, 8, 1 };
-#else
-            size_t localThreads[3]  = { 16, 16, 1 };
-#endif
-            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-            if ((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0))
-            {
-                kernelName = "bilateral2";
-                globalThreads[0] = dst.cols >> 2;
-            }
-
-            std::vector<std::pair<size_t , const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&temp.data ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxk ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&radius ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step_in_pixel ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset_in_pixel ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp_step_in_pixel ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp.cols ));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&oclspace_weight.data ));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data ));
-
-            openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, dst.oclchannels(), dst.depth());
-        }
-
-        void bilateralFilter(const oclMat &src, oclMat &dst, int radius, double sigmaclr, double sigmaspc, int borderType)
-        {
-            dst.create( src.size(), src.type() );
-            if ( src.depth() == CV_8U )
-                oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType );
-            else
-                CV_Error(Error::StsUnsupportedFormat, "Bilateral filtering is only implemented for CV_8U images");
-        }
-
-    }
-}
-//////////////////////////////////mulSpectrums////////////////////////////////////////////////////
-void cv::ocl::mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int /*flags*/, float scale, bool conjB)
-{
-    CV_Assert(a.type() == CV_32FC2);
-    CV_Assert(b.type() == CV_32FC2);
-
-    c.create(a.size(), CV_32FC2);
-
-    size_t lt[3]  = { 16, 16, 1 };
-    size_t gt[3]  = { a.cols, a.rows, 1 };
-
-    String kernelName = conjB ? "mulAndScaleSpectrumsKernel_CONJ":"mulAndScaleSpectrumsKernel";
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&a.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&b.data ));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&c.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.step ));
-
-    Context *clCxt = Context::getContext();
-    openCLExecuteKernel(clCxt, &imgproc_mulAndScaleSpectrums, kernelName, gt, lt, args, -1, -1);
-}
-//////////////////////////////////convolve////////////////////////////////////////////////////
-// ported from CUDA module
-void cv::ocl::ConvolveBuf::create(Size image_size, Size templ_size)
-{
-    result_size = Size(image_size.width - templ_size.width + 1,
-                       image_size.height - templ_size.height + 1);
-
-    block_size = user_block_size;
-    if (user_block_size.width == 0 || user_block_size.height == 0)
-        block_size = estimateBlockSize(result_size, templ_size);
-
-    dft_size.width  = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
-    dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
-
-    // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192),
-    // see CUDA Toolkit 4.1 CUFFT Library Programming Guide
-    //if (dft_size.width > 8192)
-    dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1.);
-    //if (dft_size.height > 8192)
-    dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1.);
-
-    // To avoid wasting time doing small DFTs
-    dft_size.width = std::max(dft_size.width, 512);
-    dft_size.height = std::max(dft_size.height, 512);
-
-    image_block.create(dft_size, CV_32F);
-    templ_block.create(dft_size, CV_32F);
-    result_data.create(dft_size, CV_32F);
-
-    //spect_len = dft_size.height * (dft_size.width / 2 + 1);
-    image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
-    templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
-    result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
-
-    // Use maximum result matrix block size for the estimated DFT block size
-    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
-    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
-}
-
-Size cv::ocl::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/)
-{
-    int width = (result_size.width + 2) / 3;
-    int height = (result_size.height + 2) / 3;
-    width = std::min(width, result_size.width);
-    height = std::min(height, result_size.height);
-    return Size(width, height);
-}
-
-static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf)
-{
-#if defined HAVE_CLAMDFFT
-    CV_Assert(image.type() == CV_32F);
-    CV_Assert(templ.type() == CV_32F);
-
-    buf.create(image.size(), templ.size());
-    result.create(buf.result_size, CV_32F);
-
-    Size& block_size = buf.block_size;
-    Size& dft_size = buf.dft_size;
-
-    oclMat& image_block = buf.image_block;
-    oclMat& templ_block = buf.templ_block;
-    oclMat& result_data = buf.result_data;
-
-    oclMat& image_spect = buf.image_spect;
-    oclMat& templ_spect = buf.templ_spect;
-    oclMat& result_spect = buf.result_spect;
-
-    oclMat templ_roi = templ;
-    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
-                   templ_block.cols - templ_roi.cols, 0, Scalar());
-
-    cv::ocl::dft(templ_block, templ_spect, dft_size);
-
-    // Process all blocks of the result matrix
-    for (int y = 0; y < result.rows; y += block_size.height)
-    {
-        for (int x = 0; x < result.cols; x += block_size.width)
-        {
-            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
-                                std::min(y + dft_size.height, image.rows) - y);
-            Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
-
-            oclMat image_roi(image, roi0);
-
-            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
-                           0, image_block.cols - image_roi.cols, 0, Scalar());
-
-            cv::ocl::dft(image_block, image_spect, dft_size);
-
-            mulSpectrums(image_spect, templ_spect, result_spect, 0,
-                                 1.f / dft_size.area(), ccorr);
-
-            cv::ocl::dft(result_spect, result_data, dft_size, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
-
-            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
-                                 std::min(y + block_size.height, result.rows) - y);
-
-            Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
-            Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
-
-            oclMat result_roi(result, roi1);
-            oclMat result_block(result_data, roi2);
-
-            result_block.copyTo(result_roi);
-        }
-    }
-
-#else
-    CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
-#define UNUSED(x) (void)(x);
-    UNUSED(image) UNUSED(templ) UNUSED(result) UNUSED(ccorr) UNUSED(buf)
-#undef UNUSED
-#endif
-}
-
-static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
-{
-    CV_Assert(src.depth() == CV_32FC1);
-    CV_Assert(temp1.depth() == CV_32F);
-    CV_Assert(temp1.cols <= 17 && temp1.rows <= 17);
-
-    dst.create(src.size(), src.type());
-
-    CV_Assert(src.cols == dst.cols && src.rows == dst.rows);
-    CV_Assert(src.type() == dst.type());
-
-    size_t localThreads[3]  = { 16, 16, 1 };
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
-    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-    int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize();
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&temp1.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp1_step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp1.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp1.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&temp1_offset ));
-
-    openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth());
-}
-
-void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y, bool ccorr)
-{
-    CV_Assert(x.depth() == CV_32F);
-    CV_Assert(t.depth() == CV_32F);
-    y.create(x.size(), x.type());
-    String kernelName = "convolve";
-    if(t.cols > 17 || t.rows > 17)
-    {
-        ConvolveBuf buf;
-        convolve_run_fft(x, t, y, ccorr, buf);
-    }
-    else
-    {
-        CV_Assert(ccorr == false);
-        convolve_run(x, t, y, kernelName, &imgproc_convolve);
-    }
-}
-void cv::ocl::convolve(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf)
-{
-    result.create(image.size(), image.type());
-    convolve_run_fft(image, templ, result, ccorr, buf);
-}
diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp
deleted file mode 100644
index 47d6c83..0000000
--- a/modules/ocl/src/interpolate_frames.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Comuter Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular urpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        namespace interpolate
-        {
-            //The following are ported from NPP_staging.cu
-            // As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer,
-            // we may have to do this on kernel
-            void memsetKernel(float val, oclMat &img, int height, int offset);
-            void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset);
-            void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
-                                   int b_offset, int d_offset); // buffer, dst offset
-
-            //OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1
-            void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
-                            oclMat &buffer, int buf_offset, float timeScale, int dst_offset);
-            //OpenCL conversion of BlendFrames
-            void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer,
-                             float pos, oclMat &newFrame, cl_mem &, cl_mem &);
-
-            // bind a buffer to an image
-            void bindImgTex(const oclMat &img, cl_mem &tex);
-        }
-    }
-}
-
-void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
-                                const oclMat &fu, const oclMat &fv,
-                                const oclMat &bu, const oclMat &bv,
-                                float pos, oclMat &newFrame, oclMat &buf)
-{
-    CV_Assert(frame0.type() == CV_32FC1);
-    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
-    CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
-    CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
-    CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
-    CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
-
-    newFrame.create(frame0.size(), frame0.type());
-
-    buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
-    buf.setTo(Scalar::all(0));
-
-    size_t step = frame0.step;
-
-    CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
-    cl_mem tex_src0 = 0, tex_src1 = 0;
-
-    // warp flow
-    using namespace interpolate;
-
-    bindImgTex(frame0, tex_src0);
-    bindImgTex(frame1, tex_src1);
-
-    // CUDA Offsets
-    enum
-    {
-        cov0 = 0,
-        cov1,
-        fwdU,
-        fwdV,
-        bwdU,
-        bwdV
-    };
-
-    vectorWarp(fu, fu, fv, buf, cov0, pos,        fwdU);
-    vectorWarp(fv, fu, fv, buf, cov0, pos,        fwdV);
-    vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU);
-    vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU);
-
-    blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1);
-
-    openCLFree(tex_src0);
-    openCLFree(tex_src1);
-}
-
-void interpolate::memsetKernel(float val, oclMat &img, int height, int offset)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "memsetKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    int step = img.step / sizeof(float);
-    offset = step * height * offset;
-
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&val));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
-
-    size_t globalThreads[3] = {img.cols, height, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "normalizeKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    int step   = buffer.step / sizeof(float);
-    factor_offset = step * height * factor_offset;
-    dst_offset    = step * height * dst_offset;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buffer.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&buffer.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&factor_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset));
-
-    size_t globalThreads[3] = {buffer.cols, height, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
-                                    int b_offset, int d_offset)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "forwardWarpKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    int f_step  = u.step / sizeof(float); // flow step
-    int b_step  = buffer.step / sizeof(float);
-
-    b_offset  = b_step * src.rows * b_offset;
-    d_offset  = b_step * src.rows * d_offset;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buffer.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&u.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&v.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&f_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&b_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&b_offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&d_offset));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&time_scale));
-
-    size_t globalThreads[3] = {src.cols, src.rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
-                             oclMat &buffer, int b_offset, float timeScale, int d_offset)
-{
-    memsetKernel(0, buffer, src.rows, b_offset);
-    forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset);
-    normalizeKernel(buffer, src.rows, b_offset, d_offset);
-}
-
-void interpolate::blendFrames(const oclMat &frame0, const oclMat &/*frame1*/, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
-{
-    int step = buffer.step / sizeof(float);
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "blendFramesKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&tex_src0));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&tex_src1));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buffer.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&newFrame.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&frame0.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&frame0.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&pos));
-
-    size_t globalThreads[3] = {frame0.cols, frame0.rows, 1};
-    size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
-{
-    if(texture)
-    {
-        openCLFree(texture);
-    }
-    texture = bindTexture(img);
-}
diff --git a/modules/ocl/src/kalman.cpp b/modules/ocl/src/kalman.cpp
deleted file mode 100644
index 5a133a7..0000000
--- a/modules/ocl/src/kalman.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//     Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-KalmanFilter::KalmanFilter()
-{
-
-}
-
-KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams, int type)
-{
-    init(dynamParams, measureParams, controlParams, type);
-}
-
-void KalmanFilter::init(int DP, int MP, int CP, int type)
-{
-    CV_Assert( DP > 0 && MP > 0 );
-    CV_Assert( type == CV_32F || type == CV_64F );
-    CP = cv::max(CP, 0);
-
-    statePre.create(DP, 1, type);
-    statePre.setTo(Scalar::all(0));
-
-    statePost.create(DP, 1, type);
-    statePost.setTo(Scalar::all(0));
-
-    transitionMatrix.create(DP, DP, type);
-    setIdentity(transitionMatrix, 1);
-
-    processNoiseCov.create(DP, DP, type);
-    setIdentity(processNoiseCov, 1);
-
-    measurementNoiseCov.create(MP, MP, type);
-    setIdentity(measurementNoiseCov, 1);
-
-    measurementMatrix.create(MP, DP, type);
-    measurementMatrix.setTo(Scalar::all(0));
-
-    errorCovPre.create(DP, DP, type);
-    errorCovPre.setTo(Scalar::all(0));
-
-    errorCovPost.create(DP, DP, type);
-    errorCovPost.setTo(Scalar::all(0));
-
-    gain.create(DP, MP, type);
-    gain.setTo(Scalar::all(0));
-
-    if( CP > 0 )
-    {
-        controlMatrix.create(DP, CP, type);
-        controlMatrix.setTo(Scalar::all(0));
-    }
-    else
-        controlMatrix.release();
-
-    temp1.create(DP, DP, type);
-    temp2.create(MP, DP, type);
-    temp3.create(MP, MP, type);
-    temp4.create(MP, DP, type);
-    temp5.create(MP, 1, type);
-}
-
-CV_EXPORTS const oclMat& KalmanFilter::predict(const oclMat& control)
-{
-    gemm(transitionMatrix, statePost, 1, oclMat(), 0, statePre);
-    oclMat temp;
-
-    if(control.data)
-        gemm(controlMatrix, control, 1, statePre, 1, statePre);
-    gemm(transitionMatrix, errorCovPost, 1, oclMat(), 0, temp1);
-    gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);
-    statePre.copyTo(statePost);
-    return statePre;
-}
-
-CV_EXPORTS const oclMat& KalmanFilter::correct(const oclMat& measurement)
-{
-    CV_Assert(measurement.empty() == false);
-    gemm(measurementMatrix, errorCovPre, 1, oclMat(), 0, temp2);
-    gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);
-    Mat temp;
-    solve(Mat(temp3), Mat(temp2), temp, DECOMP_SVD);
-    temp4.upload(temp);
-    gain = temp4.t();
-    gemm(measurementMatrix, statePre, -1, measurement, 1, temp5);
-    gemm(gain, temp5, 1, statePre, 1, statePost);
-    gemm(gain, temp2, -1, errorCovPre, 1, errorCovPost);
-    return statePost;
-}
diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp
deleted file mode 100644
index 9a5b193..0000000
--- a/modules/ocl/src/kmeans.cpp
+++ /dev/null
@@ -1,451 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//     Xiaopeng Fu, fuxiaopeng2222@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-static void generateRandomCenter(const std::vector<Vec2f>& box, float* center, RNG& rng)
-{
-    size_t j, dims = box.size();
-    float margin = 1.f/dims;
-    for( j = 0; j < dims; j++ )
-        center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0];
-}
-
-// This class is copied from matrix.cpp in core module.
-class KMeansPPDistanceComputer : public ParallelLoopBody
-{
-public:
-    KMeansPPDistanceComputer( float *_tdist2,
-                              const float *_data,
-                              const float *_dist,
-                              int _dims,
-                              size_t _step,
-                              size_t _stepci )
-        : tdist2(_tdist2),
-          data(_data),
-          dist(_dist),
-          dims(_dims),
-          step(_step),
-          stepci(_stepci) { }
-
-    void operator()( const cv::Range& range ) const
-    {
-        const int begin = range.start;
-        const int end = range.end;
-
-        for ( int i = begin; i<end; i++ )
-        {
-            tdist2[i] = std::min(normL2Sqr_(data + step*i, data + stepci, dims), dist[i]);
-        }
-    }
-
-private:
-    KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC
-
-    float *tdist2;
-    const float *data;
-    const float *dist;
-    const int dims;
-    const size_t step;
-    const size_t stepci;
-};
-/*
-k-means center initialization using the following algorithm:
-Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
-*/
-static void generateCentersPP(const Mat& _data, Mat& _out_centers,
-                              int K, RNG& rng, int trials)
-{
-    int i, j, k, dims = _data.cols, N = _data.rows;
-    const float* data = (float*)_data.data;
-    size_t step = _data.step/sizeof(data[0]);
-    std::vector<int> _centers(K);
-    int* centers = &_centers[0];
-    std::vector<float> _dist(N*3);
-    float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
-    double sum0 = 0;
-
-    centers[0] = (unsigned)rng % N;
-
-    for( i = 0; i < N; i++ )
-    {
-        dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims);
-        sum0 += dist[i];
-    }
-
-    for( k = 1; k < K; k++ )
-    {
-        double bestSum = DBL_MAX;
-        int bestCenter = -1;
-
-        for( j = 0; j < trials; j++ )
-        {
-            double p = (double)rng*sum0, s = 0;
-            for( i = 0; i < N-1; i++ )
-                if( (p -= dist[i]) <= 0 )
-                    break;
-            int ci = i;
-
-            parallel_for_(Range(0, N),
-                          KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
-            for( i = 0; i < N; i++ )
-            {
-                s += tdist2[i];
-            }
-
-            if( s < bestSum )
-            {
-                bestSum = s;
-                bestCenter = ci;
-                std::swap(tdist, tdist2);
-            }
-        }
-        centers[k] = bestCenter;
-        sum0 = bestSum;
-        std::swap(dist, tdist);
-    }
-
-    for( k = 0; k < K; k++ )
-    {
-        const float* src = data + step*centers[k];
-        float* dst = _out_centers.ptr<float>(k);
-        for( j = 0; j < dims; j++ )
-            dst[j] = src[j];
-    }
-}
-
-void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
-{
-    CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
-    CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
-    CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
-
-    dists.create(src.rows, 1, CV_32FC1);
-    labels.create(src.rows, 1, CV_32SC1);
-
-    std::stringstream build_opt_ss;
-    build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");
-
-    int src_step = src.step / src.elemSize1();
-    int centers_step = centers.step / centers.elemSize1();
-    int feature_width = centers.cols * centers.oclchannels();
-    int src_offset = src.offset / src.elemSize1();
-    int centers_offset = centers.offset / centers.elemSize1();
-
-    int all_dist_count = src.rows * centers.rows;
-    oclMat all_dist(1, all_dist_count, CV_32FC1);
-
-    std::vector<std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&centers.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&all_dist.data));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&feature_width));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.rows));
-
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers_offset));
-
-    size_t globalThreads[3] = { all_dist_count, 1, 1 };
-
-    openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
-                        "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());
-
-    Mat all_dist_cpu;
-    all_dist.download(all_dist_cpu);
-
-    for (int i = 0; i < src.rows; ++i)
-    {
-        Point p;
-        double minVal;
-
-        Rect roi(i * centers.rows, 0, centers.rows, 1);
-        Mat hdr(all_dist_cpu, roi);
-
-        cv::minMaxLoc(hdr, &minVal, NULL, &p);
-
-        dists.at<float>(i, 0) = static_cast<float>(minVal);
-        labels.at<int>(i, 0) = p.x;
-    }
-}
-
-///////////////////////////////////k - means /////////////////////////////////////////////////////////
-
-double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
-                       TermCriteria criteria, int attempts, int flags, oclMat &_centers)
-{
-    const int SPP_TRIALS = 3;
-    bool isrow = _src.rows == 1 && _src.oclchannels() > 1;
-    int N = !isrow ? _src.rows : _src.cols;
-    int dims = (!isrow ? _src.cols : 1) * _src.oclchannels();
-    int type = _src.depth();
-
-    attempts = std::max(attempts, 1);
-    CV_Assert(type == CV_32F && K > 0 );
-    CV_Assert( N >= K );
-
-    Mat _labels;
-    if( flags & KMEANS_USE_INITIAL_LABELS )
-    {
-        CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
-                   _bestLabels.cols * _bestLabels.rows == N &&
-                   _bestLabels.type() == CV_32S );
-        _bestLabels.download(_labels);
-    }
-    else
-    {
-        if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
-                _bestLabels.cols * _bestLabels.rows == N &&
-                _bestLabels.type() == CV_32S &&
-                _bestLabels.isContinuous()))
-            _bestLabels.create(N, 1, CV_32S);
-        _labels.create(_bestLabels.size(), _bestLabels.type());
-    }
-    int* labels = _labels.ptr<int>();
-
-    Mat data;
-    _src.download(data);
-    Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
-    std::vector<int> counters(K);
-    std::vector<Vec2f> _box(dims);
-    Vec2f* box = &_box[0];
-    double best_compactness = DBL_MAX, compactness = 0;
-    RNG& rng = theRNG();
-    int a, iter, i, j, k;
-
-    if( criteria.type & TermCriteria::EPS )
-        criteria.epsilon = std::max(criteria.epsilon, 0.);
-    else
-        criteria.epsilon = FLT_EPSILON;
-    criteria.epsilon *= criteria.epsilon;
-
-    if( criteria.type & TermCriteria::COUNT )
-        criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
-    else
-        criteria.maxCount = 100;
-
-    if( K == 1 )
-    {
-        attempts = 1;
-        criteria.maxCount = 2;
-    }
-
-    const float* sample = data.ptr<float>();
-    for( j = 0; j < dims; j++ )
-        box[j] = Vec2f(sample[j], sample[j]);
-
-    for( i = 1; i < N; i++ )
-    {
-        sample = data.ptr<float>(i);
-        for( j = 0; j < dims; j++ )
-        {
-            float v = sample[j];
-            box[j][0] = std::min(box[j][0], v);
-            box[j][1] = std::max(box[j][1], v);
-        }
-    }
-
-    for( a = 0; a < attempts; a++ )
-    {
-        double max_center_shift = DBL_MAX;
-        for( iter = 0;; )
-        {
-            swap(centers, old_centers);
-
-            if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) )
-            {
-                if( flags & KMEANS_PP_CENTERS )
-                    generateCentersPP(data, centers, K, rng, SPP_TRIALS);
-                else
-                {
-                    for( k = 0; k < K; k++ )
-                        generateRandomCenter(_box, centers.ptr<float>(k), rng);
-                }
-            }
-            else
-            {
-                if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) )
-                {
-                    for( i = 0; i < N; i++ )
-                        CV_Assert( (unsigned)labels[i] < (unsigned)K );
-                }
-
-                // compute centers
-                centers = Scalar(0);
-                for( k = 0; k < K; k++ )
-                    counters[k] = 0;
-
-                for( i = 0; i < N; i++ )
-                {
-                    sample = data.ptr<float>(i);
-                    k = labels[i];
-                    float* center = centers.ptr<float>(k);
-                    j=0;
-#if CV_ENABLE_UNROLLED
-                    for(; j <= dims - 4; j += 4 )
-                    {
-                        float t0 = center[j] + sample[j];
-                        float t1 = center[j+1] + sample[j+1];
-
-                        center[j] = t0;
-                        center[j+1] = t1;
-
-                        t0 = center[j+2] + sample[j+2];
-                        t1 = center[j+3] + sample[j+3];
-
-                        center[j+2] = t0;
-                        center[j+3] = t1;
-                    }
-#endif
-                    for( ; j < dims; j++ )
-                        center[j] += sample[j];
-                    counters[k]++;
-                }
-
-                if( iter > 0 )
-                    max_center_shift = 0;
-
-                for( k = 0; k < K; k++ )
-                {
-                    if( counters[k] != 0 )
-                        continue;
-
-                    // if some cluster appeared to be empty then:
-                    //   1. find the biggest cluster
-                    //   2. find the farthest from the center point in the biggest cluster
-                    //   3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
-                    int max_k = 0;
-                    for( int k1 = 1; k1 < K; k1++ )
-                    {
-                        if( counters[max_k] < counters[k1] )
-                            max_k = k1;
-                    }
-
-                    double max_dist = 0;
-                    int farthest_i = -1;
-                    float* new_center =  centers.ptr<float>(k);
-                    float* old_center =  centers.ptr<float>(max_k);
-                    float* _old_center = temp.ptr<float>(); // normalized
-                    float scale = 1.f/counters[max_k];
-                    for( j = 0; j < dims; j++ )
-                        _old_center[j] = old_center[j]*scale;
-
-                    for( i = 0; i < N; i++ )
-                    {
-                        if( labels[i] != max_k )
-                            continue;
-                        sample = data.ptr<float>(i);
-                        double dist = normL2Sqr_(sample, _old_center, dims);
-
-                        if( max_dist <= dist )
-                        {
-                            max_dist = dist;
-                            farthest_i = i;
-                        }
-                    }
-
-                    counters[max_k]--;
-                    counters[k]++;
-                    labels[farthest_i] = k;
-                    sample = data.ptr<float>(farthest_i);
-
-                    for( j = 0; j < dims; j++ )
-                    {
-                        old_center[j] -= sample[j];
-                        new_center[j] += sample[j];
-                    }
-                }
-
-                for( k = 0; k < K; k++ )
-                {
-                    float* center = centers.ptr<float>(k);
-                    CV_Assert( counters[k] != 0 );
-
-                    float scale = 1.f/counters[k];
-                    for( j = 0; j < dims; j++ )
-                        center[j] *= scale;
-
-                    if( iter > 0 )
-                    {
-                        double dist = 0;
-                        const float* old_center = old_centers.ptr<float>(k);
-                        for( j = 0; j < dims; j++ )
-                        {
-                            double t = center[j] - old_center[j];
-                            dist += t*t;
-                        }
-                        max_center_shift = std::max(max_center_shift, dist);
-                    }
-                }
-            }
-
-            if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
-                break;
-
-            // assign labels
-            Mat dists(1, N, CV_64F);
-            _centers.upload(centers);
-            distanceToCenters(_src, _centers, dists, _labels);
-            _bestLabels.upload(_labels);
-
-            float* dist = dists.ptr<float>(0);
-            compactness = 0;
-            for( i = 0; i < N; i++ )
-                compactness += (double)dist[i];
-        }
-
-        if( compactness < best_compactness )
-            best_compactness = compactness;
-    }
-
-    return best_compactness;
-}
diff --git a/modules/ocl/src/knearest.cpp b/modules/ocl/src/knearest.cpp
deleted file mode 100644
index 143e7aa..0000000
--- a/modules/ocl/src/knearest.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-KNearestNeighbour::KNearestNeighbour()
-{
-    clear();
-}
-
-KNearestNeighbour::~KNearestNeighbour()
-{
-    clear();
-    samples_ocl.release();
-}
-
-void KNearestNeighbour::clear()
-{
-    CvKNearest::clear();
-}
-
-bool KNearestNeighbour::train(const Mat& trainData, Mat& labels, Mat& sampleIdx,
-                              bool isRegression, int _max_k, bool updateBase)
-{
-    max_k = _max_k;
-    bool cv_knn_train = CvKNearest::train(trainData, labels, sampleIdx, isRegression, max_k, updateBase);
-
-    CvVectors* s = CvKNearest::samples;
-
-    cv::Mat samples_mat(s->count, CvKNearest::var_count + 1, s->type);
-
-    float* s1 = (float*)(s + 1);
-    for(int i = 0; i < s->count; i++)
-    {
-        float* t1 = s->data.fl[i];
-        for(int j = 0; j < CvKNearest::var_count; j++)
-        {
-            Point pos(j, i);
-            samples_mat.at<float>(pos) = t1[j];
-        }
-
-        Point pos_label(CvKNearest::var_count, i);
-        samples_mat.at<float>(pos_label) = s1[i];
-    }
-
-    samples_ocl = samples_mat;
-    return cv_knn_train;
-}
-
-void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lables)
-{
-    CV_Assert(!samples_ocl.empty());
-    lables.create(samples.rows, 1, CV_32FC1);
-
-    CV_Assert(samples.cols == CvKNearest::var_count);
-    CV_Assert(samples.type() == CV_32FC1);
-    CV_Assert(k >= 1 && k <= max_k);
-
-    int k1 = KNearest::get_sample_count();
-    k1 = MIN( k1, k );
-
-    String kernel_name = "knn_find_nearest";
-    cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize;
-    int nThreads = local_memory_size / (2 * k * 4);
-    if(nThreads >= 256)
-        nThreads = 256;
-
-    int smem_size = nThreads * k * 4 * 2;
-    size_t local_thread[] = {1, nThreads, 1};
-    size_t global_thread[] = {1, samples.rows, 1};
-
-    char build_option[50];
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        sprintf(build_option, " ");
-    }else
-        sprintf(build_option, "-D DOUBLE_SUPPORT");
-
-    std::vector< std::pair<size_t, const void*> > args;
-
-    int samples_ocl_step = samples_ocl.step/samples_ocl.elemSize();
-    int samples_step = samples.step/samples.elemSize();
-    int lables_step = lables.step/lables.elemSize();
-
-    int _regression = 0;
-    if(CvKNearest::regression)
-        _regression = 1;
-
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&samples.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&k));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&samples_ocl.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&lables.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&lables_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&_regression));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&k1));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void*)&nThreads));
-    args.push_back(std::make_pair(smem_size, (void*)NULL));
-    openCLExecuteKernel(Context::getContext(), &knearest, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
-}
\ No newline at end of file
diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp
deleted file mode 100644
index c95ad84..0000000
--- a/modules/ocl/src/match_template.cpp
+++ /dev/null
@@ -1,570 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        void matchTemplate_SQDIFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void matchTemplate_SQDIFF_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void convolve_32F(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void matchTemplate_CCORR(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void matchTemplate_CCORR_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void matchTemplate_CCOFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-        void matchTemplate_CCOFF_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
-
-
-        void matchTemplateNaive_SQDIFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, int cn);
-
-        void matchTemplateNaive_CCORR(
-            const oclMat &image, const oclMat &templ, oclMat &result, int cn);
-
-        void extractFirstChannel_32F(
-            const oclMat &image, oclMat &result);
-
-        // Evaluates optimal template's area threshold. If
-        // template's area is less  than the threshold, we use naive match
-        // template version, otherwise FFT-based (if available)
-        static bool useNaive(int method, int depth, Size size)
-        {
-#ifdef HAVE_CLAMDFFT
-            if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)))
-            {
-                return true;
-            }
-            else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
-            {
-                return size.height < 18 && size.width < 18;
-            }
-            else
-                return false;
-#else
-#define UNUSED(x) (void)(x);
-            UNUSED(method) UNUSED(depth) UNUSED(size)
-#undef  UNUSED
-            return true;
-#endif
-        }
-
-        //////////////////////////////////////////////////////////////////////
-        // SQDIFF
-        void matchTemplate_SQDIFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
-        {
-            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
-            if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
-            {
-                matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
-                return;
-            }
-            else
-            {
-                buf.image_sqsums.resize(1);
-
-                // TODO, add double support for ocl::integral
-                // use CPU integral temporarily
-                Mat sums, sqsums;
-                cv::integral(Mat(image.reshape(1)), sums, sqsums);
-                buf.image_sqsums[0] = sqsums;
-
-                unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-                matchTemplate_CCORR(image, templ, result, buf);
-
-                //port CUDA's matchTemplatePrepared_SQDIFF_8U
-                Context *clCxt = image.clCxt;
-                String kernelName = "matchTemplate_Prepared_SQDIFF";
-                std::vector< std::pair<size_t, const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
-                args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-                size_t globalThreads[3] = {result.cols, result.rows, 1};
-                size_t localThreads[3]  = {16, 16, 1};
-
-                const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
-                openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
-            }
-        }
-
-        void matchTemplate_SQDIFF_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            matchTemplate_CCORR(image, templ, result, buf);
-            buf.image_sums.resize(1);
-
-            integral(image.reshape(1), buf.image_sums[0]);
-
-            unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-
-            Context *clCxt = image.clCxt;
-            String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
-            std::vector< std::pair<size_t, const void *> > args;
-
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
-        }
-
-        void matchTemplateNaive_SQDIFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, int)
-        {
-            CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
-                      || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
-                     );
-            CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
-            CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
-            Context *clCxt = image.clCxt;
-            String kernelName = "matchTemplate_Naive_SQDIFF";
-
-            std::vector< std::pair<size_t, const void *> > args;
-
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
-        }
-
-        //////////////////////////////////////////////////////////////////////
-        // CCORR
-        void convolve_32F(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            ConvolveBuf convolve_buf;
-            convolve_buf.user_block_size = buf.user_block_size;
-            if (image.oclchannels() == 1)
-                convolve(image, templ, result, true, convolve_buf);
-            else
-            {
-                oclMat result_;
-                convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
-                extractFirstChannel_32F(result_, result);
-            }
-        }
-
-        void matchTemplate_CCORR(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
-            if (useNaive(TM_CCORR, image.depth(), templ.size()))
-            {
-                matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
-                return;
-            }
-            else
-            {
-                if(image.depth() == CV_8U && templ.depth() == CV_8U)
-                {
-                    image.convertTo(buf.imagef, CV_32F);
-                    templ.convertTo(buf.templf, CV_32F);
-                    convolve_32F(buf.imagef, buf.templf, result, buf);
-                }
-                else
-                {
-                    convolve_32F(image, templ, result, buf);
-                }
-            }
-        }
-
-        void matchTemplate_CCORR_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            cv::ocl::oclMat temp;
-            matchTemplate_CCORR(image, templ, result, buf);
-            buf.image_sums.resize(1);
-            buf.image_sqsums.resize(1);
-            integral(image.reshape(1), buf.image_sums[0], temp);
-            if(temp.depth() == CV_64F)
-                temp.convertTo(buf.image_sqsums[0], CV_32FC1);
-            else
-                buf.image_sqsums[0] = temp;
-            unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-
-            Context *clCxt = image.clCxt;
-            String kernelName = "normalizeKernel";
-            std::vector< std::pair<size_t, const void *> > args;
-
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
-        }
-
-        void matchTemplateNaive_CCORR(
-            const oclMat &image, const oclMat &templ, oclMat &result, int)
-        {
-            CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
-                      || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
-                     );
-            CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
-            CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
-            Context *clCxt = image.clCxt;
-            String kernelName = "matchTemplate_Naive_CCORR";
-
-            std::vector< std::pair<size_t, const void *> > args;
-
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
-        }
-        //////////////////////////////////////////////////////////////////////
-        // CCOFF
-        void matchTemplate_CCOFF(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
-
-            matchTemplate_CCORR(image, templ, result, buf);
-
-            Context *clCxt = image.clCxt;
-            String kernelName;
-
-            kernelName = "matchTemplate_Prepared_CCOFF";
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-            Vec4f templ_sum = Vec4f::all(0);
-            // to be continued in the following section
-            if(image.oclchannels() == 1)
-            {
-                buf.image_sums.resize(1);
-                integral(image, buf.image_sums[0]);
-
-                templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
-                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-            }
-            else
-            {
-
-                split(image, buf.images);
-                templ_sum = sum(templ) / templ.size().area();
-                buf.image_sums.resize(buf.images.size());
-
-
-                for(int i = 0; i < image.oclchannels(); i ++)
-                {
-                    integral(buf.images[i], buf.image_sums[i]);
-                }
-                switch(image.oclchannels())
-                {
-                case 4:
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
-                    break;
-                default:
-                    CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
-                    break;
-                }
-            }
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
-        }
-
-        void matchTemplate_CCOFF_NORMED(
-            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
-        {
-            image.convertTo(buf.imagef, CV_32F);
-            templ.convertTo(buf.templf, CV_32F);
-
-            matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
-            float scale = 1.f / templ.size().area();
-
-            Context *clCxt = image.clCxt;
-            String kernelName;
-
-            kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-            args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
-
-            Vec4f templ_sum   = Vec4f::all(0);
-            Vec4f templ_sqsum = Vec4f::all(0);
-            // to be continued in the following section
-            if(image.oclchannels() == 1)
-            {
-                buf.image_sums.resize(1);
-                buf.image_sqsums.resize(1);
-                cv::ocl::oclMat temp;
-                integral(image, buf.image_sums[0], temp);
-                if(temp.depth() == CV_64F)
-                    temp.convertTo(buf.image_sqsums[0], CV_32FC1);
-                else
-                    buf.image_sqsums[0] = temp;
-
-                templ_sum[0]   = (float)sum(templ)[0];
-
-                templ_sqsum[0] = sqrSum(templ)[0];
-
-                templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
-                templ_sum[0]   *= scale;
-
-                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
-                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
-                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
-            }
-            else
-            {
-
-                split(image, buf.images);
-                templ_sum   = sum(templ);
-
-                templ_sqsum = sqrSum(templ);
-
-                templ_sqsum -= scale * templ_sum * templ_sum;
-
-                float templ_sqsum_sum = 0;
-                for(int i = 0; i < image.oclchannels(); i ++)
-                {
-                    templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
-                }
-                templ_sum   *= scale;
-                buf.image_sums.resize(buf.images.size());
-                buf.image_sqsums.resize(buf.images.size());
-                cv::ocl::oclMat temp;
-                for(int i = 0; i < image.oclchannels(); i ++)
-                {
-                    integral(buf.images[i], buf.image_sums[i], temp);
-                    if(temp.depth() == CV_64F)
-                        temp.convertTo(buf.image_sqsums[i], CV_32FC1);
-                    else
-                        buf.image_sqsums[i] = temp;
-                }
-
-                switch(image.oclchannels())
-                {
-                case 4:
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[1].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[2].data) );
-                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[3].data) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
-                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
-                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
-                    break;
-                default:
-                    CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
-                    break;
-                }
-            }
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
-        }
-        void extractFirstChannel_32F(const oclMat &image, oclMat &result)
-        {
-            Context *clCxt = image.clCxt;
-            String kernelName;
-
-            kernelName = "extractFirstChannel";
-            size_t globalThreads[3] = {result.cols, result.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) );
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
-
-            openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
-        }
-    }/*ocl*/
-} /*cv*/
-
-void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
-{
-    MatchTemplateBuf buf;
-    matchTemplate(image, templ, result, method, buf);
-}
-void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
-{
-    CV_Assert(image.type() == templ.type());
-    CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
-
-    typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
-
-    const Caller callers[] =
-    {
-        ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
-        ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
-        ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
-    };
-
-    Caller caller = callers[method];
-    CV_Assert(caller);
-    caller(image, templ, result, buf);
-}
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp
deleted file mode 100644
index c028fb7..0000000
--- a/modules/ocl/src/matrix_operations.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-#define ALIGN 32
-#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN
-
-// helper routines
-namespace cv
-{
-    namespace ocl
-    {
-        extern DevMemType gDeviceMemType;
-        extern DevMemRW gDeviceMemRW;
-    }
-}
-
-////////////////////////////////////////////////////////////////////////
-// convert_C3C4
-
-static void convert_C3C4(const cl_mem &src, oclMat &dst)
-{
-    Context *clCxt = dst.clCxt;
-    int pixel_end = dst.wholecols * dst.wholerows - 1;
-    int dstStep_in_pixel = dst.step1() / dst.oclchannels();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[dst.depth()]);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholecols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholerows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));
-
-    size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 };
-
-#ifdef ANDROID
-    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, NULL,
-                        args, -1, -1, buildOptions.c_str());
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads,
-                        args, -1, -1, buildOptions.c_str());
-#endif
-}
-
-////////////////////////////////////////////////////////////////////////
-// convert_C4C3
-
-static void convert_C4C3(const oclMat &src, cl_mem &dst)
-{
-    int srcStep_in_pixel = src.step1() / src.oclchannels();
-    int pixel_end = src.wholecols * src.wholerows - 1;
-    Context *clCxt = src.clCxt;
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[src.depth()]);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholerows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));
-
-    size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1};
-
-#ifdef ANDROID
-    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, NULL, args, -1, -1, buildOptions.c_str());
-#else
-    size_t localThreads[3] = { 256, 1, 1};
-    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
-#endif
-}
-
-void cv::ocl::oclMat::upload(const Mat &m)
-{
-    if (!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && m.depth() == CV_64F)
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    CV_DbgAssert(!m.empty());
-    Size wholeSize;
-    Point ofs;
-    m.locateROI(wholeSize, ofs);
-    create(wholeSize, m.type());
-
-    if (m.channels() == 3)
-    {
-        int pitch = wholeSize.width * 3 * m.elemSize1();
-        int tail_padding = m.elemSize1() * 3072;
-        int err;
-        cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
-                                     (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
-        openCLVerifyCall(err);
-
-        openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
-        convert_C3C4(temp, *this);
-        openCLSafeCall(clReleaseMemObject(temp));
-    }
-    else
-        openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
-
-    rows = m.rows;
-    cols = m.cols;
-    offset = ofs.y * step + ofs.x * elemSize();
-}
-
-cv::ocl::oclMat::operator cv::_InputArray()
-{
-    return _InputArray(cv::_InputArray::OCL_MAT, this);
-}
-
-cv::ocl::oclMat::operator cv::_OutputArray()
-{
-    return _OutputArray(cv::_InputArray::OCL_MAT, this);
-}
-
-cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
-{
-    CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
-    return *(oclMat*)src.getObj();
-}
-
-cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
-{
-    CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
-    return *(oclMat*)src.getObj();
-}
-
-void cv::ocl::oclMat::download(cv::Mat &m) const
-{
-    CV_DbgAssert(!this->empty());
-    m.create(wholerows, wholecols, type());
-
-    if(m.channels() == 3)
-    {
-        int pitch = wholecols * 3 * m.elemSize1();
-        int tail_padding = m.elemSize1() * 3072;
-        int err;
-        cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
-                                     (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
-        openCLVerifyCall(err);
-
-        convert_C4C3(*this, temp);
-        openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
-        openCLSafeCall(clReleaseMemObject(temp));
-    }
-    else
-    {
-        openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost);
-    }
-
-    Size wholesize;
-    Point ofs;
-    locateROI(wholesize, ofs);
-    m.adjustROI(-ofs.y, ofs.y + rows - wholerows, -ofs.x, ofs.x + cols - wholecols);
-}
-
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// CopyTo /////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, String kernelName)
-{
-    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
-                  src.rows == dst.rows && src.cols == dst.cols
-                  && mask.type() == CV_8UC1);
-
-    std::vector<std::pair<size_t , const void *> > args;
-
-    String string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
-        {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
-        {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
-        {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
-    };
-
-    char compile_option[32];
-    sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
-    size_t localThreads[3] = {16, 16, 1};
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
-    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
-    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
-
-    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
-                        localThreads, args, -1, -1, compile_option);
-}
-
-void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
-{
-    if (mask.empty())
-    {
-        CV_DbgAssert(!this->empty());
-        mat.create(size(), type());
-        openCLCopyBuffer2D(clCxt, mat.data, mat.step, mat.offset,
-                           data, step, cols * elemSize(), rows, offset);
-    }
-    else
-    {
-        mat.create(size(), type());
-        copy_to_with_mask(*this, mat, mask, "copy_to_with_mask");
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// ConvertTo ////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-
-static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
-{
-    String kernelName = "convert_to";
-    float alpha_f = alpha, beta_f = beta;
-    int sdepth = src.depth(), ddepth = dst.depth();
-    int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1();
-    int cols1 = src.cols * src.oclchannels();
-
-    char buildOptions[150], convertString[50];
-    const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]);
-    sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth],
-            typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString);
-
-    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
-    std::vector<std::pair<size_t , const void *> > args;
-
-    size_t localThreads[3] = { 16, 16, 1 };
-    size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0],
-                                divUp(dst.rows, localThreads[1]) * localThreads[1], 1 };
-
-    int doffset1 = dst.offset / dst.elemSize1();
-    int soffset1 = src.offset / src.elemSize1();
-
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols1 ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sstep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&soffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstep1 ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&doffset1 ));
-    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
-    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
-
-    openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
-                        localThreads, args, -1, -1, buildOptions);
-}
-
-void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
-{
-    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
-            (depth() == CV_64F || dst.depth() == CV_64F))
-    {
-        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-        return;
-    }
-
-    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
-                   && fabs(beta) < std::numeric_limits<double>::epsilon();
-
-    if( rtype < 0 )
-        rtype = type();
-    else
-        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
-
-    int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
-    if( sdepth == ddepth && noScale )
-    {
-        copyTo(dst);
-        return;
-    }
-
-    oclMat temp;
-    const oclMat *psrc = this;
-    if( sdepth != ddepth && psrc == &dst )
-        psrc = &(temp = *this);
-
-    dst.create( size(), rtype );
-    convert_run(*psrc, dst, alpha, beta);
-}
-
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// setTo ////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-
-oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
-{
-    setTo(s);
-    return *this;
-}
-
-#ifdef CL_VERSION_1_2
-
-template <typename CLT, typename PT>
-static std::vector<uchar> cvt1(const cv::Scalar & s)
-{
-    std::vector<uchar> _buf(sizeof(CLT));
-    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
-    buf[0] = saturate_cast<PT>(s[0]);
-    return _buf;
-}
-
-template <typename CLT, typename PT>
-static std::vector<uchar> cvt2(const cv::Scalar & s)
-{
-    std::vector<uchar> _buf(sizeof(CLT));
-    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
-    buf->s[0] = saturate_cast<PT>(s[0]);
-    buf->s[1] = saturate_cast<PT>(s[1]);
-    return _buf;
-}
-
-template <typename CLT, typename PT>
-static std::vector<uchar> cvt4(const cv::Scalar & s)
-{
-    std::vector<uchar> _buf(sizeof(CLT));
-    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
-    buf->s[0] = saturate_cast<PT>(s[0]);
-    buf->s[1] = saturate_cast<PT>(s[1]);
-    buf->s[2] = saturate_cast<PT>(s[2]);
-    buf->s[3] = saturate_cast<PT>(s[3]);
-    return _buf;
-}
-
-typedef std::vector<uchar> (*ConvertFunc)(const cv::Scalar & s);
-
-static std::vector<uchar> scalarToCLVector(const cv::Scalar & s, int type)
-{
-    const int depth = CV_MAT_DEPTH(type);
-    const int channels = CV_MAT_CN(type);
-
-    static const ConvertFunc funcs[4][7] =
-    {
-        { cvt1<cl_uchar, uchar>, cvt1<cl_char, char>, cvt1<cl_ushort, ushort>, cvt1<cl_short, short>,
-          cvt1<cl_int, int>, cvt1<cl_float, float>, cvt1<cl_double, double> },
-
-        { cvt2<cl_uchar2, uchar>, cvt2<cl_char2, char>, cvt2<cl_ushort2, ushort>, cvt2<cl_short2, short>,
-          cvt2<cl_int2, int>, cvt2<cl_float2, float>, cvt2<cl_double2, double> },
-
-        { 0, 0, 0, 0, 0, 0, 0 },
-
-        { cvt4<cl_uchar4, uchar>, cvt4<cl_char4, char>, cvt4<cl_ushort4, ushort>, cvt4<cl_short4, short>,
-          cvt4<cl_int4, int>, cvt4<cl_float4, float>, cvt4<cl_double4, double> }
-    };
-
-    ConvertFunc func = funcs[channels - 1][depth];
-    return func(s);
-}
-
-#endif
-
-static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName)
-{
-    std::vector<std::pair<size_t , const void *> > args;
-
-    size_t localThreads[3] = {16, 16, 1};
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
-
-    if (dst.type() == CV_8UC1)
-        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
-    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
-
-    Mat mat(1, 1, dst.type(), scalar);
-
-#ifdef CL_VERSION_1_2
-    // this enables backwards portability to
-    // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
-    if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && dst.isContinuous())
-    {
-        std::vector<uchar> p = ::scalarToCLVector(scalar, CV_MAKE_TYPE(dst.depth(), dst.oclchannels()));
-        clEnqueueFillBuffer(getClCommandQueue(dst.clCxt),
-                (cl_mem)dst.data, (void*)&p[0], p.size(),
-                0, dst.step * dst.rows, 0, NULL, NULL);
-    }
-    else
-#endif
-    {
-        oclMat m(mat);
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data ));
-        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
-        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
-
-        openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
-            localThreads, args, -1, -1, buildOptions.c_str());
-    }
-}
-
-static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
-{
-    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
-    std::vector<std::pair<size_t , const void *> > args;
-    size_t localThreads[3] = { 16, 16, 1 };
-    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
-
-    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
-    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
-    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
-
-    oclMat m(Mat(1, 1, dst.type(), scalar));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&m.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
-    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
-    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
-                        localThreads, args, -1, -1, buildOptions.c_str());
-}
-
-oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
-{
-    CV_Assert(mask.type() == CV_8UC1);
-    CV_Assert( this->depth() >= 0 && this->depth() <= 6 );
-    CV_DbgAssert( !this->empty());
-    if (mask.empty())
-    {
-        set_to_withoutmask_run(*this, scalar, type() == CV_8UC1 ?
-                                   "set_to_without_mask_C1_D0" : "set_to_without_mask");
-    }
-    else
-        set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
-
-    return *this;
-}
-
-oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
-{
-    if( new_rows != 0 && new_rows != rows)
-    {
-        CV_Error( Error::StsBadFunc, "oclMat's number of rows can not be changed for current version" );
-    }
-
-    oclMat hdr = *this;
-
-    int cn = oclchannels();
-    if (new_cn == 0)
-        new_cn = cn;
-
-    int total_width = cols * cn;
-    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
-        new_rows = rows * total_width / new_cn;
-
-    if (new_rows != 0 && new_rows != rows)
-    {
-        int total_size = total_width * rows;
-
-        if (!isContinuous())
-            CV_Error(Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
-
-        if ((unsigned)new_rows > (unsigned)total_size)
-            CV_Error(Error::StsOutOfRange, "Bad new number of rows");
-
-        total_width = total_size / new_rows;
-        if (total_width * new_rows != total_size)
-            CV_Error(Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
-
-        hdr.rows = new_rows;
-        hdr.step = total_width * elemSize1();
-    }
-
-    int new_width = total_width / new_cn;
-    if (new_width * new_cn != total_width)
-        CV_Error(Error::BadNumChannels, "The total width is not divisible by the new number of channels");
-
-    hdr.cols = new_width;
-    hdr.wholecols = new_width;
-    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
-    return hdr;
-
-}
-
-void cv::ocl::oclMat::createEx(Size size, int type,
-                               DevMemRW rw_type, DevMemType mem_type)
-{
-    createEx(size.height, size.width, type, rw_type, mem_type);
-}
-
-void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
-{
-    createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
-}
-
-void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
-                               DevMemRW rw_type, DevMemType mem_type)
-{
-    clCxt = Context::getContext();
-    /* core logic */
-    _type &= Mat::TYPE_MASK;
-    if( rows == _rows && cols == _cols && type() == _type && data )
-        return;
-    if( data )
-        release();
-    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
-    if( _rows > 0 && _cols > 0 )
-    {
-        flags = Mat::MAGIC_VAL + _type;
-        rows = _rows;
-        cols = _cols;
-        wholerows = _rows;
-        wholecols = _cols;
-        size_t esz = elemSize();
-
-        void *dev_ptr;
-        openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
-
-        if (esz * cols == step)
-            flags |= Mat::CONTINUOUS_FLAG;
-
-        int64 _nettosize = (int64)step * rows;
-        size_t nettosize = (size_t)_nettosize;
-
-        datastart = data = (uchar *)dev_ptr;
-        dataend = data + nettosize;
-
-        refcount = (int *)fastMalloc(sizeof(*refcount));
-        *refcount = 1;
-    }
-}
-
-void cv::ocl::oclMat::release()
-{
-    if( refcount && CV_XADD(refcount, -1) == 1 )
-    {
-        fastFree(refcount);
-        openCLFree(datastart);
-    }
-    data = datastart = dataend = 0;
-    step = rows = cols = 0;
-    offset = wholerows = wholecols = 0;
-    refcount = 0;
-}
-
-oclMat& cv::ocl::oclMat::operator+=( const oclMat& m )
-{
-    add(*this, m, *this);
-    return *this;
-}
-
-oclMat& cv::ocl::oclMat::operator-=( const oclMat& m )
-{
-    subtract(*this, m, *this);
-    return *this;
-}
-
-oclMat& cv::ocl::oclMat::operator*=( const oclMat& m )
-{
-    multiply(*this, m, *this);
-    return *this;
-}
-
-oclMat& cv::ocl::oclMat::operator/=( const oclMat& m )
-{
-    divide(*this, m, *this);
-    return *this;
-}
diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp
deleted file mode 100644
index e5dfdd4..0000000
--- a/modules/ocl/src/mcwutil.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace std;
-
-namespace cv
-{
-    namespace ocl
-    {
-        // provide additional methods for the user to interact with the command queue after a task is fired
-        static void openCLExecuteKernel_2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
-                                   size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-                                   int depth, const char *build_options, FLUSH_MODE finish_mode)
-        {
-            //construct kernel name
-            //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
-            //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
-            std::stringstream idxStr;
-            if(channels != -1)
-                idxStr << "_C" << channels;
-            if(depth != -1)
-                idxStr << "_D" << depth;
-            kernelName += idxStr.str().c_str();
-
-            cl_kernel kernel;
-            kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
-
-            if ( localThreads != NULL)
-            {
-                globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
-                globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
-                globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
-
-                //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
-                cv::ocl::openCLVerifyKernel(clCxt, kernel,  localThreads);
-            }
-            for(size_t i = 0; i < args.size(); i ++)
-                openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
-            openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads,
-                                                  localThreads, 0, NULL, NULL));
-
-            switch(finish_mode)
-            {
-            case CLFINISH:
-                clFinish(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
-            case CLFLUSH:
-                clFlush(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
-                break;
-            case DISABLE:
-            default:
-                break;
-            }
-            openCLSafeCall(clReleaseKernel(kernel));
-        }
-
-        void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
-                                  size_t globalThreads[3], size_t localThreads[3],
-                                  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
-        {
-            openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
-                                 channels, depth, NULL, finish_mode);
-        }
-        void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
-                                  size_t globalThreads[3], size_t localThreads[3],
-                                  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
-
-        {
-            openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
-                                  build_options, finish_mode);
-        }
-
-        cl_mem bindTexture(const oclMat &mat)
-        {
-            cl_mem texture;
-            cl_image_format format;
-            int err;
-            int depth    = mat.depth();
-            int channels = mat.oclchannels();
-
-            switch(depth)
-            {
-            case CV_8U:
-                format.image_channel_data_type = CL_UNSIGNED_INT8;
-                break;
-            case CV_32S:
-                format.image_channel_data_type = CL_UNSIGNED_INT32;
-                break;
-            case CV_32F:
-                format.image_channel_data_type = CL_FLOAT;
-                break;
-            default:
-                CV_Error(-1, "Image forma is not supported");
-                break;
-            }
-            switch(channels)
-            {
-            case 1:
-                format.image_channel_order     = CL_R;
-                break;
-            case 3:
-                format.image_channel_order     = CL_RGB;
-                break;
-            case 4:
-                format.image_channel_order     = CL_RGBA;
-                break;
-            default:
-                CV_Error(-1, "Image format is not supported");
-                break;
-            }
-#ifdef CL_VERSION_1_2
-            //this enables backwards portability to
-            //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
-            if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2))
-            {
-                cl_image_desc desc;
-                desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
-                desc.image_width      = mat.cols;
-                desc.image_height     = mat.rows;
-                desc.image_depth      = 0;
-                desc.image_array_size = 1;
-                desc.image_row_pitch  = 0;
-                desc.image_slice_pitch = 0;
-                desc.buffer           = NULL;
-                desc.num_mip_levels   = 0;
-                desc.num_samples      = 0;
-                texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
-            }
-            else
-#endif
-            {
-                texture = clCreateImage2D(
-                    *(cl_context*)mat.clCxt->getOpenCLContextPtr(),
-                    CL_MEM_READ_WRITE,
-                    &format,
-                    mat.cols,
-                    mat.rows,
-                    0,
-                    NULL,
-                    &err);
-            }
-            size_t origin[] = { 0, 0, 0 };
-            size_t region[] = { mat.cols, mat.rows, 1 };
-
-            cl_mem devData;
-            if (mat.cols * mat.elemSize() != mat.step)
-            {
-                devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
-                    * mat.elemSize(), NULL, NULL);
-                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
-                clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin,
-                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
-                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
-            }
-            else
-            {
-                devData = (cl_mem)mat.data;
-            }
-
-            clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
-            if ((mat.cols * mat.elemSize() != mat.step))
-            {
-                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
-                clReleaseMemObject(devData);
-            }
-
-            openCLSafeCall(err);
-            return texture;
-        }
-
-        Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
-        {
-            return makePtr<TextureCL>(bindTexture(mat), mat.rows, mat.cols, mat.type());
-        }
-
-        void releaseTexture(cl_mem& texture)
-        {
-            openCLFree(texture);
-        }
-    }//namespace ocl
-
-}//namespace cv
diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp
deleted file mode 100644
index 0ba6e8c..0000000
--- a/modules/ocl/src/moments.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma,  jin@multicorewareinc.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other Materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-#include "opencv2/imgproc/types_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-
-#include "opencl_kernels.hpp"
-
-#if defined _MSC_VER
-#define snprintf sprintf_s
-#endif
-namespace cv
-{
-    namespace ocl
-    {
-        // The function calculates center of gravity and the central second order moments
-        static void icvCompleteMomentState( CvMoments* moments )
-        {
-            double cx = 0, cy = 0;
-            double mu20, mu11, mu02;
-
-            assert( moments != 0 );
-            moments->inv_sqrt_m00 = 0;
-
-            if( fabs(moments->m00) > DBL_EPSILON )
-            {
-                double inv_m00 = 1. / moments->m00;
-                cx = moments->m10 * inv_m00;
-                cy = moments->m01 * inv_m00;
-                moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
-            }
-
-            // mu20 = m20 - m10*cx
-            mu20 = moments->m20 - moments->m10 * cx;
-            // mu11 = m11 - m10*cy
-            mu11 = moments->m11 - moments->m10 * cy;
-            // mu02 = m02 - m01*cy
-            mu02 = moments->m02 - moments->m01 * cy;
-
-            moments->mu20 = mu20;
-            moments->mu11 = mu11;
-            moments->mu02 = mu02;
-
-            // mu30 = m30 - cx*(3*mu20 + cx*m10)
-            moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
-            mu11 += mu11;
-            // mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
-            moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
-            // mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
-            moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
-            // mu03 = m03 - cy*(3*mu02 + cy*m01)
-            moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
-        }
-
-
-        static void icvContourMoments( CvSeq* contour, CvMoments* mom )
-        {
-            if( contour->total )
-            {
-                CvSeqReader reader;
-                int lpt = contour->total;
-                double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
-
-                cvStartReadSeq( contour, &reader, 0 );
-
-                size_t reader_size = lpt << 1;
-                cv::Mat reader_mat(1,reader_size,CV_32FC1);
-
-                bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
-
-                if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
-                {
-                    CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
-                }
-
-                if( is_float )
-                {
-                    for(size_t i = 0; i < reader_size; ++i)
-                    {
-                        reader_mat.at<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
-                        reader_mat.at<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
-                        CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
-                    }
-                }
-                else
-                {
-                    for(size_t i = 0; i < reader_size; ++i)
-                    {
-                        reader_mat.at<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
-                        reader_mat.at<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
-                        CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
-                    }
-                }
-
-                cv::ocl::oclMat dst_a(10, lpt, CV_64FC1);
-                cv::ocl::oclMat reader_oclmat(reader_mat);
-                int llength = std::min(lpt,128);
-                size_t localThreads[3]  = { llength, 1, 1};
-                size_t globalThreads[3] = { lpt, 1, 1};
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&contour->total ));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
-                cl_int dst_step = (cl_int)dst_a.step;
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
-
-                char builOption[128];
-                snprintf(builOption, 128, "-D CV_8UC1");
-
-                openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1, builOption);
-
-                cv::Mat dst(dst_a);
-                a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
-                if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-                {
-                    for (int i = 0; i < contour->total; ++i)
-                    {
-                        a00 += dst.at<cl_long>(0, i);
-                        a10 += dst.at<cl_long>(1, i);
-                        a01 += dst.at<cl_long>(2, i);
-                        a20 += dst.at<cl_long>(3, i);
-                        a11 += dst.at<cl_long>(4, i);
-                        a02 += dst.at<cl_long>(5, i);
-                        a30 += dst.at<cl_long>(6, i);
-                        a21 += dst.at<cl_long>(7, i);
-                        a12 += dst.at<cl_long>(8, i);
-                        a03 += dst.at<cl_long>(9, i);
-                    }
-                }
-                else
-                {
-                    a00 = cv::sum(dst.row(0))[0];
-                    a10 = cv::sum(dst.row(1))[0];
-                    a01 = cv::sum(dst.row(2))[0];
-                    a20 = cv::sum(dst.row(3))[0];
-                    a11 = cv::sum(dst.row(4))[0];
-                    a02 = cv::sum(dst.row(5))[0];
-                    a30 = cv::sum(dst.row(6))[0];
-                    a21 = cv::sum(dst.row(7))[0];
-                    a12 = cv::sum(dst.row(8))[0];
-                    a03 = cv::sum(dst.row(9))[0];
-                }
-
-                double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
-                if( fabs(a00) > FLT_EPSILON )
-                {
-                    if( a00 > 0 )
-                    {
-                        db1_2 = 0.5;
-                        db1_6 = 0.16666666666666666666666666666667;
-                        db1_12 = 0.083333333333333333333333333333333;
-                        db1_24 = 0.041666666666666666666666666666667;
-                        db1_20 = 0.05;
-                        db1_60 = 0.016666666666666666666666666666667;
-                    }
-                    else
-                    {
-                        db1_2 = -0.5;
-                        db1_6 = -0.16666666666666666666666666666667;
-                        db1_12 = -0.083333333333333333333333333333333;
-                        db1_24 = -0.041666666666666666666666666666667;
-                        db1_20 = -0.05;
-                        db1_60 = -0.016666666666666666666666666666667;
-                    }
-
-                    // spatial moments
-                    mom->m00 = a00 * db1_2;
-                    mom->m10 = a10 * db1_6;
-                    mom->m01 = a01 * db1_6;
-                    mom->m20 = a20 * db1_12;
-                    mom->m11 = a11 * db1_24;
-                    mom->m02 = a02 * db1_12;
-                    mom->m30 = a30 * db1_20;
-                    mom->m21 = a21 * db1_60;
-                    mom->m12 = a12 * db1_60;
-                    mom->m03 = a03 * db1_20;
-
-                    icvCompleteMomentState( mom );
-                }
-            }
-        }
-
-        Moments ocl_moments(oclMat& src, bool binary) //for image
-        {
-            CV_Assert(src.oclchannels() == 1);
-            if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-            {
-                CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
-            }
-
-            if(binary)
-            {
-                oclMat mask;
-                if(src.type() != CV_8UC1)
-                {
-                    src.convertTo(mask, CV_8UC1);
-                }
-                oclMat src8u(src.size(), CV_8UC1);
-                src8u.setTo(Scalar(255), mask);
-                src = src8u;
-            }
-            const int TILE_SIZE = 256;
-
-            CvMoments mom;
-            memset(&mom, 0, sizeof(mom));
-
-            cv::Size size = src.size();
-            int blockx, blocky;
-            blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
-            blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;
-
-            oclMat dst_m;
-            int tile_height = TILE_SIZE;
-
-            size_t localThreads[3]  = {1, tile_height, 1};
-            size_t globalThreads[3] = {blockx, size.height, 1};
-
-            if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-            {
-                dst_m.create(blocky * 10, blockx, CV_64FC1);
-            }else
-            {
-                dst_m.create(blocky * 10, blockx, CV_32FC1);
-            }
-
-            int src_step = (int)(src.step/src.elemSize());
-            int dstm_step = (int)(dst_m.step/dst_m.elemSize());
-
-            std::vector<std::pair<size_t , const void *> > args,args_sum;
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
-            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));
-
-            int binary_;
-            if(binary)
-                binary_ = 1;
-            else
-                binary_ = 0;
-            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));
-
-            char builOption[128];
-            if(binary || src.type() == CV_8UC1)
-            {
-                snprintf(builOption, 128, "-D CV_8UC1");
-            }else if(src.type() == CV_16UC1)
-            {
-                snprintf(builOption, 128, "-D CV_16UC1");
-            }else if(src.type() == CV_16SC1)
-            {
-                snprintf(builOption, 128, "-D CV_16SC1");
-            }else if(src.type() == CV_32FC1)
-            {
-                snprintf(builOption, 128, "-D CV_32FC1");
-            }else if(src.type() == CV_64FC1)
-            {
-                snprintf(builOption, 128, "-D CV_64FC1");
-            }else
-            {
-                CV_Error( CV_StsUnsupportedFormat, "" );
-            }
-
-            openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);
-
-            Mat tmp(dst_m);
-            tmp.convertTo(tmp, CV_64FC1);
-
-            double tmp_m[10] = {0};
-
-            for(int j = 0; j < tmp.rows; j += 10)
-            {
-                for(int i = 0; i < tmp.cols; i++)
-                {
-                    tmp_m[0] += tmp.at<double>(j, i);
-                    tmp_m[1] += tmp.at<double>(j + 1, i);
-                    tmp_m[2] += tmp.at<double>(j + 2, i);
-                    tmp_m[3] += tmp.at<double>(j + 3, i);
-                    tmp_m[4] += tmp.at<double>(j + 4, i);
-                    tmp_m[5] += tmp.at<double>(j + 5, i);
-                    tmp_m[6] += tmp.at<double>(j + 6, i);
-                    tmp_m[7] += tmp.at<double>(j + 7, i);
-                    tmp_m[8] += tmp.at<double>(j + 8, i);
-                    tmp_m[9] += tmp.at<double>(j + 9, i);
-                }
-            }
-
-            mom.m00 = tmp_m[0];
-            mom.m10 = tmp_m[1];
-            mom.m01 = tmp_m[2];
-            mom.m20 = tmp_m[3];
-            mom.m11 = tmp_m[4];
-            mom.m02 = tmp_m[5];
-            mom.m30 = tmp_m[6];
-            mom.m21 = tmp_m[7];
-            mom.m12 = tmp_m[8];
-            mom.m03 = tmp_m[9];
-            icvCompleteMomentState( &mom );
-            return mom;
-        }
-
-        Moments ocl_moments(InputArray _contour) //for contour
-        {
-            CvMoments mom;
-            memset(&mom, 0, sizeof(mom));
-
-            Mat arr = _contour.getMat();
-            CvMat c_array = arr;
-
-            const void* array = &c_array;
-
-            CvSeq* contour = 0;
-            if( CV_IS_SEQ( array ))
-            {
-                contour = (CvSeq*)(array);
-                if( !CV_IS_SEQ_POINT_SET( contour ))
-                    CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
-            }
-
-            int type, coi = 0;
-
-            CvMat stub, *mat = (CvMat*)(array);
-            CvContour contourHeader;
-            CvSeqBlock block;
-
-            if( !contour )
-            {
-                mat = cvGetMat( mat, &stub, &coi );
-                type = CV_MAT_TYPE( mat->type );
-
-                if( type == CV_32SC2 || type == CV_32FC2 )
-                {
-                    contour = cvPointSeqFromMat(
-                        CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED,
-                        mat, &contourHeader, &block );
-                }
-            }
-
-            CV_Assert(contour);
-
-            icvContourMoments(contour, &mom);
-            return mom;
-        }
-    }
-}
diff --git a/modules/ocl/src/mssegmentation.cpp b/modules/ocl/src/mssegmentation.cpp
deleted file mode 100644
index a569c12..0000000
--- a/modules/ocl/src/mssegmentation.cpp
+++ /dev/null
@@ -1,402 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
-# pragma GCC diagnostic ignored "-Warray-bounds"
-#endif
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-// Auxiliray stuff
-namespace
-{
-
-    //
-    // Declarations
-    //
-
-    class DjSets
-    {
-    public:
-        DjSets(int n);
-        int find(int elem);
-        int merge(int set1, int set2);
-
-        std::vector<int> parent;
-        std::vector<int> rank;
-        std::vector<int> size;
-    private:
-        DjSets(const DjSets &) {}
-        DjSets operator =(const DjSets &);
-    };
-
-    template <typename T>
-    struct GraphEdge
-    {
-        GraphEdge() {}
-        GraphEdge(int to, int next, const T &val) : to(to), next(next), val(val) {}
-        int to;
-        int next;
-        T val;
-    };
-
-
-    template <typename T>
-    class Graph
-    {
-    public:
-        typedef GraphEdge<T> Edge;
-
-        Graph(int numv, int nume_max);
-
-        void addEdge(int from, int to, const T &val = T());
-
-        std::vector<int> start;
-        std::vector<Edge> edges;
-
-        int numv;
-        int nume_max;
-        int nume;
-    private:
-        Graph(const Graph &) {}
-        Graph operator =(const Graph &) {}
-    };
-
-
-    struct SegmLinkVal
-    {
-        SegmLinkVal() {}
-        SegmLinkVal(int dr, int dsp) : dr(dr), dsp(dsp) {}
-        bool operator <(const SegmLinkVal &other) const
-        {
-            return dr + dsp < other.dr + other.dsp;
-        }
-        int dr;
-        int dsp;
-    };
-
-
-    struct SegmLink
-    {
-        SegmLink() {}
-        SegmLink(int from, int to, const SegmLinkVal &val)
-            : from(from), to(to), val(val) {}
-        bool operator <(const SegmLink &other) const
-        {
-            return val < other.val;
-        }
-        int from;
-        int to;
-        SegmLinkVal val;
-    };
-
-    //
-    // Implementation
-    //
-
-    DjSets DjSets::operator = (const DjSets &/*obj*/)
-    {
-        //cout << "Invalid DjSets constructor\n";
-        CV_Error(-1, "Invalid DjSets constructor\n");
-        return *this;
-    }
-
-    DjSets::DjSets(int n) : parent(n), rank(n, 0), size(n, 1)
-    {
-        for (int i = 0; i < n; ++i)
-            parent[i] = i;
-    }
-
-
-    inline int DjSets::find(int elem)
-    {
-        int set = elem;
-        while (set != parent[set])
-            set = parent[set];
-        while (elem != parent[elem])
-        {
-            int next = parent[elem];
-            parent[elem] = set;
-            elem = next;
-        }
-        return set;
-    }
-
-
-    inline int DjSets::merge(int set1, int set2)
-    {
-        if (rank[set1] < rank[set2])
-        {
-            parent[set1] = set2;
-            size[set2] += size[set1];
-            return set2;
-        }
-        if (rank[set2] < rank[set1])
-        {
-            parent[set2] = set1;
-            size[set1] += size[set2];
-            return set1;
-        }
-        parent[set1] = set2;
-        rank[set2]++;
-        size[set2] += size[set1];
-        return set2;
-    }
-
-
-    template <typename T>
-    Graph<T>::Graph(int numv, int nume_max) : start(numv, -1), edges(nume_max)
-    {
-        this->numv = numv;
-        this->nume_max = nume_max;
-        nume = 0;
-    }
-
-
-    template <typename T>
-    inline void Graph<T>::addEdge(int from, int to, const T &val)
-    {
-        edges[nume] = Edge(to, start[from], val);
-        start[from] = nume;
-        nume++;
-    }
-
-
-    inline int pix(int y, int x, int ncols)
-    {
-        return y * ncols + x;
-    }
-
-
-    inline int sqr(int x)
-    {
-        return x * x;
-    }
-
-
-    inline int dist2(const cv::Vec4b &lhs, const cv::Vec4b &rhs)
-    {
-        return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]) + sqr(lhs[2] - rhs[2]);
-    }
-
-
-    inline int dist2(const cv::Vec2s &lhs, const cv::Vec2s &rhs)
-    {
-        return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]);
-    }
-
-} // anonymous namespace
-
-namespace cv
-{
-    namespace ocl
-    {
-
-        void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, TermCriteria criteria)
-        {
-            CV_Assert(src.type() == CV_8UC4);
-            const int nrows = src.rows;
-            const int ncols = src.cols;
-            const int hr = sr;
-            const int hsp = sp;
-
-            // Perform mean shift procedure and obtain region and spatial maps
-            oclMat h_rmap, h_spmap;
-            meanShiftProc(src, h_rmap, h_spmap, sp, sr, criteria);
-            Mat rmap = h_rmap;
-            Mat spmap = h_spmap;
-
-            Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
-                                 + (nrows - 1) + (ncols - 1));
-
-            // Make region adjacent graph from image
-            Vec4b r1;
-            Vec4b r2[4];
-            Vec2s sp1;
-            Vec2s sp2[4];
-            int dr[4];
-            int dsp[4];
-            for (int y = 0; y < nrows - 1; ++y)
-            {
-                Vec4b *ry = rmap.ptr<Vec4b>(y);
-                Vec4b *ryp = rmap.ptr<Vec4b>(y + 1);
-                Vec2s *spy = spmap.ptr<Vec2s>(y);
-                Vec2s *spyp = spmap.ptr<Vec2s>(y + 1);
-                for (int x = 0; x < ncols - 1; ++x)
-                {
-                    r1 = ry[x];
-                    sp1 = spy[x];
-
-                    r2[0] = ry[x + 1];
-                    r2[1] = ryp[x];
-                    r2[2] = ryp[x + 1];
-                    r2[3] = ryp[x];
-
-                    sp2[0] = spy[x + 1];
-                    sp2[1] = spyp[x];
-                    sp2[2] = spyp[x + 1];
-                    sp2[3] = spyp[x];
-
-                    dr[0] = dist2(r1, r2[0]);
-                    dr[1] = dist2(r1, r2[1]);
-                    dr[2] = dist2(r1, r2[2]);
-                    dsp[0] = dist2(sp1, sp2[0]);
-                    dsp[1] = dist2(sp1, sp2[1]);
-                    dsp[2] = dist2(sp1, sp2[2]);
-
-                    r1 = ry[x + 1];
-                    sp1 = spy[x + 1];
-
-                    dr[3] = dist2(r1, r2[3]);
-                    dsp[3] = dist2(sp1, sp2[3]);
-
-                    g.addEdge(pix(y, x, ncols), pix(y, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
-                    g.addEdge(pix(y, x, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[1], dsp[1]));
-                    g.addEdge(pix(y, x, ncols), pix(y + 1, x + 1, ncols), SegmLinkVal(dr[2], dsp[2]));
-                    g.addEdge(pix(y, x + 1, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[3], dsp[3]));
-                }
-            }
-            for (int y = 0; y < nrows - 1; ++y)
-            {
-                r1 = rmap.at<Vec4b>(y, ncols - 1);
-                r2[0] = rmap.at<Vec4b>(y + 1, ncols - 1);
-                sp1 = spmap.at<Vec2s>(y, ncols - 1);
-                sp2[0] = spmap.at<Vec2s>(y + 1, ncols - 1);
-                dr[0] = dist2(r1, r2[0]);
-                dsp[0] = dist2(sp1, sp2[0]);
-                g.addEdge(pix(y, ncols - 1, ncols), pix(y + 1, ncols - 1, ncols), SegmLinkVal(dr[0], dsp[0]));
-            }
-            for (int x = 0; x < ncols - 1; ++x)
-            {
-                r1 = rmap.at<Vec4b>(nrows - 1, x);
-                r2[0] = rmap.at<Vec4b>(nrows - 1, x + 1);
-                sp1 = spmap.at<Vec2s>(nrows - 1, x);
-                sp2[0] = spmap.at<Vec2s>(nrows - 1, x + 1);
-                dr[0] = dist2(r1, r2[0]);
-                dsp[0] = dist2(sp1, sp2[0]);
-                g.addEdge(pix(nrows - 1, x, ncols), pix(nrows - 1, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
-            }
-
-            DjSets comps(g.numv);
-
-            // Find adjacent components
-            for (int v = 0; v < g.numv; ++v)
-            {
-                for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
-                {
-                    int c1 = comps.find(v);
-                    int c2 = comps.find(g.edges[e_it].to);
-                    if (c1 != c2 && g.edges[e_it].val.dr < hr && g.edges[e_it].val.dsp < hsp)
-                        comps.merge(c1, c2);
-                }
-            }
-
-            std::vector<SegmLink> edges;
-            edges.reserve(g.numv);
-
-            // Prepare edges connecting differnet components
-            for (int v = 0; v < g.numv; ++v)
-            {
-                int c1 = comps.find(v);
-                for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
-                {
-                    int c2 = comps.find(g.edges[e_it].to);
-                    if (c1 != c2)
-                        edges.push_back(SegmLink(c1, c2, g.edges[e_it].val));
-                }
-            }
-
-            // Sort all graph's edges connecting differnet components (in asceding order)
-            std::sort(edges.begin(), edges.end());
-
-            // Exclude small components (starting from the nearest couple)
-            for (size_t i = 0; i < edges.size(); ++i)
-            {
-                int c1 = comps.find(edges[i].from);
-                int c2 = comps.find(edges[i].to);
-                if (c1 != c2 && (comps.size[c1] < minsize || comps.size[c2] < minsize))
-                    comps.merge(c1, c2);
-            }
-
-            // Compute sum of the pixel's colors which are in the same segment
-            Mat h_src = src;
-            std::vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
-            for (int y = 0; y < nrows; ++y)
-            {
-                Vec4b *h_srcy = h_src.ptr<Vec4b>(y);
-                for (int x = 0; x < ncols; ++x)
-                {
-                    int parent = comps.find(pix(y, x, ncols));
-                    Vec4b col = h_srcy[x];
-                    Vec4i &sumcol = sumcols[parent];
-                    sumcol[0] += col[0];
-                    sumcol[1] += col[1];
-                    sumcol[2] += col[2];
-                }
-            }
-
-            // Create final image, color of each segment is the average color of its pixels
-            dst.create(src.size(), src.type());
-
-            for (int y = 0; y < nrows; ++y)
-            {
-                Vec4b *dsty = dst.ptr<Vec4b>(y);
-                for (int x = 0; x < ncols; ++x)
-                {
-                    int parent = comps.find(pix(y, x, ncols));
-                    const Vec4i &sumcol = sumcols[parent];
-                    Vec4b &dstcol = dsty[x];
-                    dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
-                    dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
-                    dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
-                }
-            }
-        }
-
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_LUT.cl b/modules/ocl/src/opencl/arithm_LUT.cl
deleted file mode 100644
index 30407bb..0000000
--- a/modules/ocl/src/opencl/arithm_LUT.cl
+++ /dev/null
@@ -1,107 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Rock Li, Rock.li@amd.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
-      __global dstT *dst,
-      int cols1, int rows,
-      int src_offset1,
-      int lut_offset1,
-      int dst_offset1,
-      int src_step1, int dst_step1)
-{
-    int x1 = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x1 < cols1 && y < rows)
-    {
-        int src_index = mad24(y, src_step1, src_offset1 + x1);
-        int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
-
-        dst[dst_index] = lut[lut_offset1 + src[src_index]];
-    }
-}
-
-__kernel void LUT_C2( __global const srcT * src, __global const dstT *lut,
-      __global dstT *dst,
-      int cols1, int rows,
-      int src_offset1,
-      int lut_offset1,
-      int dst_offset1,
-      int src_step1, int dst_step1)
-{
-    int x1 = get_global_id(0) << 1;
-    int y = get_global_id(1);
-
-    if (x1 < cols1 && y < rows)
-    {
-        int src_index = mad24(y, src_step1, src_offset1 + x1);
-        int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
-
-        dst[dst_index    ] =                  lut[lut_offset1 + (src[src_index    ] << 1)    ];
-        dst[dst_index + 1] = x1 + 1 < cols1 ? lut[lut_offset1 + (src[src_index + 1] << 1) + 1] : dst[dst_index + 1];
-    }
-}
-
-__kernel void LUT_C4( __global const srcT * src, __global const dstT *lut,
-      __global dstT *dst,
-      int cols1, int rows,
-      int src_offset1,
-      int lut_offset1,
-      int dst_offset1,
-      int src_step1, int dst_step1)
-{
-    int x1 = get_global_id(0) << 2;
-    int y = get_global_id(1);
-
-    if (x1 < cols1 && y < rows)
-    {
-        int src_index = mad24(y, src_step1, src_offset1 + x1);
-        int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
-
-        dst[dst_index    ] =                  lut[lut_offset1 + (src[src_index    ] << 2)    ];
-        dst[dst_index + 1] = x1 + 1 < cols1 ? lut[lut_offset1 + (src[src_index + 1] << 2) + 1] : dst[dst_index + 1];
-        dst[dst_index + 2] = x1 + 2 < cols1 ? lut[lut_offset1 + (src[src_index + 2] << 2) + 2] : dst[dst_index + 2];
-        dst[dst_index + 3] = x1 + 3 < cols1 ? lut[lut_offset1 + (src[src_index + 3] << 2) + 3] : dst[dst_index + 3];
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
deleted file mode 100644
index e07f314..0000000
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ /dev/null
@@ -1,107 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#ifdef BINARY
-
-__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
-                         __global srcT *src2, int src2_step, int src2_offset,
-                         __global dstT *dst, int dst_step, int dst_offset,
-                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-        int dst_index  = mad24(y, dst_step, x + dst_offset);
-#ifdef INTEL_DEVICE //workaround for intel compiler bug
-        if(src1_index >= 0 && src2_index >= 0)
-#endif
-        {
-            dstT t0 = convertToDstT(src1[src1_index]);
-            dstT t1 = convertToDstT(src2[src2_index]);
-            dstT t2 = t0 - t1;
-
-            dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
-        }
-    }
-}
-
-#else
-
-__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
-                         __global dstT *dst, int dst_step, int dst_offset,
-                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int dst_index  = mad24(y, dst_step, x + dst_offset);
-#ifdef INTEL_DEVICE //workaround for intel compiler bug
-        if(src1_index >= 0)
-#endif
-        {
-            dstT t0 = convertToDstT(src1[src1_index]);
-
-            dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
-        }
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl
deleted file mode 100644
index 04262b8..0000000
--- a/modules/ocl/src/opencl/arithm_add.cl
+++ /dev/null
@@ -1,143 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if defined (FUNC_ADD)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
-#endif
-
-#if defined (FUNC_SUB)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
-#endif
-
-#if defined (FUNC_MUL)
-#if defined (HAVE_SCALAR)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar * convertToWT(src2[src2_index]));
-#else
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
-#endif
-#endif
-
-#if defined (FUNC_DIV)
-#if defined (HAVE_SCALAR)
-#define EXPRESSION T zero = (T)(0); \
-    dst[dst_index] = src2[src2_index] == zero ? zero : \
-    convertToT(convertToWT(src1[src1_index]) * scalar / convertToWT(src2[src2_index]));
-#else
-#define EXPRESSION T zero = (T)(0); \
-    dst[dst_index] = src2[src2_index] == zero ? zero : \
-    convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
-#endif
-#endif
-
-#if defined (FUNC_ABS_DIFF)
-#define EXPRESSION WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); \
-    value = value > (WT)(0) ? value : -value; \
-    dst[dst_index] = convertToT(value);
-#endif
-
-#if defined (FUNC_MIN)
-#define EXPRESSION dst[dst_index] = min( src1[src1_index], src2[src2_index] );
-#endif
-
-#if defined (FUNC_MAX)
-#define EXPRESSION dst[dst_index] = max( src1[src1_index], src2[src2_index] );
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////// ADD ////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HAVE_SCALAR
-
-__kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset,
-                                   __global T *src2, int src2_step, int src2_offset,
-                                   __global T *dst, int dst_step, int dst_offset,
-                                   int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-        int dst_index  = mad24(y, dst_step, x + dst_offset);
-
-        EXPRESSION
-    }
-}
-
-#else
-
-// add mat with scale
-__kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset,
-                                          __global T *src2, int src2_step, int src2_offset,
-                                          WT scalar,
-                                          __global T *dst, int dst_step,  int dst_offset,
-                                          int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-
-        EXPRESSION
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/arithm_addWeighted.cl b/modules/ocl/src/opencl/arithm_addWeighted.cl
deleted file mode 100644
index 872ee85..0000000
--- a/modules/ocl/src/opencl/arithm_addWeighted.cl
+++ /dev/null
@@ -1,75 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void addWeighted(__global T * src1, int src1_step1, int src1_offset1,
-                              __global T * src2, int src2_step1, int src2_offset1,
-                              __global T * dst, int dst_step1, int dst_offset1,
-                              WT alpha, WT beta, WT gama,
-                              int cols1, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols1 && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index = mad24(y, dst_step1, x + dst_offset1);
-
-        dst[dst_index] = convertToT(src1[src1_index]*alpha + src2[src2_index]*beta + gama);
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl
deleted file mode 100644
index b115d9b..0000000
--- a/modules/ocl/src/opencl/arithm_add_mask.cl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if defined (FUNC_ADD)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
-#endif
-
-#if defined (FUNC_SUB)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
-#endif
-
-#if defined (FUNC_MUL)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
-#endif
-
-#if defined (FUNC_DIV)
-#define EXPRESSION T zero = (T)(0); \
-    dst[dst_index] = src2[src2_index] == zero ? zero : \
-    convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////// add with mask //////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int src1_offset,
-                              __global T * src2, int src2_step, int src2_offset,
-                              __global uchar * mask, int mask_step, int mask_offset,
-                              __global T * dst, int dst_step, int dst_offset,
-                              int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int mask_index = mad24(y, mask_step, x + mask_offset);
-        if (mask[mask_index])
-        {
-            int src1_index = mad24(y, src1_step, x + src1_offset);
-            int src2_index = mad24(y, src2_step, x + src2_offset);
-            int dst_index  = mad24(y, dst_step, dst_offset + x);
-
-            EXPRESSION
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl
deleted file mode 100644
index 05ea48d..0000000
--- a/modules/ocl/src/opencl/arithm_add_scalar.cl
+++ /dev/null
@@ -1,103 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if defined (FUNC_ADD)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar);
-#endif
-
-#if defined (FUNC_SUB)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar);
-#endif
-
-#if defined (FUNC_MUL)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar);
-#endif
-
-#if defined (FUNC_DIV)
-#define EXPRESSION T zero = (T)(0); \
-    dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar / convertToWT(src1[src1_index]));
-#endif
-
-#if defined (FUNC_ABS)
-#define EXPRESSION \
-    T value = src1[src1_index] > (T)(0) ? src1[src1_index] : -src1[src1_index]; \
-    dst[dst_index] = value;
-#endif
-
-#if defined (FUNC_ABS_DIFF)
-#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar; \
-    value = value > (WT)(0) ? value : -value; \
-    dst[dst_index] = convertToT(value);
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////// Add with scalar /////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1_offset,
-                                 WT scalar,
-                                 __global T *dst,  int dst_step,  int dst_offset,
-                                 int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-
-        EXPRESSION
-
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
deleted file mode 100644
index a8b9657..0000000
--- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+++ /dev/null
@@ -1,96 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if defined (FUNC_ADD)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar);
-#endif
-
-#if defined (FUNC_SUB)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar);
-#endif
-
-#if defined (FUNC_MUL)
-#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar);
-#endif
-
-#if defined (FUNC_DIV)
-#define EXPRESSION T zero = (T)(0); \
-    dst[dst_index] = src2[src2_index] == zero ? zero : \
-    convertToT(convertToWT(src1[src1_index]) / scalar[0]);
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////
-//////////////////////////// Add with scalar with mask ////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int src1_offset,
-                                     WT scalar,
-                                     __global uchar *mask, int mask_step, int mask_offset,
-                                     __global T *dst,  int dst_step,  int dst_offset,
-                                     int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int mask_index = mad24(y, mask_step, x + mask_offset);
-        if (mask[mask_index])
-        {
-            int src1_index = mad24(y, src1_step, x + src1_offset);
-            int dst_index = mad24(y, dst_step, dst_offset + x);
-
-            EXPRESSION
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary.cl b/modules/ocl/src/opencl/arithm_bitwise_binary.cl
deleted file mode 100644
index 56cd745..0000000
--- a/modules/ocl/src/opencl/arithm_bitwise_binary.cl
+++ /dev/null
@@ -1,82 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//    Peng Xiao,    pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////// bitwise_binary //////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_bitwise_binary(__global uchar * src1, int src1_step, int src1_offset,
-                                    __global uchar * src2, int src2_step, int src2_offset,
-                                    __global uchar * dst, int dst_step, int dst_offset,
-                                    int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-#if elemSize > 1
-        x *= elemSize;
-#endif
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-
-#if elemSize > 1
-        #pragma unroll
-        for (int i = 0; i < elemSize; i += vlen)
-        {
-            ucharv t0 = vloadn(0, src1 + src1_index + i);
-            ucharv t1 = vloadn(0, src2 + src2_index + i);
-            ucharv t2 = t0 Operation t1;
-
-            vstoren(t2, 0, dst + dst_index + i);
-        }
-#else
-        dst[dst_index] = src1[src1_index] Operation src2[src2_index];
-#endif
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
deleted file mode 100644
index 328ccd9..0000000
--- a/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//    Peng Xiao,    pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, int src1_offset,
-                                    __global uchar * src2, int src2_step, int src2_offset,
-                                    __global uchar * mask, int mask_step, int mask_offset,
-                                    __global uchar * dst, int dst_step, int dst_offset,
-                                    int cols1, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols1 && y < rows)
-    {
-        int mask_index = mad24(y, mask_step, mask_offset + x);
-
-        if (mask[mask_index])
-        {
-#if elemSize > 1
-                x *= elemSize;
-#endif
-            int src1_index = mad24(y, src1_step, x + src1_offset);
-            int src2_index = mad24(y, src2_step, x + src2_offset);
-            int dst_index = mad24(y, dst_step, x + dst_offset);
-
-#if elemSize > 1
-            #pragma unroll
-            for (int i = 0; i < elemSize; i += vlen)
-            {
-                ucharv t0 = vloadn(0, src1 + src1_index + i);
-                ucharv t1 = vloadn(0, src2 + src2_index + i);
-                ucharv t2 = t0 Operation t1;
-
-                vstoren(t2, 0, dst + dst_index + i);
-            }
-#else
-            dst[dst_index] = src1[src1_index] Operation src2[src2_index];
-#endif
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
deleted file mode 100644
index 434bd5e..0000000
--- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
+++ /dev/null
@@ -1,82 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//    Peng Xiao,    pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////bitwise_binary/////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_bitwise_binary_scalar(
-        __global uchar *src1, int src1_step, int src1_offset,
-        __global uchar *src2,
-        __global uchar *dst, int dst_step, int dst_offset,
-        int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-#if elemSize > 1
-        x *= elemSize;
-#endif
-        int src1_index = mad24(y, src1_step, src1_offset + x);
-        int dst_index  = mad24(y, dst_step, dst_offset + x);
-
-#if elemSize > 1
-        #pragma unroll
-        for (int i = 0; i < elemSize; i += vlen)
-        {
-            ucharv t0 = vloadn(0, src1 + src1_index + i);
-            ucharv t1 = vloadn(0, src2 + i);
-            ucharv t2 = t0 Operation t1;
-
-            vstoren(t2, 0, dst + dst_index + i);
-        }
-#else
-        dst[dst_index] = src1[src1_index] Operation src2[0];
-#endif
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
deleted file mode 100644
index 756f201..0000000
--- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+++ /dev/null
@@ -1,86 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_step, int src1_offset,
-        __global uchar *src2,
-        __global uchar *mask, int mask_step, int mask_offset,
-        __global uchar *dst,  int dst_step,  int dst_offset,
-        int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int mask_index = mad24(y, mask_step, x + mask_offset);
-
-        if (mask[mask_index])
-        {
-#if elemSize > 1
-            x *= elemSize;
-#endif
-            int src1_index = mad24(y, src1_step, x + src1_offset);
-            int dst_index = mad24(y, dst_step, x + dst_offset);
-
-#if elemSize > 1
-            #pragma unroll
-            for (int i = 0; i < elemSize; i += vlen)
-            {
-                ucharv t0 = vloadn(0, src1 + src1_index + i);
-                ucharv t1 = vloadn(0, src2 + i);
-                ucharv t2 = t0 Operation t1;
-
-                vstoren(t2, 0, dst + dst_index + i);
-            }
-#else
-            dst[dst_index] = src1[src1_index] Operation src2[0];
-#endif
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl
deleted file mode 100644
index b6f76d6..0000000
--- a/modules/ocl/src/opencl/arithm_bitwise_not.cl
+++ /dev/null
@@ -1,253 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int src1_offset,
-                                     __global uchar *dst,  int dst_step,  int dst_offset,
-                                     int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        x = x << 2;
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + x);
-
-        uchar4 src1_data = vload4(0, src1 + src1_index);
-        uchar4 dst_data = vload4(0, dst + dst_index);
-        uchar4 tmp_data = ~src1_data;
-
-        dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x;
-        dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y;
-        dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z;
-        dst_data.w = dst_index + 3 < dst_end ? tmp_data.w : dst_data.w;
-
-        vstore4(dst_data, 0, dst + dst_index);
-    }
-}
-
-
-__kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src1_offset,
-                                     __global char *dst,  int dst_step,  int dst_offset,
-                                     int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        x = x << 2;
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + x);
-
-        char4 src1_data = vload4(0, src1 + src1_index);
-        char4 dst_data = vload4(0, dst + dst_index);
-        char4 tmp_data = ~src1_data;
-
-        dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x;
-        dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y;
-        dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z;
-        dst_data.w = dst_index + 3 < dst_end ? tmp_data.w : dst_data.w;
-
-        vstore4(dst_data, 0, dst + dst_index);
-    }
-}
-
-
-__kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int src1_offset,
-                                     __global ushort *dst,  int dst_step,  int dst_offset,
-                                     int rows, int cols, int dst_step1)
-
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
-        int dst_start  = mad24(y, dst_step, dst_offset);
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
-        ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
-
-        ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-        ushort4 tmp_data = ~ src1_data;
-
-        dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
-        dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
-        dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
-        dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
-        *((__global ushort4 *)((__global char *)dst + dst_index)) = dst_data;
-    }
-}
-
-
-
-__kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int src1_offset,
-                                     __global short *dst,  int dst_step,  int dst_offset,
-                                     int rows, int cols, int dst_step1)
-
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
-        int dst_start  = mad24(y, dst_step, dst_offset);
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
-        short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
-
-        short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-        short4 tmp_data = ~ src1_data;
-
-        dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
-        dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
-        dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
-        dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
-        *((__global short4 *)((__global char *)dst + dst_index)) = dst_data;
-    }
-}
-
-
-
-__kernel void arithm_bitwise_not_D4 (__global int *src1, int src1_step, int src1_offset,
-                                     __global int *dst,  int dst_step,  int dst_offset,
-                                     int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset);
-
-        int data1 = *((__global int *)((__global char *)src1 + src1_index));
-        int tmp  = ~ data1;
-
-        *((__global int *)((__global char *)dst + dst_index)) = tmp;
-    }
-}
-
-__kernel void arithm_bitwise_not_D5 (__global char *src, int src_step, int src_offset,
-                                     __global char *dst, int dst_step, int dst_offset,
-                                     int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src_index = mad24(y, src_step, (x << 2) + src_offset);
-        int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
-        char4 data;
-
-        data = *((__global char4 *)((__global char *)src + src_index));
-        data = ~ data;
-
-        *((__global char4 *)((__global char *)dst + dst_index)) = data;
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_not_D6 (__global char *src, int src_step, int src_offset,
-                                     __global char *dst, int dst_step, int dst_offset,
-                                     int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src_index = mad24(y, src_step, (x << 3) + src_offset);
-        int dst_index = mad24(y, dst_step,  (x << 3) + dst_offset);
-
-        char8 data;
-
-        data = *((__global char8 *)((__global char *)src + src_index));
-        data = ~ data;
-
-        *((__global char8 *)((__global char *)dst + dst_index)) = data;
-    }
-}
-#endif
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl
deleted file mode 100644
index c65f899..0000000
--- a/modules/ocl/src/opencl/arithm_cartToPolar.cl
+++ /dev/null
@@ -1,141 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define CV_PI M_PI
-#else
-#define CV_PI M_PI_F
-#endif
-
-__kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
-                                     __global float *src2, int src2_step, int src2_offset,
-                                     __global float *dst1, int dst1_step, int dst1_offset, // magnitude
-                                     __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
-                                     int rows, int cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-
-        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
-
-        float x = src1[src1_index];
-        float y = src2[src2_index];
-
-        float x2 = x * x;
-        float y2 = y * y;
-
-        float magnitude = sqrt(x2 + y2);
-
-        float tmp = y >= 0 ? 0 : CV_PI*2;
-        tmp = x < 0 ? CV_PI : tmp;
-
-        float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
-        float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
-
-#ifdef DEGREE
-        cartToPolar *= (180/CV_PI);
-#endif
-
-        dst1[dst1_index] = magnitude;
-        dst2[dst2_index] = cartToPolar;
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-
-__kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
-                                     __global double *src2, int src2_step, int src2_offset,
-                                     __global double *dst1, int dst1_step, int dst1_offset,
-                                     __global double *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-
-        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
-
-        double x = src1[src1_index];
-        double y = src2[src2_index];
-
-        double x2 = x * x;
-        double y2 = y * y;
-
-        double magnitude = sqrt(x2 + y2);
-
-        float tmp = y >= 0 ? 0 : CV_PI*2;
-        tmp = x < 0 ? CV_PI : tmp;
-
-        float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
-        double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
-
-#ifdef DEGREE
-        cartToPolar *= (180/CV_PI);
-#endif
-
-        dst1[dst1_index] = magnitude;
-        dst2[dst2_index] = cartToPolar;
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/arithm_compare.cl b/modules/ocl/src/opencl/arithm_compare.cl
deleted file mode 100644
index 73e6299..0000000
--- a/modules/ocl/src/opencl/arithm_compare.cl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_compare(__global T * src1, int src1_step1, int src1_offset1,
-                              __global T * src2, int src2_step1, int src2_offset1,
-                              __global uchar * dst, int dst_step1, int dst_offset1,
-                              int cols1, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols1 && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index = mad24(y, dst_step1, x + dst_offset1);
-
-        dst[dst_index] = convert_uchar(src1[src1_index] Operation src2[src2_index] ? 255 : 0);
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl
deleted file mode 100644
index f0a1893..0000000
--- a/modules/ocl/src/opencl/arithm_exp.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wu Zailong, bullet@yeah.net
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////EXP//////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_exp_C1(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x + dstOffset1);
-
-        dst[dstIdx] = exp(src[srcIdx]);
-    }
-}
-
-__kernel void arithm_exp_C2(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 1;
-    int y = get_global_id(1);
-
-    if(x1 < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-
-        dst[dstIdx] =                      exp(src[srcIdx]);
-        dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
-    }
-}
-
-__kernel void arithm_exp_C4(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 2;
-    int y = get_global_id(1);
-
-    if(x1 < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-
-        dst[dstIdx] =                      exp(src[srcIdx]);
-        dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
-        dst[dstIdx + 2] = x1 + 2 < cols1 ? exp(src[srcIdx + 2]) : dst[dstIdx + 2];
-        dst[dstIdx + 3] = x1 + 3 < cols1 ? exp(src[srcIdx + 3]) : dst[dstIdx + 3];
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_flip.cl b/modules/ocl/src/opencl/arithm_flip.cl
deleted file mode 100644
index b9bacd3..0000000
--- a/modules/ocl/src/opencl/arithm_flip.cl
+++ /dev/null
@@ -1,125 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////// flip rows ///////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_flip_rows(__global T * src, int src_step, int src_offset,
-                               __global T * dst, int dst_step, int dst_offset,
-                               int rows, int cols, int thread_rows, int thread_cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < thread_rows)
-    {
-        int src_index_0 = mad24(y,            src_step, x + src_offset);
-        int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset);
-
-        int dst_index_0 = mad24(y,            dst_step, x + dst_offset);
-        int dst_index_1 = mad24(rows - y - 1, dst_step, x + dst_offset);
-
-        T data0 = src[src_index_0], data1 = src[src_index_1];
-
-        dst[dst_index_0] = data1;
-        dst[dst_index_1] = data0;
-    }
-}
-
-__kernel void arithm_flip_rows_cols(__global T * src, int src_step, int src_offset,
-                                    __global T * dst, int dst_step, int dst_offset,
-                                    int rows, int cols, int thread_rows, int thread_cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < thread_rows)
-    {
-        int src_index_0 = mad24(y,            src_step, x            + src_offset);
-        int dst_index_0 = mad24(rows - y - 1, dst_step, cols - x - 1 + dst_offset);
-
-        int src_index_1 = mad24(rows - y - 1, src_step, cols - x - 1 + src_offset);
-        int dst_index_1 = mad24(y,            dst_step, x            + dst_offset);
-
-        T data0 = src[src_index_0], data1 = src[src_index_1];
-
-        dst[dst_index_0] = data0;
-        dst[dst_index_1] = data1;
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////// flip cols ///////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_flip_cols(__global T * src, int src_step, int src_offset,
-                               __global T * dst, int dst_step, int dst_offset,
-                               int rows, int cols, int thread_rows, int thread_cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < thread_cols && y < rows)
-    {
-        int src_index_0 = mad24(y, src_step, x            + src_offset);
-        int dst_index_0 = mad24(y, dst_step, cols - x - 1 + dst_offset);
-
-        int src_index_1 = mad24(y, src_step, cols - x - 1 + src_offset);
-        int dst_index_1 = mad24(y, dst_step, x            + dst_offset);
-
-        T data0 = src[src_index_0], data1 = src[src_index_1];
-        dst[dst_index_1] = data1;
-        dst[dst_index_0] = data0;
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl
deleted file mode 100644
index ba5f32d..0000000
--- a/modules/ocl/src/opencl/arithm_log.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wu Zailong, bullet@yeah.net
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void arithm_log_C1(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x + dstOffset1);
-
-        dst[dstIdx] = log(src[srcIdx]);
-    }
-}
-
-__kernel void arithm_log_C2(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 1;
-    int y = get_global_id(1);
-
-    if(x1 < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-
-        dst[dstIdx] =                      log(src[srcIdx]);
-        dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1];
-    }
-}
-
-__kernel void arithm_log_C4(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 2;
-    int y = get_global_id(1);
-
-    if(x1 < cols1 && y < rows)
-    {
-        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-
-        dst[dstIdx] =                      log(src[srcIdx]);
-        dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1];
-        dst[dstIdx + 2] = x1 + 2 < cols1 ? log(src[srcIdx + 2]) : dst[dstIdx + 2];
-        dst[dstIdx + 3] = x1 + 3 < cols1 ? log(src[srcIdx + 3]) : dst[dstIdx + 3];
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl
deleted file mode 100644
index 1053efd..0000000
--- a/modules/ocl/src/opencl/arithm_magnitude.cl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void arithm_magnitude(__global T *src1, int src1_step, int src1_offset,
-                               __global T *src2, int src2_step, int src2_offset,
-                               __global T *dst,  int dst_step,  int dst_offset,
-                               int rows, int cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, x + src1_offset);
-        int src2_index = mad24(y, src2_step, x + src2_offset);
-        int dst_index  = mad24(y, dst_step,  x + dst_offset);
-
-        T data1 = src1[src1_index];
-        T data2 = src2[src2_index];
-
-        T tmp = hypot(data1, data2);
-        dst[dst_index] = tmp;
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl
deleted file mode 100644
index 01db7d0..0000000
--- a/modules/ocl/src/opencl/arithm_minMax.cl
+++ /dev/null
@@ -1,176 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/**************************************PUBLICFUNC*************************************/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#ifdef DEPTH_5
-#define MIN_VAL (-FLT_MAX)
-#define MAX_VAL FLT_MAX
-#elif defined DEPTH_6
-#define MIN_VAL (-DBL_MAX)
-#define MAX_VAL DBL_MAX
-#endif
-
-/**************************************Array minMax**************************************/
-
-__kernel void arithm_op_minMax(__global const T * src, __global T * dst,
-    int cols, int invalid_cols, int offset, int elemnum, int groupnum)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int id = get_global_id(0);
-
-    int idx = offset + id + (id / cols) * invalid_cols;
-
-    __local T localmem_max[128], localmem_min[128];
-    T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
-
-    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
-    {
-        idx = offset + id + (id / cols) * invalid_cols;
-        temp = src[idx];
-        minval = min(minval, temp);
-        maxval = max(maxval, temp);
-    }
-
-    if (lid > 127)
-    {
-        localmem_min[lid - 128] = minval;
-        localmem_max[lid - 128] = maxval;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lid < 128)
-    {
-        localmem_min[lid] = min(minval, localmem_min[lid]);
-        localmem_max[lid] = max(maxval, localmem_max[lid]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-        if (lid < lsize)
-        {
-            int lid2 = lsize + lid;
-            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (lid == 0)
-    {
-        dst[gid] = localmem_min[0];
-        dst[gid + groupnum] = localmem_max[0];
-    }
-}
-
-__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
-    int cols, int invalid_cols, int offset,
-    int elemnum, int groupnum,
-    const __global uchar * mask, int minvalid_cols, int moffset)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int id = get_global_id(0);
-
-    int idx = offset + id + (id / cols) * invalid_cols;
-    int midx = moffset + id + (id / cols) * minvalid_cols;
-
-    __local T localmem_max[128], localmem_min[128];
-    T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
-
-    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
-    {
-        idx = offset + id + (id / cols) * invalid_cols;
-        midx = moffset + id + (id / cols) * minvalid_cols;
-
-        if (mask[midx])
-        {
-            temp = src[idx];
-            minval = min(minval, temp);
-            maxval = max(maxval, temp);
-        }
-    }
-
-    if (lid > 127)
-    {
-        localmem_min[lid - 128] = minval;
-        localmem_max[lid - 128] = maxval;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lid < 128)
-    {
-        localmem_min[lid] = min(minval, localmem_min[lid]);
-        localmem_max[lid] = max(maxval, localmem_max[lid]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-        if (lid < lsize)
-        {
-            int lid2 = lsize + lid;
-            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (lid == 0)
-    {
-        dst[gid] = localmem_min[0];
-        dst[gid + groupnum] = localmem_max[0];
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
deleted file mode 100644
index 1aac6c1..0000000
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ /dev/null
@@ -1,258 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan, yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/**************************************PUBLICFUNC*************************************/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define RES_TYPE double4
-#define CONVERT_RES_TYPE convert_double4
-#else
-#define RES_TYPE float4
-#define CONVERT_RES_TYPE convert_float4
-#endif
-
-#if defined (DEPTH_0)
-#define VEC_TYPE uchar4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_uchar4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL 0
-#define MAX_VAL 255
-#endif
-#if defined (DEPTH_1)
-#define VEC_TYPE char4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_char4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL -128
-#define MAX_VAL 127
-#endif
-#if defined (DEPTH_2)
-#define VEC_TYPE ushort4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_ushort4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL 0
-#define MAX_VAL 65535
-#endif
-#if defined (DEPTH_3)
-#define VEC_TYPE short4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_short4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL -32768
-#define MAX_VAL 32767
-#endif
-#if defined (DEPTH_4)
-#define VEC_TYPE int4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_int4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL INT_MIN
-#define MAX_VAL INT_MAX
-#endif
-#if defined (DEPTH_5)
-#define VEC_TYPE float4
-#define VEC_TYPE_LOC float4
-#define CONVERT_TYPE convert_float4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL (-FLT_MAX)
-#define MAX_VAL FLT_MAX
-#endif
-#if defined (DEPTH_6)
-#define VEC_TYPE double4
-#define VEC_TYPE_LOC double4
-#define CONVERT_TYPE convert_double4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL (-DBL_MAX)
-#define MAX_VAL DBL_MAX
-#endif
-
-#if defined (REPEAT_S0)
-#define repeat_s(a) a=a;
-#endif
-#if defined (REPEAT_S1)
-#define repeat_s(a) a.s0 = a.s1;
-#endif
-#if defined (REPEAT_S2)
-#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2;
-#endif
-#if defined (REPEAT_S3)
-#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3;
-#endif
-
-
-#if defined (REPEAT_E0)
-#define repeat_e(a) a=a;
-#endif
-#if defined (REPEAT_E1)
-#define repeat_e(a) a.s3 = a.s2;
-#endif
-#if defined (REPEAT_E2)
-#define repeat_e(a) a.s3 = a.s1;a.s2 = a.s1;
-#endif
-#if defined (REPEAT_E3)
-#define repeat_e(a) a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
-#endif
-
-/**************************************Array minMax**************************************/
-
-__kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
-                                  __global VEC_TYPE *src, __global RES_TYPE *dst)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int  id = get_global_id(0);
-    int idx = offset + id + (id / cols) * invalid_cols;
-
-    __local VEC_TYPE localmem_max[128], localmem_min[128];
-    VEC_TYPE minval, maxval, temp;
-
-    __local VEC_TYPE_LOC localmem_maxloc[128], localmem_minloc[128];
-    VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1;
-
-    int idx_c;
-
-    if (id < elemnum)
-    {
-        temp = src[idx];
-        idx_c = idx << 2;
-        temploc = (VEC_TYPE_LOC)(idx_c, idx_c + 1, idx_c + 2, idx_c + 3);
-
-        if (id % cols == 0 )
-        {
-            repeat_s(temp);
-            repeat_s(temploc);
-        }
-        if (id % cols == cols - 1)
-        {
-            repeat_e(temp);
-            repeat_e(temploc);
-        }
-        minval = temp;
-        maxval = temp;
-        minloc = temploc;
-        maxloc = temploc;
-    }
-    else
-    {
-        minval = MAX_VAL;
-        maxval = MIN_VAL;
-        minloc = negative;
-        maxloc = negative;
-    }
-
-    int grainSize = (groupnum << 8);
-    for (id = id + grainSize; id < elemnum; id = id + grainSize)
-    {
-        idx = offset + id + (id / cols) * invalid_cols;
-        temp = src[idx];
-        idx_c = idx << 2;
-        temploc = (VEC_TYPE_LOC)(idx_c, idx_c+1, idx_c+2, idx_c+3);
-
-        if (id % cols == 0 )
-        {
-            repeat_s(temp);
-            repeat_s(temploc);
-        }
-        if (id % cols == cols - 1)
-        {
-            repeat_e(temp);
-            repeat_e(temploc);
-        }
-
-        minval = min(minval, temp);
-        maxval = max(maxval, temp);
-        minloc = CONDITION_FUNC(minval == temp, temploc, minloc);
-        maxloc = CONDITION_FUNC(maxval == temp, temploc, maxloc);
-    }
-
-    if (lid > 127)
-    {
-        localmem_min[lid - 128] = minval;
-        localmem_max[lid - 128] = maxval;
-        localmem_minloc[lid - 128] = minloc;
-        localmem_maxloc[lid - 128] = maxloc;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lid < 128)
-    {
-        localmem_min[lid] = min(minval,localmem_min[lid]);
-        localmem_max[lid] = max(maxval,localmem_max[lid]);
-        VEC_TYPE minVal = localmem_min[lid], maxVal = localmem_max[lid];
-        localmem_minloc[lid] = CONDITION_FUNC(minVal == minval, minloc, localmem_minloc[lid]);
-        localmem_maxloc[lid] = CONDITION_FUNC(maxVal == maxval, maxloc, localmem_maxloc[lid]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-       if (lid < lsize)
-       {
-            int lid2 = lsize + lid;
-            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-            VEC_TYPE min1 = localmem_min[lid], min2 = localmem_min[lid2];
-            localmem_minloc[lid] = CONDITION_FUNC(min1 == min2, localmem_minloc[lid2], localmem_minloc[lid]);
-            VEC_TYPE max1 = localmem_max[lid], max2 = localmem_max[lid2];
-            localmem_maxloc[lid] = CONDITION_FUNC(max1 == max2, localmem_maxloc[lid2], localmem_maxloc[lid]);
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if ( lid == 0)
-    {
-        dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
-        dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
-        dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(localmem_minloc[0]);
-        dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
deleted file mode 100644
index 84b26ca..0000000
--- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+++ /dev/null
@@ -1,256 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan, yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/**************************************PUBLICFUNC*************************************/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define RES_TYPE double4
-#define CONVERT_RES_TYPE convert_double4
-#else
-#define RES_TYPE float4
-#define CONVERT_RES_TYPE convert_float4
-#endif
-
-#if defined (DEPTH_0)
-#define TYPE uchar
-#define VEC_TYPE uchar4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_uchar4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL 0
-#define MAX_VAL 255
-#endif
-#if defined (DEPTH_1)
-#define TYPE char
-#define VEC_TYPE char4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_char4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL -128
-#define MAX_VAL 127
-#endif
-#if defined (DEPTH_2)
-#define TYPE ushort
-#define VEC_TYPE ushort4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_ushort4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL 0
-#define MAX_VAL 65535
-#endif
-#if defined (DEPTH_3)
-#define TYPE short
-#define VEC_TYPE short4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_short4
-#define CONDITION_FUNC(a,b,c) (convert_int4(a) ? b : c)
-#define MIN_VAL -32768
-#define MAX_VAL 32767
-#endif
-#if defined (DEPTH_4)
-#define TYPE int
-#define VEC_TYPE int4
-#define VEC_TYPE_LOC int4
-#define CONVERT_TYPE convert_int4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL INT_MIN
-#define MAX_VAL INT_MAX
-#endif
-#if defined (DEPTH_5)
-#define TYPE float
-#define VEC_TYPE float4
-#define VEC_TYPE_LOC float4
-#define CONVERT_TYPE convert_float4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL (-FLT_MAX)
-#define MAX_VAL FLT_MAX
-#endif
-#if defined (DEPTH_6)
-#define TYPE double
-#define VEC_TYPE double4
-#define VEC_TYPE_LOC double4
-#define CONVERT_TYPE convert_double4
-#define CONDITION_FUNC(a,b,c) ((a) ? b : c)
-#define MIN_VAL (-DBL_MAX)
-#define MAX_VAL DBL_MAX
-#endif
-
-#if defined (REPEAT_E0)
-#define repeat_e(a) a=a;
-#endif
-#if defined (REPEAT_E1)
-#define repeat_e(a) a.s3 = a.s2;
-#endif
-#if defined (REPEAT_E2)
-#define repeat_e(a) a.s3 = a.s1;a.s2 = a.s1;
-#endif
-#if defined (REPEAT_E3)
-#define repeat_e(a) a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
-#endif
-
-#if defined (REPEAT_E0)
-#define repeat_me(a) a = a;
-#endif
-#if defined (REPEAT_E1)
-#define repeat_me(a) a.s3 = 0;
-#endif
-#if defined (REPEAT_E2)
-#define repeat_me(a) a.s3 = 0;a.s2 = 0;
-#endif
-#if defined (REPEAT_E3)
-#define repeat_me(a) a.s3 = 0;a.s2 = 0;a.s1 = 0;
-#endif
-
-/**************************************Array minMaxLoc mask**************************************/
-__kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int elemnum,int groupnum,__global TYPE *src,
-                                        int minvalid_cols,int moffset,__global uchar *mask,__global RES_TYPE  *dst)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int  id = get_global_id(0);
-    int idx = id + (id / cols) * invalid_cols;
-    int midx = id + (id / cols) * minvalid_cols;
-
-    __local VEC_TYPE lm_max[128],lm_min[128];
-    VEC_TYPE minval, maxval, temp, m_temp, zeroVal = (VEC_TYPE)(0);
-    __local VEC_TYPE_LOC lm_maxloc[128], lm_minloc[128];
-    VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1, one = 1, zero = 0;
-
-    if(id < elemnum)
-    {
-        temp = vload4(idx, &src[offset]);
-        m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
-        int idx_c = (idx << 2) + offset;
-        temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-        if (id % cols == cols - 1)
-        {
-            repeat_me(m_temp);
-            repeat_e(temploc);
-        }
-        minval = m_temp != zeroVal ? temp : (VEC_TYPE)MAX_VAL;
-        maxval = m_temp != zeroVal ? temp : (VEC_TYPE)MIN_VAL;
-        minloc = CONDITION_FUNC(m_temp != zeroVal, temploc , negative);
-        maxloc = minloc;
-    }
-    else
-    {
-        minval = MAX_VAL;
-        maxval = MIN_VAL;
-        minloc = negative;
-        maxloc = negative;
-    }
-
-    for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
-    {
-        idx = id + (id / cols) * invalid_cols;
-        midx = id + (id / cols) * minvalid_cols;
-        temp = vload4(idx, &src[offset]);
-        m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
-        int idx_c = (idx << 2) + offset;
-        temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-        if (id % cols == cols - 1)
-        {
-            repeat_me(m_temp);
-            repeat_e(temploc);
-        }
-        minval = min(minval, m_temp != zeroVal ? temp : minval);
-        maxval = max(maxval, m_temp != zeroVal ? temp : maxval);
-
-        minloc = CONDITION_FUNC(minval == temp && m_temp != zeroVal, temploc , minloc);
-        maxloc = CONDITION_FUNC(maxval == temp && m_temp != zeroVal, temploc , maxloc);
-    }
-
-    if(lid > 127)
-    {
-        lm_min[lid - 128] = minval;
-        lm_max[lid - 128] = maxval;
-        lm_minloc[lid - 128] = minloc;
-        lm_maxloc[lid - 128] = maxloc;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(lid < 128)
-    {
-        lm_min[lid] = min(minval, lm_min[lid]);
-        lm_max[lid] = max(maxval, lm_max[lid]);
-        VEC_TYPE con_min = CONVERT_TYPE(minloc != negative ? one : zero);
-        VEC_TYPE con_max = CONVERT_TYPE(maxloc != negative ? one : zero);
-        VEC_TYPE lmMinVal = lm_min[lid], lmMaxVal = lm_max[lid];
-        lm_minloc[lid] = CONDITION_FUNC(lmMinVal == minval && con_min != zeroVal, minloc , lm_minloc[lid]);
-        lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal == maxval && con_max != zeroVal, maxloc , lm_maxloc[lid]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-        if(lid < lsize)
-        {
-            int lid2 = lsize + lid;
-            lm_min[lid] = min(lm_min[lid], lm_min[lid2]);
-            lm_max[lid] = max(lm_max[lid], lm_max[lid2]);
-            VEC_TYPE con_min = CONVERT_TYPE(lm_minloc[lid2] != negative ? one : zero);
-            VEC_TYPE con_max = CONVERT_TYPE(lm_maxloc[lid2] != negative ? one : zero);
-
-            VEC_TYPE lmMinVal1 = lm_min[lid], lmMinVal2 = lm_min[lid2];
-            VEC_TYPE lmMaxVal1 = lm_max[lid], lmMaxVal2 = lm_max[lid2];
-            lm_minloc[lid] = CONDITION_FUNC(lmMinVal1 == lmMinVal2 && con_min != zeroVal, lm_minloc[lid2] , lm_minloc[lid]);
-            lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal1 == lmMaxVal2 && con_max != zeroVal, lm_maxloc[lid2] , lm_maxloc[lid]);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if( lid == 0)
-    {
-        dst[gid] = CONVERT_RES_TYPE(lm_min[0]);
-        dst[gid + groupnum] = CONVERT_RES_TYPE(lm_max[0]);
-        dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(lm_minloc[0]);
-        dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(lm_maxloc[0]);
-    }
-}
diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl
deleted file mode 100644
index 3180c26..0000000
--- a/modules/ocl/src/opencl/arithm_nonzero.cl
+++ /dev/null
@@ -1,93 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-/**************************************Count NonZero**************************************/
-
-__kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
-                                  __global srcT *src, __global dstT *dst)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int  id = get_global_id(0);
-
-    int idx = offset + id + (id / cols) * invalid_cols;
-    __local dstT localmem_nonzero[128];
-    dstT nonzero = (dstT)(0);
-    srcT zero = (srcT)(0), one = (srcT)(1);
-
-    for (int grain = groupnum << 8; id < elemnum; id += grain)
-    {
-        idx = offset + id + (id / cols) * invalid_cols;
-        nonzero += src[idx] == zero ? zero : one;
-    }
-
-    if (lid > 127)
-        localmem_nonzero[lid - 128] = nonzero;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lid < 128)
-        localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-        if (lid < lsize)
-        {
-           int lid2 = lsize + lid;
-           localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (lid == 0)
-        dst[gid] = localmem_nonzero[0];
-}
diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl
deleted file mode 100644
index 40346b2..0000000
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ /dev/null
@@ -1,171 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define CV_PI M_PI
-#define CV_2PI (2 * CV_PI)
-#else
-#define CV_PI M_PI_F
-#define CV_2PI (2 * CV_PI)
-#endif
-
-/**************************************phase inradians**************************************/
-
-__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
-                                         __global float *src2, int src2_step1, int src2_offset1,
-                                         __global float *dst,  int dst_step1,  int dst_offset1,
-                                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
-
-        float data1 = src1[src1_index];
-        float data2 = src2[src2_index];
-        float tmp = atan2(data2, data1);
-
-        if (tmp < 0)
-            tmp += CV_2PI;
-
-        dst[dst_index] = tmp;
-    }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
-                                         __global double *src2, int src2_step1, int src2_offset1,
-                                         __global double *dst,  int dst_step1,  int dst_offset1,
-                                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
-
-        double data1 = src1[src1_index];
-        double data2 = src2[src2_index];
-        double tmp = atan2(data2, data1);
-
-        if (tmp < 0)
-            tmp += CV_2PI;
-
-        dst[dst_index] = tmp;
-    }
-}
-
-#endif
-
-/**************************************phase indegrees**************************************/
-
-__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
-                                         __global float *src2, int src2_step1, int src2_offset1,
-                                         __global float *dst,  int dst_step1,  int dst_offset1,
-                                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
-
-        float data1 = src1[src1_index];
-        float data2 = src2[src2_index];
-        float tmp = atan2(data2, data1);
-        tmp = 180 * tmp / CV_PI;
-
-        if (tmp < 0)
-            tmp += 360;
-
-        dst[dst_index] = tmp;
-    }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
-                                         __global double *src2, int src2_step1, int src2_offset1,
-                                         __global double *dst,  int dst_step1,  int dst_offset1,
-                                         int cols, int rows)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step1, x + src1_offset1);
-        int src2_index = mad24(y, src2_step1, x + src2_offset1);
-        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
-
-        double data1 = src1[src1_index];
-        double data2 = src2[src2_index];
-        double tmp = atan2(data2, data1);
-
-        tmp = 180 * tmp / CV_PI;
-        if (tmp < 0)
-            tmp += 360;
-
-        dst[dst_index] = tmp;
-    }
-}
-#endif
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl
deleted file mode 100644
index 024f1f0..0000000
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ /dev/null
@@ -1,197 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define CV_PI M_PI
-#else
-#define CV_PI M_PI_F
-#endif
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////polarToCart with magnitude//////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Polar to Cartesian with magnitude: dst1 = src1 * cos(angle), dst2 = src1 * sin(angle).
-// The angle read from src2 is scaled by CV_PI/180 when DEGREE is defined, else used unchanged.
-__kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, int src1_offset,//magnitude
-                                         __global float *src2, int src2_step, int src2_offset,//angle
-                                         __global float *dst1, int dst1_step, int dst1_offset,
-                                         __global float *dst2, int dst2_step, int dst2_offset,
-                                         int rows, int cols)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    // Work-items that fall outside the cols x rows region have nothing to do.
-    if (col >= cols || row >= rows)
-        return;
-
-    const int mag_index = mad24(row, src1_step, col + src1_offset);
-    const int ang_index = mad24(row, src2_step, col + src2_offset);
-    const int out1_index = mad24(row, dst1_step, col + dst1_offset);
-    const int out2_index = mad24(row, dst2_step, col + dst2_offset);
-
-    const float magnitude = src1[mag_index];
-    const float angle = src2[ang_index];
-
-#ifdef DEGREE
-    const float ascale = CV_PI/180.0f;
-    const float alpha = angle * ascale;
-#else
-    const float alpha = angle;
-#endif
-
-    // Each output buffer is addressed with its own step and offset.
-    dst1[out1_index] = cos(alpha) * magnitude;
-    dst2[out2_index] = sin(alpha) * magnitude;
-}
-
-#if defined (DOUBLE_SUPPORT)
-// Double-precision polar to Cartesian with magnitude:
-// dst1 = src1 * cos(angle), dst2 = src1 * sin(angle).
-// The angle read from src2 is scaled by CV_PI/180 when DEGREE is defined, else used unchanged.
-__kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, int src1_offset,//magnitude
-                                         __global double *src2, int src2_step, int src2_offset,//angle
-                                         __global double *dst1, int dst1_step, int dst1_offset,
-                                         __global double *dst2, int dst2_step, int dst2_offset,
-                                         int rows, int cols)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    // Work-items that fall outside the cols x rows region have nothing to do.
-    if (col >= cols || row >= rows)
-        return;
-
-    const int mag_index = mad24(row, src1_step, col + src1_offset);
-    const int ang_index = mad24(row, src2_step, col + src2_offset);
-    const int out1_index = mad24(row, dst1_step, col + dst1_offset);
-    const int out2_index = mad24(row, dst2_step, col + dst2_offset);
-
-    const double magnitude = src1[mag_index];
-    const double angle = src2[ang_index];
-
-    // The angle stays in double: a float intermediate would truncate it before cos/sin and
-    // limit the double outputs to single precision.
-#ifdef DEGREE
-    const double alpha = angle * (CV_PI/180.0);
-#else
-    const double alpha = angle;
-#endif
-    dst1[out1_index] = cos(alpha) * magnitude;
-    dst2[out2_index] = sin(alpha) * magnitude;
-}
-#endif
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////polarToCart without magnitude//////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Polar to Cartesian for unit magnitude: dst1 = cos(angle), dst2 = sin(angle).
-// The angle read from src is scaled by CV_PI/180 when DEGREE is defined, else used unchanged.
-__kernel void arithm_polarToCart_D5 (__global float *src,  int src_step,  int src_offset,//angle
-                                     __global float *dst1, int dst1_step, int dst1_offset,
-                                     __global float *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-
-    // Work-items that fall outside the cols x rows region have nothing to do.
-    if (col >= cols || row >= rows)
-        return;
-
-    const int ang_index  = mad24(row, src_step,  col + src_offset);
-    const int out1_index = mad24(row, dst1_step, col + dst1_offset);
-    const int out2_index = mad24(row, dst2_step, col + dst2_offset);
-
-    const float angle = src[ang_index];
-#ifdef DEGREE
-    const float ascale = CV_PI/180.0f;
-    const float alpha = angle * ascale;
-#else
-    const float alpha = angle;
-#endif
-
-    // Each output buffer is addressed with its own step and offset.
-    dst1[out1_index] = cos(alpha);
-    dst2[out2_index] = sin(alpha);
-}
-
-#if defined (DOUBLE_SUPPORT)
-// Double-precision polar to Cartesian for unit magnitude: dst1 = cos(angle), dst2 = sin(angle).
-// The angle read from src is scaled by CV_PI/180 when DEGREE is defined, else used unchanged.
-// The buffers are declared double here, matching the other _D6 kernels; with float pointers the
-// kernel would misread 64-bit data and only ever produce single-precision results.
-__kernel void arithm_polarToCart_D6 (__global double *src,  int src_step,  int src_offset,//angle
-                                     __global double *dst1, int dst1_step, int dst1_offset,
-                                     __global double *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    // Work-items that fall outside the cols x rows region have nothing to do.
-    if (col >= cols || row >= rows)
-        return;
-
-    const int ang_index  = mad24(row, src_step,  col + src_offset);
-    const int out1_index = mad24(row, dst1_step, col + dst1_offset);
-    const int out2_index = mad24(row, dst2_step, col + dst2_offset);
-
-    const double angle = src[ang_index];
-
-#ifdef DEGREE
-    const double alpha = angle * (CV_PI/180.0);
-#else
-    const double alpha = angle;
-#endif
-
-    dst1[out1_index] = cos(alpha);
-    dst2[out2_index] = sin(alpha);
-}
-#endif
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl
deleted file mode 100644
index 385e4cc..0000000
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ /dev/null
@@ -1,73 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-/************************************** pow **************************************/
-
-// Element-wise power: dst = exp(p * log(|src|)), i.e. |src| raised to p; zero inputs give zero.
-// T and VT are not defined in this file; presumably they are supplied by the build options.
-__kernel void arithm_pow(__global VT * src, int src_step, int src_offset,
-                         __global VT * dst, int dst_step, int dst_offset,
-                         int rows, int cols, T p)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    if (col >= cols || row >= rows)
-        return;
-
-    const VT value = src[mad24(row, src_step, col + src_offset)];
-
-    // fabs() leaves positive inputs untouched, so one expression covers the positive and the
-    // negative case; only zero is special-cased instead of being passed to log().
-    const VT result = value == 0 ? 0 : exp(p * log(fabs(value)));
-    dst[mad24(row, dst_step, col + dst_offset)] = result;
-}
diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl
deleted file mode 100644
index 0ead5b0..0000000
--- a/modules/ocl/src/opencl/arithm_setidentity.cl
+++ /dev/null
@@ -1,69 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-// Writes *scalar where x == y (the main diagonal) and 0 elsewhere inside the cols x rows region.
-// T is not defined in this file; presumably it is supplied by the build options.
-__kernel void setIdentity(__global T * src, int src_step, int src_offset,
-    int cols, int rows, __global const T * scalar)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    if (col >= cols || row >= rows)
-        return;
-
-    const int index = mad24(row, src_step, src_offset + col);
-    if (col != row)
-        src[index] = 0;
-    else
-        src[index] = *scalar;
-}
diff --git a/modules/ocl/src/opencl/arithm_sqrt.cl b/modules/ocl/src/opencl/arithm_sqrt.cl
deleted file mode 100644
index 142a52a..0000000
--- a/modules/ocl/src/opencl/arithm_sqrt.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peter Andreas Entschev, peter@entschev.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////SQRT////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// One element per work-item: dst = sqrt(src) inside the cols1 x rows region.
-// srcT is not defined in this file; presumably it is supplied by the build options.
-__kernel void arithm_sqrt_C1(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    if (col >= cols1 || row >= rows)
-        return;
-
-    // Steps and offsets are applied directly as element indices into src and dst.
-    const int from = mad24(row, srcStep1, col + srcOffset1);
-    dst[mad24(row, dstStep1, col + dstOffset1)] = sqrt(src[from]);
-}
-
-// Up to two consecutive elements per work-item: dst = sqrt(src) inside the cols1 x rows region.
-__kernel void arithm_sqrt_C2(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 1;
-    int y = get_global_id(1);
-    if (x1 >= cols1 || y >= rows)
-        return;
-    int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-    int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-    // Elements at or past cols1 are skipped outright: reading such a dst value and storing it
-    // back touches memory this work-item does not own and can undo another work-item's write.
-    for (int i = 0; i < min(2, cols1 - x1); ++i)
-        dst[dstIdx + i] = sqrt(src[srcIdx + i]);
-}
-
-// Up to four consecutive elements per work-item: dst = sqrt(src) inside the cols1 x rows region.
-__kernel void arithm_sqrt_C4(__global srcT *src, __global srcT *dst,
-    int cols1, int rows,
-    int srcOffset1, int dstOffset1,
-    int srcStep1, int dstStep1)
-{
-    int x1 = get_global_id(0) << 2;
-    int y = get_global_id(1);
-    if (x1 >= cols1 || y >= rows)
-        return;
-
-    int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
-    int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
-
-    // Elements at or past cols1 are skipped outright: reading such a dst value and storing it
-    // back touches memory this work-item does not own and can undo another work-item's write.
-    for (int i = 0; i < min(4, cols1 - x1); ++i)
-        dst[dstIdx + i] = sqrt(src[srcIdx + i]);
-}
diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl
deleted file mode 100644
index 514cf2a..0000000
--- a/modules/ocl/src/opencl/arithm_sum.cl
+++ /dev/null
@@ -1,104 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if FUNC_SUM // plain sum: add a to the accumulator b
-#define FUNC(a, b) b += a;
-#elif FUNC_ABS_SUM // sum of absolute values: add a or -a, whichever is non-negative
-#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
-#elif FUNC_SQR_SUM // sum of squares: add a * a
-#define FUNC(a, b) b += a * a;
-#else // none of the three selectors is set: refuse to build
-#error No sum function
-#endif
-
-/**************************************Array buffer SUM**************************************/
-
-// Partial reduction: every work-item folds a strided subset of src with FUNC, the work-group
-// combines those partial results in local memory, and work-item 0 stores the total in dst[gid].
-// NOTE(review): the 128-entry scratch array and the `groupnum << 8` stride look like they
-// assume a work-group size of 256 -- confirm against the host-side launch code.
-__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
-                                __global srcT *src, __global dstT *dst)
-{
-    const int lid = get_local_id(0);
-    const int gid = get_group_id(0);
-    __local dstT localmem_sum[128];
-    dstT sum = (dstT)(0);
-
-    // idx adds invalid_cols once for every complete run of cols elements that precedes id.
-    const int grainSize = groupnum << 8;
-    for (int id = get_global_id(0); id < elemnum; id += grainSize)
-    {
-        const int idx = offset + id + (id / cols) * invalid_cols;
-        dstT temp = convertToDstT(src[idx]);
-        FUNC(temp, sum);
-    }
-
-    // Work-items 128 and above park their partial sums in the scratch array ...
-    if (lid > 127)
-        localmem_sum[lid - 128] = sum;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    // ... and work-items below 128 add their own on top.
-    if (lid < 128)
-        localmem_sum[lid] = sum + localmem_sum[lid];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    // Halve the number of live entries on each pass until the total sits in slot 0.
-    for (int lsize = 64; lsize > 0; lsize >>= 1)
-    {
-        if (lid < lsize)
-            localmem_sum[lid] += localmem_sum[lsize + lid];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    if (lid == 0)
-        dst[gid] = localmem_sum[0];
-}
diff --git a/modules/ocl/src/opencl/arithm_transpose.cl b/modules/ocl/src/opencl/arithm_transpose.cl
deleted file mode 100644
index 8cde654..0000000
--- a/modules/ocl/src/opencl/arithm_transpose.cl
+++ /dev/null
@@ -1,139 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#define TILE_DIM      32 // edge length, in elements, of the square tile one work-group handles
-#define BLOCK_ROWS    8 // row increment between the loop iterations of a single work-item
-#define LDS_STEP      TILE_DIM // row pitch of the local-memory tile
-// Out-of-place transpose: dst(c, r) = src(r, c), staged through a TILE_DIM x TILE_DIM local tile.
-__kernel void transpose(__global const T* src, __global T* dst,
-    int src_cols, int src_rows,
-    int src_step, int dst_step,
-    int src_offset, int dst_offset)
-{
-    int gp_x = get_group_id(0),   gp_y = get_group_id(1);
-    int gs_x = get_num_groups(0), gs_y = get_num_groups(1);
-    // Remap the work-group coordinates; square and non-square inputs use different formulas.
-    int groupId_x, groupId_y;
-
-    if(src_rows == src_cols)
-    {
-        groupId_y = gp_x;
-        groupId_x = (gp_x + gp_y) % gs_x;
-    }
-    else
-    {
-        int bid = gp_x + gs_x * gp_y;
-        groupId_y =  bid % gs_y;
-        groupId_x = ((bid / gs_y) + groupId_y) % gs_x;
-    }
-    // NOTE(review): the i += BLOCK_ROWS loops suggest a TILE_DIM x BLOCK_ROWS work-group -- confirm.
-    int lx = get_local_id(0);
-    int ly = get_local_id(1);
-    // (x, y): source element of this work-item, counted in elements from src_offset.
-    int x = groupId_x * TILE_DIM + lx;
-    int y = groupId_y * TILE_DIM + ly;
-    // (x_index, y_index): destination element; the two group coordinates swap roles here.
-    int x_index = groupId_y * TILE_DIM + lx;
-    int y_index = groupId_x * TILE_DIM + ly;
-    // Local staging tile ("title" is presumably a misspelling of "tile").
-    __local T title[TILE_DIM * LDS_STEP];
-
-    if (x < src_cols && y < src_rows)
-    {
-        int index_src = mad24(y, src_step, x);
-        // Walk down the tile BLOCK_ROWS rows at a time, skipping rows at or past src_rows.
-        for(int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
-        {
-            if (y + i < src_rows)
-            {
-                title[(ly + i) * LDS_STEP + lx] = src[src_offset + index_src];
-                index_src = mad24(BLOCK_ROWS, src_step, index_src);
-            }
-        }
-    }
-    // Every load into the tile must be finished before any work-item reads it back.
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (x_index < src_rows && y_index < src_cols)
-    {
-        int index_dst = mad24(y_index, dst_step, x_index);
-        // Read the tile with swapped indices so that rows and columns trade places.
-        for(int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
-        {
-            if ((y_index + i) < src_cols)
-            {
-                dst[dst_offset + index_dst] = title[lx * LDS_STEP + ly + i];
-                index_dst +=  dst_step * BLOCK_ROWS;
-            }
-        }
-    }
-}
-
-// In-place transpose: each work-item strictly below the diagonal (x < y, y < src_rows) swaps
-// src(y, x) with dst(x, y). NOTE(review): src and dst presumably alias one matrix -- confirm.
-__kernel void transpose_inplace(__global T* src, __global T* dst,
-    int src_cols, int src_rows,
-    int src_step, int dst_step,
-    int src_offset, int dst_offset)
-{
-    const int col = get_global_id(0);
-    const int row = get_global_id(1);
-    if (row >= src_rows || col >= row)
-        return;
-
-    const int lowerIdx = mad24(row, src_step, src_offset + col);
-    const int upperIdx = mad24(col, dst_step, dst_offset + row);
-    const T held = dst[upperIdx];
-    dst[upperIdx] = src[lowerIdx];
-    src[lowerIdx] = held;
-}
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
deleted file mode 100644
index 6a95316..0000000
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ /dev/null
@@ -1,540 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined (CN1)
-#define T_FRAME uchar
-#define T_MEAN_VAR float
-#define CONVERT_TYPE convert_uchar_sat
-#define F_ZERO (0.0f)
-inline float cvt(uchar val)
-{
-    return val;
-}
-
-inline float sqr(float val)
-{
-    return val * val;
-}
-
-inline float sum(float val)
-{
-    return val;
-}
-
-inline float clamp1(float var, float learningRate, float diff, float minVar)
-{
-    return fmax(var + learningRate * (diff * diff - var), minVar);
-}
-
-#else
-
-#define T_FRAME uchar4
-#define T_MEAN_VAR float4
-#define CONVERT_TYPE convert_uchar4_sat
-#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
-
-inline float4 cvt(const uchar4 val)
-{
-    float4 result;
-    result.x = val.x;
-    result.y = val.y;
-    result.z = val.z;
-    result.w = val.w;
-
-    return result;
-}
-
-inline float sqr(const float4 val)
-{
-    return val.x * val.x + val.y * val.y + val.z * val.z;
-}
-
-inline float sum(const float4 val)
-{
-    return (val.x + val.y + val.z);
-}
-
-inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
-{
-    float4 val = ptr[(k * rows + y) * ptr_step + x];
-    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
-    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
-}
-
-
-inline float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
-{
-    float4 result;
-    result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
-    result.y = fmax(var.y + learningRate * (diff.y * diff.y - var.y), minVar);
-    result.z = fmax(var.z + learningRate * (diff.z * diff.z - var.z), minVar);
-    result.w = 0.0f;
-    return result;
-}
-
-#endif
-
-typedef struct
-{
-    float c_Tb;
-    float c_TB;
-    float c_Tg;
-    float c_varInit;
-    float c_varMin;
-    float c_varMax;
-    float c_tau;
-    uchar c_shadowVal;
-} con_srtuct_t;
-
-inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
-{
-    float val = ptr[(k * rows + y) * ptr_step + x];
-    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
-    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
-}
-
-__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
-    __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
-    int frame_row, int frame_col, int frame_step, int fgmask_step,
-    int weight_step, int mean_step, int var_step,
-    float varThreshold, float backgroundRatio, int fgmask_offset_x,
-    int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < frame_col && y < frame_row)
-    {
-        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
-
-        int kHit = -1;
-        int kForeground = -1;
-
-        for (int k = 0; k < (NMIXTURES); ++k)
-        {
-            if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f)
-                break;
-
-            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
-            T_MEAN_VAR _var = var[(k * frame_row + y) + var_step + x];
-
-            T_MEAN_VAR diff = pix - mu;
-
-            if (sqr(diff) < varThreshold * sum(_var))
-            {
-                kHit = k;
-                break;
-            }
-        }
-
-        if (kHit >= 0)
-        {
-            float wsum = 0.0f;
-            for (int k = 0; k < (NMIXTURES); ++k)
-            {
-                wsum += weight[(k * frame_row + y) * weight_step + x];
-
-                if (wsum > backgroundRatio)
-                {
-                    kForeground = k + 1;
-                    break;
-                }
-            }
-        }
-        if(kHit < 0 || kHit >= kForeground)
-            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
-        else
-            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
-    }
-}
-
-__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
-    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
-    __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
-    int weight_step, int sortKey_step, int mean_step, int var_step,
-    float varThreshold, float backgroundRatio, float learningRate, float minVar,
-    int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
-{
-    const float w0 = 0.05f;
-    const float sk0 = w0 / 30.0f;
-    const float var0 = 900.f;
-
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x >= frame_col || y >= frame_row) return;
-    float wsum = 0.0f;
-    int kHit = -1;
-    int kForeground = -1;
-    int k = 0;
-
-    T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
-
-    for (; k < (NMIXTURES); ++k)
-    {
-        float w = weight[(k * frame_row + y) * weight_step + x];
-        wsum += w;
-
-        if (w < 1.192092896e-07f)
-            break;
-
-        T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
-        T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
-
-        float sortKey_prev, weight_prev;
-        T_MEAN_VAR mean_prev, var_prev;
-        if (sqr(pix - mu) < varThreshold * sum(_var))
-        {
-            wsum -= w;
-            float dw = learningRate * (1.0f - w);
-
-            _var = clamp1(_var, learningRate, pix - mu, minVar);
-
-            sortKey_prev = w / sqr(sum(_var));
-            sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
-
-            weight_prev = w + dw;
-            weight[(k * frame_row + y) * weight_step + x] = weight_prev;
-
-            mean_prev = mu + learningRate * (pix - mu);
-            mean[(k * frame_row + y) * mean_step + x] = mean_prev;
-
-            var_prev = _var;
-            var[(k * frame_row + y) * var_step + x] = var_prev;
-        }
-
-        int k1 = k - 1;
-
-        if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var))
-        {
-            float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
-            float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
-            T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
-            T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
-
-            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
-            {
-                sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
-                sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
-
-                weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
-                weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
-
-                mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
-                mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
-
-                var[(k1 * frame_row + y) * var_step + x] = var_prev;
-                var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
-
-                sortKey_prev = sortKey_next;
-                sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
-
-                weight_prev = weight_next;
-                weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
-
-                mean_prev = mean_next;
-                mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
-
-                var_prev = var_next;
-                var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
-            }
-        }
-
-        kHit = k1 + 1;
-        break;
-    }
-
-    if (kHit < 0)
-    {
-        kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
-        wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
-
-        weight[(k * frame_row + y) * weight_step + x] = w0;
-        mean[(k * frame_row + y) * mean_step + x] = pix;
-#if defined (CN1)
-        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
-#else
-        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
-#endif
-        sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
-    }
-    else
-    {
-        for( ; k < (NMIXTURES); k++)
-            wsum += weight[(k * frame_row + y) * weight_step + x];
-    }
-
-    float wscale = 1.0f / wsum;
-    wsum = 0;
-    for (k = 0; k < (NMIXTURES); ++k)
-    {
-        float w = weight[(k * frame_row + y) * weight_step + x];
-        w *= wscale;
-        wsum += w;
-
-        weight[(k * frame_row + y) * weight_step + x] = w;
-        sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
-
-        kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
-    }
-    fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
-}
-
-
-__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
-    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
-    float backgroundRatio)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < dst_col && y < dst_row)
-    {
-        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
-        float totalWeight = 0.0f;
-
-        for (int mode = 0; mode < (NMIXTURES); ++mode)
-        {
-            float _weight = weight[(mode * dst_row + y) * weight_step + x];
-
-            T_MEAN_VAR _mean = mean[(mode * dst_row + y) * mean_step + x];
-            meanVal = meanVal + _weight * _mean;
-
-            totalWeight += _weight;
-
-            if(totalWeight > backgroundRatio)
-                break;
-        }
-        meanVal = meanVal * (1.f / totalWeight);
-        dst[y * dst_step + x] = CONVERT_TYPE(meanVal);
-    }
-}
-
-__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
-        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
-        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
-        int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < frame_col && y < frame_row)
-    {
-        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]);
-
-        bool background = false; // true - the pixel classified as background
-
-        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
-
-        int nmodes = modesUsed[y * modesUsed_step + x];
-        int nNewModes = nmodes; //current number of modes in GMM
-
-        float totalWeight = 0.0f;
-
-        for (int mode = 0; mode < nmodes; ++mode)
-        {
-            float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune;
-
-            if (!fitsPDF)
-            {
-                float var = variance[(mode * frame_row + y) * var_step + x];
-
-                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
-
-                T_MEAN_VAR diff = _mean - pix;
-                float dist2 = sqr(diff);
-
-                if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var)
-                    background = true;
-
-                if (dist2 < constants -> c_Tg * var)
-                {
-                    fitsPDF = true;
-                    _weight += alphaT;
-                    float k = alphaT / _weight;
-                    mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff;
-                    float varnew = var + k * (dist2 - var);
-                    varnew = fmax(varnew, constants -> c_varMin);
-                    varnew = fmin(varnew, constants -> c_varMax);
-
-                    variance[(mode * frame_row + y) * var_step + x] = varnew;
-                    for (int i = mode; i > 0; --i)
-                    {
-                        if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x])
-                            break;
-                        swap(weight, x, y, i - 1, frame_row, weight_step);
-                        swap(variance, x, y, i - 1, frame_row, var_step);
-                        #if defined (CN1)
-                        swap(mean, x, y, i - 1, frame_row, mean_step);
-                        #else
-                        swap4(mean, x, y, i - 1, frame_row, mean_step);
-                        #endif
-                    }
-                }
-            } // !fitsPDF
-
-            if (_weight < -prune)
-            {
-                _weight = 0.0f;
-                nmodes--;
-            }
-
-            weight[(mode * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value
-            totalWeight += _weight;
-        }
-
-        totalWeight = 1.f / totalWeight;
-        for (int mode = 0; mode < nmodes; ++mode)
-            weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
-
-        nmodes = nNewModes;
-
-        if (!fitsPDF)
-        {
-            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
-
-            if (nmodes == 1)
-                weight[(mode * frame_row + y) * weight_step + x] = 1.f;
-            else
-            {
-                weight[(mode * frame_row + y) * weight_step + x] = alphaT;
-
-                for (int i = 0; i < nmodes - 1; ++i)
-                    weight[(i * frame_row + y) * weight_step + x] *= alpha1;
-            }
-
-            mean[(mode * frame_row + y) * mean_step + x] = pix;
-            variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit;
-
-            for (int i = nmodes - 1; i > 0; --i)
-            {
-                // check one up
-                if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x])
-                    break;
-
-                swap(weight, x, y, i - 1, frame_row, weight_step);
-                swap(variance, x, y, i - 1, frame_row, var_step);
-                #if defined (CN1)
-                swap(mean, x, y, i - 1, frame_row, mean_step);
-                #else
-                swap4(mean, x, y, i - 1, frame_row, mean_step);
-                #endif
-            }
-        }
-
-        modesUsed[y * modesUsed_step + x] = nmodes;
-
-        bool isShadow = false;
-        if (detectShadows_flag && !background)
-        {
-            float tWeight = 0.0f;
-
-            for (int mode = 0; mode < nmodes; ++mode)
-            {
-                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
-
-                T_MEAN_VAR pix_mean = pix * _mean;
-
-                float numerator = sum(pix_mean);
-                float denominator = sqr(_mean);
-
-                if (denominator == 0)
-                    break;
-
-                if (numerator <= denominator && numerator >= constants -> c_tau * denominator)
-                {
-                    float a = numerator / denominator;
-
-                    T_MEAN_VAR dD = a * _mean - pix;
-
-                    if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a)
-                    {
-                        isShadow = true;
-                        break;
-                    }
-                }
-
-                tWeight += weight[(mode * frame_row + y) * weight_step + x];
-                if (tWeight > constants -> c_TB)
-                    break;
-            }
-        }
-
-        fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? constants -> c_shadowVal : 255;
-    }
-}
-
-__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
-    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
-    int mean_step, int dst_step, int dst_x, int dst_y)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < modesUsed_col && y < modesUsed_row)
-    {
-        int nmodes = modesUsed[y * modesUsed_step + x];
-
-        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
-
-        float totalWeight = 0.0f;
-
-        for (int mode = 0; mode < nmodes; ++mode)
-        {
-            float _weight = weight[(mode * modesUsed_row + y) * weight_step + x];
-
-            T_MEAN_VAR _mean = mean[(mode * modesUsed_row + y) * mean_step + x];
-            meanVal = meanVal + _weight * _mean;
-
-            totalWeight += _weight;
-
-            if(totalWeight > c_TB)
-                break;
-        }
-
-        meanVal = meanVal * (1.f / totalWeight);
-        dst[(y + dst_y) * dst_step + x + dst_x] = CONVERT_TYPE(meanVal);
-    }
-}
diff --git a/modules/ocl/src/opencl/blend_linear.cl b/modules/ocl/src/opencl/blend_linear.cl
deleted file mode 100644
index bc7aa46..0000000
--- a/modules/ocl/src/opencl/blend_linear.cl
+++ /dev/null
@@ -1,78 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Liu Liujun, liujun@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void blendLinear(__global const T * src1, int src1_offset, int src1_step,
-                          __global const T * src2, int src2_offset, int src2_step,
-                          __global const float * weight1, int weight1_offset, int weight1_step,
-                          __global const float * weight2, int weight2_offset, int weight2_step,
-                          __global T * dst, int dst_offset, int dst_step,
-                          int rows, int cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, src1_offset + x);
-        int src2_index = mad24(y, src2_step, src2_offset + x);
-        int weight1_index = mad24(y, weight1_step, weight1_offset + x);
-        int weight2_index = mad24(y, weight2_step, weight2_offset + x);
-        int dst_index = mad24(y, dst_step, dst_offset + x);
-
-        FT w1 = (FT)(weight1[weight1_index]), w2 = (FT)(weight2[weight2_index]);
-        FT den = w1 + w2 + (FT)(1e-5f);
-        FT num = w1 * convertToFT(src1[src1_index]) + w2 * convertToFT(src2[src2_index]);
-
-        dst[dst_index] = convertToT(num / den);
-    }
-}
diff --git a/modules/ocl/src/opencl/brief.cl b/modules/ocl/src/opencl/brief.cl
deleted file mode 100644
index 343e95b..0000000
--- a/modules/ocl/src/opencl/brief.cl
+++ /dev/null
@@ -1,173 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Matthias Bady, aegirxx ==> gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef BYTES
- #define BYTES 16
-#endif
-
-#ifndef KERNEL_SIZE
- #define KERNEL_SIZE 9
-#endif
-
-#ifndef BORDER
- #define BORDER 0
-#endif
-
-#define HALF_KERNEL (KERNEL_SIZE/2)
-
-__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
-
-__constant char tests[32 * BYTES] =
-{
--2,-1,7,-1,-14,-1,-3,3,1,-2,11,2,1,6,-10,-7,13,2,-1,0,-14,5,5,-3,-2,8,2,4,-11,8,-15,5,
--6,-23,8,-9,-12,6,-10,8,-3,-1,8,1,3,6,5,6,-7,-6,5,-5,22,-2,-11,-8,14,7,8,5,-1,14,-5,-14,
--14,9,2,0,7,-3,22,6,-6,6,-8,-5,-5,9,7,-1,-3,-7,-10,-18,4,-5,0,11,2,3,9,10,-10,3,4,9,
-0,12,-3,19,1,15,-11,-5,14,-1,7,8,7,-23,-5,5,0,-6,-10,17,13,-4,-3,-4,-12,1,-12,2,0,8,3,22,
--13,13,3,-1,-16,17,6,10,7,15,-5,0,2,-12,19,-2,3,-6,-4,-15,8,3,0,14,4,-11,5,5,11,-7,7,1,
-6,12,21,3,-3,2,14,1,5,1,-5,11,3,-17,-6,2,6,8,5,-10,-14,-2,0,4,5,-7,-6,5,10,4,4,-7,
-22,0,7,-18,-1,-3,0,18,-4,22,-5,3,1,-7,2,-3,19,-20,17,-2,3,-10,-8,24,-5,-14,7,5,-2,12,-4,-15,
-4,12,0,-19,20,13,3,5,-8,-12,5,0,-5,6,-7,-11,6,-11,-3,-22,15,4,10,1,-7,-4,15,-6,5,10,0,24,
-3,6,22,-2,-13,14,4,-4,-13,8,-18,-22,-1,-1,-7,3,-19,-12,4,3,8,10,13,-2,-6,-1,-6,-5,2,-21,-3,2,
-4,-7,0,16,-6,-5,-12,-1,1,-1,9,18,-7,10,-11,6,4,3,19,-7,-18,5,-4,5,4,0,-20,4,7,-11,18,12,
--20,17,-18,7,2,15,19,-11,-18,6,-7,3,-4,1,-14,13,17,3,2,-8,-7,2,1,6,17,-9,-2,8,-8,-6,-1,12,
--2,4,-1,6,-2,7,6,8,-8,-1,-7,-9,8,-9,15,0,0,22,-4,-15,-14,-1,3,-2,-7,-4,17,-7,-8,-2,9,-4,
-5,-7,7,7,-5,13,-8,11,11,-4,0,8,5,-11,-9,-6,2,-6,3,-20,-6,2,6,10,-6,-6,-15,7,-6,-3,2,1,
-11,0,-3,2,7,-12,14,5,0,-7,-1,-1,-16,0,6,8,22,11,0,-3,19,0,5,-17,-23,-14,-13,-19,-8,10,-11,-2,
--11,6,-10,13,1,-7,14,0,-12,1,-5,-5,4,7,8,-1,-1,-5,15,2,-3,-1,7,-10,3,-6,10,-18,-7,-13,-13,10,
-1,-1,13,-10,-19,14,8,-14,-4,-13,7,1,1,-2,12,-7,3,-5,1,-5,-2,-2,8,-10,2,14,8,7,3,9,8,2
-#if BYTES > 16
-,-9,1,-18,0,4,0,1,12,0,9,-14,-10,-13,-9,-2,6,1,5,10,10,-3,-6,-16,-5,11,6,-5,0,-23,10,1,2,
-13,-5,-3,9,-4,-1,-13,-5,10,13,-11,8,19,20,-9,2,4,-8,0,-9,-14,10,15,19,-14,-12,-10,-3,-23,-3,17,-2,
--3,-11,6,-14,19,-2,-4,2,-5,5,3,-13,2,-2,-5,4,17,4,17,-11,-7,-2,1,23,8,13,1,-16,-13,-5,1,-17,
-4,6,-8,-3,-5,-9,-2,-10,-9,0,-7,-2,5,0,5,2,-4,-16,6,3,2,-15,-2,12,4,-1,6,2,1,1,-2,-8,
--2,12,-5,-2,-8,8,-9,9,2,-10,3,1,-4,10,-9,4,6,12,2,5,-3,-8,0,5,-13,1,-7,2,-1,-10,7,-18,
--1,8,-9,-10,-23,-1,6,2,-5,-3,3,2,0,11,-4,-7,15,2,-10,-3,-20,-8,-13,3,-19,-12,5,-11,-17,-13,-3,2,
-7,4,-12,0,5,-1,-14,-6,-4,11,0,-4,3,10,7,-3,13,21,-11,6,-12,24,-7,-4,4,16,3,-14,-3,5,-7,-12,
-0,-4,7,-5,-17,-9,13,-7,22,-6,-11,5,2,-8,23,-11,7,-10,-1,14,-3,-10,8,3,-13,1,-6,0,-7,-21,6,-14,
-18,19,-4,-6,10,7,-1,-4,-1,21,1,-5,-10,6,-11,-2,18,-3,-1,7,-3,-9,-5,10,-13,14,17,-3,11,-19,-1,-18,
-8,-2,-18,-23,0,-5,-2,-9,-4,-11,2,-8,14,6,-3,-6,-3,0,-15,0,-9,4,-15,-9,-1,11,3,11,-10,-16,-7,7,
--2,-10,-10,-2,-5,-3,5,-23,13,-8,-15,-11,-15,11,6,-6,-16,-3,-2,2,6,12,-16,24,-10,0,8,11,-7,7,-19,-7,
-5,16,9,-3,9,7,-7,-16,3,2,-10,9,21,1,8,7,7,0,1,17,-8,12,9,6,11,-7,-8,-6,19,0,9,3,
-1,-7,-5,-11,0,8,-2,14,12,-2,-15,-6,4,12,0,-21,17,-4,-6,-7,-10,-9,-14,-7,-15,-10,-15,-14,-7,-5,5,-12,
--4,0,15,-4,5,2,-6,-23,-4,-21,-6,4,-10,5,-15,6,4,-3,-1,5,-4,19,-23,-4,-4,17,13,-11,1,12,4,-14,
--11,-6,-20,10,4,5,3,20,-8,-20,3,1,-19,9,9,-3,18,15,11,-4,12,16,8,7,-14,-8,-3,9,-6,0,2,-4,
-1,-10,-1,2,8,-7,-6,18,9,12,-7,-23,8,-6,5,2,-9,6,-12,-7,-1,-2,-7,2,9,9,7,15,6,2,-6,6
-#endif
-#if BYTES > 32
-,16,12,0,19,4,3,6,0,-2,-1,2,17,8,1,3,1,-12,-1,-11,0,-11,2,7,9,-1,3,-19,4,-1,-11,-1,3,
-1,-10,-10,-4,-2,3,6,11,3,7,-9,-8,24,-14,-2,-10,-3,-3,-18,-6,-13,-10,-7,-1,2,-7,9,-6,2,-4,6,-13,
-4,-4,-2,3,-4,2,9,13,-11,5,-6,-11,4,-2,11,-9,-19,0,-23,-5,-5,-7,-3,-6,-6,-4,12,14,12,-11,-8,-16,
--21,15,-12,6,-2,-1,-8,16,6,-1,-8,-2,1,-1,-9,8,3,-4,-2,-2,-7,0,4,-8,11,-11,-12,2,2,3,11,7,
--7,-4,-9,-6,3,-7,-5,0,3,-7,-10,-5,-3,-1,8,-10,0,8,5,1,9,0,1,16,8,4,-11,-3,-15,9,8,17,
-0,2,-9,17,-6,-11,-10,-3,1,1,15,-8,-12,-13,-2,4,-6,4,-6,-10,5,-7,7,-5,10,6,8,9,-5,7,-18,-3,
--6,3,5,4,-10,-13,-5,-3,-11,2,-16,0,7,-21,-5,-13,-14,-14,-4,-4,4,9,7,-3,4,11,10,-4,6,17,9,17,
--10,8,0,-11,-6,-16,-6,8,-13,5,10,-5,3,2,12,16,13,-8,0,-6,10,0,4,-11,8,5,10,-2,11,-7,-13,3,
-2,4,-7,-3,-14,-2,-11,16,11,-6,7,6,-3,15,8,-10,-3,8,12,-12,-13,6,-14,7,-11,-5,-8,-6,7,-6,6,3,
--4,10,5,1,9,16,10,13,-17,10,2,8,-5,1,4,-4,-14,8,-5,2,4,-9,-6,-3,3,-7,-10,0,-2,-8,-10,4,
--8,5,-9,24,2,-8,8,-9,-4,17,-5,2,14,0,-9,9,11,15,-6,5,-8,1,-3,4,9,-21,10,2,2,-1,4,11,
-24,3,2,-2,-8,17,-14,-10,6,5,-13,7,11,10,0,-1,4,6,-10,6,-12,-2,5,6,3,-1,8,-15,1,-4,-7,11,
-1,11,5,0,6,-12,10,1,-3,-2,-1,4,-2,-11,-1,12,7,-8,-20,-18,2,0,-9,2,-13,-1,-16,2,3,-1,-5,-17,
-15,8,3,-14,-13,-12,6,15,2,-8,2,6,6,22,-3,-23,-2,-7,-6,0,13,-10,-6,6,6,7,-10,12,-6,7,-2,11,
-0,-22,-2,-17,-4,-1,-11,-14,-2,-8,7,12,12,-5,7,-13,2,-2,-7,6,0,8,-3,23,6,12,13,-11,-21,-10,10,8,
--3,0,7,15,7,-6,-5,-12,-21,-10,12,-11,-5,-11,8,-11,5,0,-11,-1,8,-9,7,-1,11,-23,21,-5,0,-5,-8,6,
--6,8,8,12,-7,5,3,-2,-5,-20,-12,9,-6,12,-11,3,4,5,13,11,2,12,13,-12,-4,-13,4,7,0,15,-3,-16,
--3,2,-2,14,4,-14,16,-11,-13,3,23,10,9,-19,2,5,5,3,14,-7,19,-13,-11,15,14,0,-2,-5,11,-4,0,-6,
--2,5,-13,-8,-11,-15,-7,-17,1,3,-10,-8,-13,-10,7,-12,0,-13,23,-6,2,-17,-7,-3,1,3,4,-10,13,4,14,-6,
--19,-2,-1,5,9,-8,10,-5,7,-1,5,7,9,-10,19,0,7,5,-4,-7,-11,1,-1,-11,2,-1,-4,11,-1,7,2,-2,
-1,-20,-9,-6,-4,-18,8,-18,-16,-2,7,-6,-3,-6,-1,-4,0,-16,24,-5,-4,-2,-1,9,-8,2,-6,15,11,4,0,-3,
-7,6,2,-10,-7,-9,12,-6,24,15,-8,-1,15,-9,-3,-15,17,-5,11,-10,-2,13,-15,4,-2,-1,4,-23,-16,3,-7,-14,
--3,-5,-10,-9,-5,3,-2,-1,-1,4,1,8,12,9,9,-14,-9,17,-3,0,5,4,13,-6,-1,-8,19,10,8,-5,-15,2,
--12,-9,-4,-5,12,0,24,4,8,-2,14,4,8,-4,-7,16,5,-1,-8,-4,-2,18,-5,17,8,-2,-9,-2,3,-7,1,-6,
--5,-22,-5,-2,-8,-10,14,1,-3,-13,3,9,-4,-1,-1,0,-7,-21,12,-19,-8,8,24,8,12,-6,-2,3,-5,-11,-22,-4,
--3,5,-4,4,-16,24,7,-9,-10,23,-9,18,1,12,17,21,24,-6,-3,-11,-7,17,1,-6,4,4,2,-7,14,6,-12,3,
--6,0,-16,13,-10,5,7,12,5,2,6,-3,7,0,-23,1,15,-5,1,14,-3,-1,6,6,6,-9,-9,12,4,-2,-4,7,
--4,-5,4,4,-13,0,6,-10,2,-12,-6,-3,16,0,-3,3,5,-14,6,11,5,11,0,-13,7,5,-1,-5,12,4,6,10,
--10,4,-1,-11,4,10,-14,5,11,-14,-13,0,2,8,12,24,-1,3,-1,2,9,-14,-23,3,-8,-6,0,9,-15,14,10,-10,
--10,-6,-7,-5,11,5,-3,-15,1,0,1,8,-11,-6,-4,-18,9,0,22,-4,-5,-1,-9,4,-20,2,1,6,1,2,-9,-12,
-5,15,4,-6,19,4,4,11,17,-4,-8,-1,-8,-12,7,-3,11,9,8,1,9,22,-15,15,-7,-7,1,-23,-5,13,-8,2,
-3,-5,11,-11,3,-18,14,-5,-20,7,-10,-23,-2,-5,6,0,-17,-13,-3,2,-6,-1,14,-2,-12,-16,15,6,-12,-2,3,-19
-#endif
-};
-
-inline int smoothedSum(__read_only image2d_t sum, const int2 kpPos, const int2 pt)
-{
-    return ( read_imagei( sum, sampler, kpPos + pt + (int2)(  HALF_KERNEL + 1,  HALF_KERNEL + 1 ))
-           - read_imagei( sum, sampler, kpPos + pt + (int2)( -HALF_KERNEL,      HALF_KERNEL + 1 ))
-           - read_imagei( sum, sampler, kpPos + pt + (int2)(  HALF_KERNEL + 1, -HALF_KERNEL ))
-           + read_imagei( sum, sampler, kpPos + pt + (int2)( -HALF_KERNEL,     -HALF_KERNEL ))).x;
-}
-
-__kernel void extractBriefDescriptors(
-    __read_only image2d_t sumImg,
-    __global float* keypoints, int kpRowStep,
-    __global uchar* descriptors, int dscRowStep,
-    __global uchar* mask)
-{
-    const int  byte  = get_local_id(0);
-    const int  kpId  = get_group_id(0);
-
-    if( !mask[kpId])
-    {
-        return;
-    }
-    const float2 kpPos = (float2)(keypoints[kpId], keypoints[kpRowStep/4 + kpId]);
-    if( kpPos.x < BORDER
-     || kpPos.y < BORDER
-     || kpPos.x >= (get_image_width(  sumImg ) - BORDER)
-     || kpPos.y >= (get_image_height( sumImg ) - BORDER) )
-    {
-        if( byte == 0) mask[kpId] = 0;
-        return;
-    }
-    uchar descByte = 0;
-    const int2 pt = (int2)( kpPos.x + 0.5f, kpPos.y + 0.5f );
-    for(int i = 0; i<8; ++i)
-    {
-        descByte |= (
-            smoothedSum(sumImg, pt, (int2)( tests[byte * 32 + (i * 4) + 1], tests[byte * 32 + (i * 4) + 0] ))
-          < smoothedSum(sumImg, pt, (int2)( tests[byte * 32 + (i * 4) + 3], tests[byte * 32 + (i * 4) + 2] ))
-          ) << (7-i);
-    }
-    descriptors[kpId * dscRowStep + byte] = descByte;
-    if( byte == 0) mask[kpId] = 1;
-}
diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl
deleted file mode 100644
index 5447370..0000000
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ /dev/null
@@ -1,789 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Nathan, liujun@multicorewareinc.com
-//    Peng Xiao, pengxiao@outlook.com
-//    Baichuan Su, baichuan@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
-#define MAX_FLOAT 3.40282e+038f
-
-#ifndef T
-#define T float
-#endif
-
-#ifndef BLOCK_SIZE
-#define BLOCK_SIZE 16
-#endif
-#ifndef MAX_DESC_LEN
-#define MAX_DESC_LEN 64
-#endif
-
-#ifndef DIST_TYPE
-#define DIST_TYPE 0
-#endif
-
-// dirty fix for non-template support
-#if   (DIST_TYPE == 0) // L1Dist
-#   ifdef T_FLOAT
-#       define DIST(x, y) fabs((x) - (y))
-        typedef float value_type;
-        typedef float result_type;
-#   else
-#       define DIST(x, y) abs((x) - (y))
-        typedef int value_type;
-        typedef int result_type;
-#   endif
-#define DIST_RES(x) (x)
-#elif (DIST_TYPE == 1) // L2Dist
-#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
-typedef float value_type;
-typedef float result_type;
-#define DIST_RES(x) sqrt(x)
-#elif (DIST_TYPE == 2) // Hamming
-//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-inline int bit1Count(int v)
-{
-    v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-}
-#define DIST(x, y) bit1Count( (x) ^ (y) )
-typedef int value_type;
-typedef int result_type;
-#define DIST_RES(x) (x)
-#endif
-
-inline result_type reduce_block(
-    __local value_type *s_query,
-    __local value_type *s_train,
-    int lidx,
-    int lidy
-    )
-{
-    result_type result = 0;
-    #pragma unroll
-    for (int j = 0 ; j < BLOCK_SIZE ; j++)
-    {
-        result += DIST(
-            s_query[lidy * BLOCK_SIZE + j],
-            s_train[j * BLOCK_SIZE + lidx]);
-    }
-    return DIST_RES(result);
-}
-
-inline result_type reduce_block_match(
-    __local value_type *s_query,
-    __local value_type *s_train,
-    int lidx,
-    int lidy
-    )
-{
-    result_type result = 0;
-    #pragma unroll
-    for (int j = 0 ; j < BLOCK_SIZE ; j++)
-    {
-        result += DIST(
-            s_query[lidy * BLOCK_SIZE + j],
-            s_train[j * BLOCK_SIZE + lidx]);
-    }
-    return (result);
-}
-
-inline result_type reduce_multi_block(
-    __local value_type *s_query,
-    __local value_type *s_train,
-    int block_index,
-    int lidx,
-    int lidy
-    )
-{
-    result_type result = 0;
-    #pragma unroll
-    for (int j = 0 ; j < BLOCK_SIZE ; j++)
-    {
-        result += DIST(
-            s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
-            s_train[j * BLOCK_SIZE + lidx]);
-    }
-    return result;
-}
-
-/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
-local size: dim0 is BLOCK_SIZE, dim1 is BLOCK_SIZE.
-*/
-__kernel void BruteForceMatch_UnrollMatch(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global int *bestTrainIdx,
-    __global float *bestDistance,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
-
-    int queryIdx = groupidx * BLOCK_SIZE + lidy;
-    // load the query into local memory.
-    #pragma unroll
-    for (int i = 0 ;  i <  MAX_DESC_LEN / BLOCK_SIZE; i ++)
-    {
-        int loadx = lidx + i * BLOCK_SIZE;
-        s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-    }
-
-    float myBestDistance = MAX_FLOAT;
-    int myBestTrainIdx = -1;
-
-    // loopUnrolledCached to find the best trainIdx and best distance.
-    for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
-    {
-        result_type result = 0;
-        #pragma unroll
-        for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
-        {
-            //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
-            const int loadx = lidx + i * BLOCK_SIZE;
-            s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
-
-            //synchronize to make sure each elem for reduceIteration in share memory is written already.
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        result = DIST_RES(result);
-
-        int trainIdx = t * BLOCK_SIZE + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
-        {
-            myBestDistance = result;
-            myBestTrainIdx = trainIdx;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    __local float *s_distance = (__local float*)(sharebuffer);
-    __local int* s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
-    //find BestMatch
-    s_distance += lidy * BLOCK_SIZE;
-    s_trainIdx += lidy * BLOCK_SIZE;
-    s_distance[lidx] = myBestDistance;
-    s_trainIdx[lidx] = myBestTrainIdx;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //reduce -- now all reduce implement in each threads.
-    #pragma unroll
-    for (int k = 0 ; k < BLOCK_SIZE; k++)
-    {
-        if (myBestDistance > s_distance[k])
-        {
-            myBestDistance = s_distance[k];
-            myBestTrainIdx = s_trainIdx[k];
-        }
-    }
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = myBestTrainIdx;
-        bestDistance[queryIdx] = myBestDistance;
-    }
-}
-
-__kernel void BruteForceMatch_Match(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global int *bestTrainIdx,
-    __global float *bestDistance,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * BLOCK_SIZE + lidy;
-
-    float myBestDistance = MAX_FLOAT;
-    int myBestTrainIdx = -1;
-
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
-    // loop
-    for (int t = 0 ;  t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
-    {
-        result_type result = 0;
-        for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
-        {
-            const int loadx = lidx + i * BLOCK_SIZE;
-            //load query and train into local memory
-            s_query[lidy * BLOCK_SIZE + lidx] = 0;
-            s_train[lidx * BLOCK_SIZE + lidy] = 0;
-
-            if (loadx < query_cols)
-            {
-                s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
-                s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            result += reduce_block_match(s_query, s_train, lidx, lidy);
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        result = DIST_RES(result);
-
-        const int trainIdx = t * BLOCK_SIZE + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
-        {
-            myBestDistance = result;
-            myBestTrainIdx = trainIdx;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float *s_distance = (__local float *)sharebuffer;
-    __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
-    //findBestMatch
-    s_distance += lidy * BLOCK_SIZE;
-    s_trainIdx += lidy * BLOCK_SIZE;
-    s_distance[lidx] = myBestDistance;
-    s_trainIdx[lidx] = myBestTrainIdx;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //reduce -- now all reduce implement in each threads.
-    for (int k = 0 ; k < BLOCK_SIZE; k++)
-    {
-        if (myBestDistance > s_distance[k])
-        {
-            myBestDistance = s_distance[k];
-            myBestTrainIdx = s_trainIdx[k];
-        }
-    }
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = myBestTrainIdx;
-        bestDistance[queryIdx] = myBestDistance;
-    }
-}
-
-//radius_unrollmatch
-__kernel void BruteForceMatch_RadiusUnrollMatch(
-    __global T *query,
-    __global T *train,
-    float maxDistance,
-    //__global float *mask,
-    __global int *bestTrainIdx,
-    __global float *bestDistance,
-    __global int *nMatches,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int bestTrainIdx_cols,
-    int step,
-    int ostep
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-    const int groupidy = get_group_id(1);
-
-    const int queryIdx = groupidy * BLOCK_SIZE + lidy;
-    const int trainIdx = groupidx * BLOCK_SIZE + lidx;
-
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
-    result_type result = 0;
-    for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
-    {
-        //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
-        const int loadx = lidx + i * BLOCK_SIZE;
-
-        s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-        s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-
-        //synchronize to make sure each elem for reduceIteration in share memory is written already.
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        result += reduce_block(s_query, s_train, lidx, lidy);
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (queryIdx < query_rows && trainIdx < train_rows &&
-        convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
-    {
-        int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
-
-        if(ind < bestTrainIdx_cols)
-        {
-            bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
-            bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
-        }
-    }
-}
-
-//radius_match
-__kernel void BruteForceMatch_RadiusMatch(
-    __global T *query,
-    __global T *train,
-    float maxDistance,
-    //__global float *mask,
-    __global int *bestTrainIdx,
-    __global float *bestDistance,
-    __global int *nMatches,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int bestTrainIdx_cols,
-    int step,
-    int ostep
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-    const int groupidy = get_group_id(1);
-
-    const int queryIdx = groupidy * BLOCK_SIZE + lidy;
-    const int trainIdx = groupidx * BLOCK_SIZE + lidx;
-
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
-    result_type result = 0;
-    for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
-    {
-        //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
-        const int loadx = lidx + i * BLOCK_SIZE;
-
-        s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-        s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-
-        //synchronize to make sure each elem for reduceIteration in share memory is written already.
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        result += reduce_block(s_query, s_train, lidx, lidy);
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (queryIdx < query_rows && trainIdx < train_rows &&
-        convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
-    {
-        int ind = atom_inc(nMatches + queryIdx);
-
-        if(ind < bestTrainIdx_cols)
-        {
-            bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
-            bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
-        }
-    }
-}
-
-
-__kernel void BruteForceMatch_knnUnrollMatch(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global int2 *bestTrainIdx,
-    __global float2 *bestDistance,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * BLOCK_SIZE + lidy;
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
-
-    // load the query into local memory.
-    for (int i = 0 ;  i <  MAX_DESC_LEN / BLOCK_SIZE; i ++)
-    {
-        int loadx = lidx + i * BLOCK_SIZE;
-        s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-    }
-
-    float myBestDistance1 = MAX_FLOAT;
-    float myBestDistance2 = MAX_FLOAT;
-    int myBestTrainIdx1 = -1;
-    int myBestTrainIdx2 = -1;
-
-    //loopUnrolledCached
-    for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
-    {
-        result_type result = 0;
-        for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
-        {
-            //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
-            const int loadx = lidx + i * BLOCK_SIZE;
-            s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
-
-            //synchronize to make sure each elem for reduceIteration in share memory is written already.
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        result = DIST_RES(result);
-
-        const int trainIdx = t * BLOCK_SIZE + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows)
-        {
-            if (result < myBestDistance1)
-            {
-                myBestDistance2 = myBestDistance1;
-                myBestTrainIdx2 = myBestTrainIdx1;
-                myBestDistance1 = result;
-                myBestTrainIdx1 = trainIdx;
-            }
-            else if (result < myBestDistance2)
-            {
-                myBestDistance2 = result;
-                myBestTrainIdx2 = trainIdx;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float *s_distance = (local float *)sharebuffer;
-    __local int *s_trainIdx = (local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
-    // find BestMatch
-    s_distance += lidy * BLOCK_SIZE;
-    s_trainIdx += lidy * BLOCK_SIZE;
-
-    s_distance[lidx] = myBestDistance1;
-    s_trainIdx[lidx] = myBestTrainIdx1;
-
-    float bestDistance1 = MAX_FLOAT;
-    float bestDistance2 = MAX_FLOAT;
-    int bestTrainIdx1 = -1;
-    int bestTrainIdx2 = -1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < BLOCK_SIZE ; i++)
-        {
-            float val = s_distance[i];
-            if (val < bestDistance1)
-            {
-                bestDistance2 = bestDistance1;
-                bestTrainIdx2 = bestTrainIdx1;
-
-                bestDistance1 = val;
-                bestTrainIdx1 = s_trainIdx[i];
-            }
-            else if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    s_distance[lidx] = myBestDistance2;
-    s_trainIdx[lidx] = myBestTrainIdx2;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < BLOCK_SIZE ; i++)
-        {
-            float val = s_distance[i];
-
-            if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    myBestDistance1 = bestDistance1;
-    myBestDistance2 = bestDistance2;
-
-    myBestTrainIdx1 = bestTrainIdx1;
-    myBestTrainIdx2 = bestTrainIdx2;
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
-        bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
-    }
-}
-
-__kernel void BruteForceMatch_knnMatch(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global int2 *bestTrainIdx,
-    __global float2 *bestDistance,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step
-)
-{
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * BLOCK_SIZE + lidy;
-    __local value_type *s_query = (__local value_type *)sharebuffer;
-    __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
-    float myBestDistance1 = MAX_FLOAT;
-    float myBestDistance2 = MAX_FLOAT;
-    int myBestTrainIdx1 = -1;
-    int myBestTrainIdx2 = -1;
-
-    //loop
-    for (int  t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
-    {
-        result_type result = 0.0f;
-        for (int i = 0 ; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
-        {
-            const int loadx = lidx + i * BLOCK_SIZE;
-            //load query and train into local memory
-            s_query[lidy * BLOCK_SIZE + lidx] = 0;
-            s_train[lidx * BLOCK_SIZE + lidy] = 0;
-
-            if (loadx < query_cols)
-            {
-                s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
-                s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            result += reduce_block_match(s_query, s_train, lidx, lidy);
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        result = DIST_RES(result);
-
-        const int trainIdx = t * BLOCK_SIZE + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
-        {
-            if (result < myBestDistance1)
-            {
-                myBestDistance2 = myBestDistance1;
-                myBestTrainIdx2 = myBestTrainIdx1;
-                myBestDistance1 = result;
-                myBestTrainIdx1 = trainIdx;
-            }
-            else if (result < myBestDistance2)
-            {
-                myBestDistance2 = result;
-                myBestTrainIdx2 = trainIdx;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float *s_distance = (__local float *)sharebuffer;
-    __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
-    //findBestMatch
-    s_distance += lidy * BLOCK_SIZE;
-    s_trainIdx += lidy * BLOCK_SIZE;
-
-    s_distance[lidx] = myBestDistance1;
-    s_trainIdx[lidx] = myBestTrainIdx1;
-
-    float bestDistance1 = MAX_FLOAT;
-    float bestDistance2 = MAX_FLOAT;
-    int bestTrainIdx1 = -1;
-    int bestTrainIdx2 = -1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < BLOCK_SIZE ; i++)
-        {
-            float val = s_distance[i];
-            if (val < bestDistance1)
-            {
-                bestDistance2 = bestDistance1;
-                bestTrainIdx2 = bestTrainIdx1;
-
-                bestDistance1 = val;
-                bestTrainIdx1 = s_trainIdx[i];
-            }
-            else if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    s_distance[lidx] = myBestDistance2;
-    s_trainIdx[lidx] = myBestTrainIdx2;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < BLOCK_SIZE ; i++)
-        {
-            float val = s_distance[i];
-
-            if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    myBestDistance1 = bestDistance1;
-    myBestDistance2 = bestDistance2;
-
-    myBestTrainIdx1 = bestTrainIdx1;
-    myBestTrainIdx2 = bestTrainIdx2;
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
-        bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
-    }
-}
-
-kernel void BruteForceMatch_calcDistanceUnrolled(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global float *allDist,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step)
-{
-    /* Todo */
-}
-
-kernel void BruteForceMatch_calcDistance(
-    __global T *query,
-    __global T *train,
-    //__global float *mask,
-    __global float *allDist,
-    __local float *sharebuffer,
-    int query_rows,
-    int query_cols,
-    int train_rows,
-    int train_cols,
-    int step)
-{
-    /* Todo */
-}
-
-kernel void BruteForceMatch_findBestMatch(
-    __global float *allDist,
-    __global int *bestTrainIdx,
-    __global float *bestDistance,
-    int k
-)
-{
-    /* Todo */
-}
diff --git a/modules/ocl/src/opencl/build_warps.cl b/modules/ocl/src/opencl/build_warps.cl
deleted file mode 100644
index bd5e002..0000000
--- a/modules/ocl/src/opencl/build_warps.cl
+++ /dev/null
@@ -1,207 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-__kernel void buildWarpPlaneMaps(__global float * xmap, __global float * ymap,
-                                 __constant float * KRT,
-                                 int tl_u, int tl_v,
-                                 int cols, int rows,
-                                 int xmap_step, int ymap_step,
-                                 int xmap_offset, int ymap_offset,
-                                 float scale)
-{
-    int du = get_global_id(0);
-    int dv = get_global_id(1);
-
-    __constant float * ck_rinv = KRT;
-    __constant float * ct      = KRT + 9;
-
-    if (du < cols && dv < rows)
-    {
-        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
-        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
-
-        float u = tl_u + du;
-        float v = tl_v + dv;
-        float x, y;
-
-        float x_ = u / scale - ct[0];
-        float y_ = v / scale - ct[1];
-
-        float z;
-        x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
-        y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
-        z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
-
-        x /= z;
-        y /= z;
-
-        xmap[xmap_index] = x;
-        ymap[ymap_index] = y;
-    }
-}
-
-__kernel void buildWarpCylindricalMaps(__global float * xmap, __global float * ymap,
-                                       __constant float * ck_rinv,
-                                       int tl_u, int tl_v,
-                                       int cols, int rows,
-                                       int xmap_step, int ymap_step,
-                                       int xmap_offset, int ymap_offset,
-                                       float scale)
-{
-    int du = get_global_id(0);
-    int dv = get_global_id(1);
-
-    if (du < cols && dv < rows)
-    {
-        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
-        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
-
-        float u = tl_u + du;
-        float v = tl_v + dv;
-        float x, y;
-
-        u /= scale;
-        float x_ = sin(u);
-        float y_ = v / scale;
-        float z_ = cos(u);
-
-        float z;
-        x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
-        y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
-        z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
-
-        if (z > 0) { x /= z; y /= z; }
-        else x = y = -1;
-
-        xmap[xmap_index] = x;
-        ymap[ymap_index] = y;
-    }
-}
-
-__kernel void buildWarpSphericalMaps(__global float * xmap, __global float * ymap,
-                                     __constant float * ck_rinv,
-                                     int tl_u, int tl_v,
-                                     int cols, int rows,
-                                     int xmap_step, int ymap_step,
-                                     int xmap_offset, int ymap_offset,
-                                     float scale)
-{
-    int du = get_global_id(0);
-    int dv = get_global_id(1);
-
-    if (du < cols && dv < rows)
-    {
-        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
-        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
-
-        float u = tl_u + du;
-        float v = tl_v + dv;
-        float x, y;
-
-        v /= scale;
-        u /= scale;
-
-        float sinv = sin(v);
-        float x_ = sinv * sin(u);
-        float y_ = - cos(v);
-        float z_ = sinv * cos(u);
-
-        float z;
-        x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
-        y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
-        z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
-
-        if (z > 0) { x /= z; y /= z; }
-        else x = y = -1;
-
-        xmap[xmap_index] = x;
-        ymap[ymap_index] = y;
-    }
-}
-
-__kernel void buildWarpAffineMaps(__global float * xmap, __global float * ymap,
-                                  __constant float * c_warpMat,
-                                  int cols, int rows,
-                                  int xmap_step, int ymap_step,
-                                  int xmap_offset, int ymap_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
-        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
-
-        float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
-        float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
-
-        xmap[xmap_index] = xcoo;
-        ymap[ymap_index] = ycoo;
-    }
-}
-
-__kernel void buildWarpPerspectiveMaps(__global float * xmap, __global float * ymap,
-                                       __constant float * c_warpMat,
-                                       int cols, int rows,
-                                       int xmap_step, int ymap_step,
-                                       int xmap_offset, int ymap_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
-        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
-
-        float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
-        float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
-        float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
-
-        xmap[xmap_index] = xcoo;
-        ymap[ymap_index] = ycoo;
-    }
-}
diff --git a/modules/ocl/src/opencl/convertC3C4.cl b/modules/ocl/src/opencl/convertC3C4.cl
deleted file mode 100644
index 4c519fd..0000000
--- a/modules/ocl/src/opencl/convertC3C4.cl
+++ /dev/null
@@ -1,153 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
-                         int cols, int rows,
-                         int dstStep_in_piexl, int pixel_end)
-{
-    int id = get_global_id(0);
-    int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
-    pixelid = clamp(pixelid,0,pixel_end);
-    GENTYPE4 pixel0, pixel1, pixel2, outpix0,outpix1,outpix2,outpix3;
-
-    pixel0 = src[pixelid.x];
-    pixel1 = src[pixelid.y];
-    pixel2 = src[pixelid.z];
-
-    outpix0 = (GENTYPE4)(pixel0.x,pixel0.y,pixel0.z,0);
-    outpix1 = (GENTYPE4)(pixel0.w,pixel1.x,pixel1.y,0);
-    outpix2 = (GENTYPE4)(pixel1.z,pixel1.w,pixel2.x,0);
-    outpix3 = (GENTYPE4)(pixel2.y,pixel2.z,pixel2.w,0);
-
-    int4 outy = (id<<2)/cols;
-    int4 outx = (id<<2)%cols;
-
-    outx += (int4)(0, 1, 2, 3);
-    outy = select(outy, outy+1, outx>=cols);
-    outx = select(outx, outx-cols, outx>=cols);
-
-    // when cols == 1
-    outy = select(outy, outy + 1, outx >= cols);
-    outx = select(outx, outx-cols, outx >= cols);
-    outy = select(outy, outy + 1, outx >= cols);
-    outx = select(outx, outx-cols, outx >= cols);
-
-    int4 addr = mad24(outy,(int4)dstStep_in_piexl,outx);
-
-    if(outx.w<cols && outy.w<rows)
-    {
-        dst[addr.x] = outpix0;
-        dst[addr.y] = outpix1;
-        dst[addr.z] = outpix2;
-        dst[addr.w] = outpix3;
-    }
-    else if(outx.z<cols && outy.z<rows)
-    {
-        dst[addr.x] = outpix0;
-        dst[addr.y] = outpix1;
-        dst[addr.z] = outpix2;
-    }
-    else if(outx.y<cols && outy.y<rows)
-    {
-        dst[addr.x] = outpix0;
-        dst[addr.y] = outpix1;
-    }
-    else if(outx.x<cols && outy.x<rows)
-        dst[addr.x] = outpix0;
-}
-
-__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
-                          int cols, int rows,
-                          int srcStep_in_pixel, int pixel_end)
-{
-    int id = get_global_id(0)<<2;
-    int y = id / cols;
-    int x = id % cols;
-
-    int4 x4 = (int4)(x,x+1,x+2,x+3);
-    int4 y4 = select((int4)y,(int4)(y+1),x4>=(int4)cols);
-    x4 = select(x4,x4-(int4)cols,x4>=(int4)cols);
-
-    // when cols == 1
-    y4 = select(y4, y4 + 1,x4>=(int4)cols);
-    x4 = select(x4, x4 - (int4)cols,x4>=(int4)cols);
-    y4 = select(y4, y4 + 1,x4>=(int4)cols);
-    x4 = select(x4, x4-(int4)cols,x4>=(int4)cols);
-
-    y4=clamp(y4,(int4)0,(int4)(rows-1));
-    int4 addr = mad24(y4, (int4)srcStep_in_pixel, x4);
-
-    GENTYPE4 pixel0,pixel1,pixel2,pixel3, outpixel1, outpixel2;
-    pixel0 = src[addr.x];
-    pixel1 = src[addr.y];
-    pixel2 = src[addr.z];
-    pixel3 = src[addr.w];
-
-    pixel0.w = pixel1.x;
-    outpixel1.x = pixel1.y;
-    outpixel1.y = pixel1.z;
-    outpixel1.z = pixel2.x;
-    outpixel1.w = pixel2.y;
-    outpixel2.x = pixel2.z;
-    outpixel2.y = pixel3.x;
-    outpixel2.z = pixel3.y;
-    outpixel2.w = pixel3.z;
-
-    int4 outaddr = mul24(id>>2 , 3);
-    outaddr.y++;
-    outaddr.z+=2;
-
-    if(outaddr.z <= pixel_end)
-    {
-        dst[outaddr.x] = pixel0;
-        dst[outaddr.y] = outpixel1;
-        dst[outaddr.z] = outpixel2;
-    }
-    else if(outaddr.y <= pixel_end)
-    {
-        dst[outaddr.x] = pixel0;
-        dst[outaddr.y] = outpixel1;
-    }
-    else if(outaddr.x <= pixel_end)
-        dst[outaddr.x] = pixel0;
-}
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
deleted file mode 100644
index 5c236f0..0000000
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ /dev/null
@@ -1,1599 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/**************************************PUBLICFUNC*************************************/
-
-#ifndef hscale
-#define hscale 0
-#endif
-
-#ifndef hrange
-#define hrange 0
-#endif
-
-#ifdef DEPTH_0
-#define DATA_TYPE uchar
-#define VECTOR2 uchar2
-#define VECTOR4 uchar4
-#define VECTOR8 uchar8
-#define VECTOR16 uchar16
-#define COEFF_TYPE int
-#define MAX_NUM  255
-#define HALF_MAX 128
-#define SAT_CAST(num) convert_uchar_sat_rte(num)
-#define SAT_CAST2(num) convert_uchar2_sat(num)
-#define SAT_CAST4(num) convert_uchar4_sat(num)
-#endif
-
-#ifdef DEPTH_2
-#define DATA_TYPE ushort
-#define VECTOR2 ushort2
-#define VECTOR4 ushort4
-#define VECTOR8 ushort8
-#define VECTOR16 ushort16
-#define COEFF_TYPE int
-#define MAX_NUM  65535
-#define HALF_MAX 32768
-#define SAT_CAST(num) convert_ushort_sat_rte(num)
-#define SAT_CAST2(num) convert_ushort2_sat(num)
-#define SAT_CAST4(num) convert_ushort4_sat(num)
-#endif
-
-#ifdef DEPTH_5
-#define DATA_TYPE float
-#define VECTOR2 float2
-#define VECTOR4 float4
-#define VECTOR8 float8
-#define VECTOR16 float16
-#define COEFF_TYPE float
-#define MAX_NUM  1.0f
-#define HALF_MAX 0.5f
-#define SAT_CAST(num) (num)
-#endif
-
-#ifndef bidx
-    #define bidx 0
-#endif
-
-#ifndef pixels_per_work_item
-    #define pixels_per_work_item 1
-#endif
-
-#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
-
-enum
-{
-    yuv_shift  = 14,
-    xyz_shift  = 12,
-    hsv_shift  = 12,
-    R2Y        = 4899,
-    G2Y        = 9617,
-    B2Y        = 1868,
-    BLOCK_SIZE = 256
-};
-
-///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
-
-__constant float c_RGB2GrayCoeffs_f[3]  = { 0.114f, 0.587f, 0.299f };
-__constant int   c_RGB2GrayCoeffs_i[3]  = { B2Y, G2Y, R2Y };
-
-__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
-                       __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                       int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + (x << 2));
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-#ifndef INTEL_DEVICE
-
-#ifdef DEPTH_5
-        dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f;
-#else
-        dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift);
-#endif
-
-#else   //INTEL_DEVICE
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#ifdef DEPTH_5
-        __constant float * coeffs = c_RGB2GrayCoeffs_f;
-#else
-        __constant int * coeffs = c_RGB2GrayCoeffs_i;
-#endif
-
-#if (1 == pixels_per_work_item)
-        {
-#ifdef DEPTH_5
-            *dst_ptr = src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] *coeffs[2];
-#else
-            *dst_ptr = (DATA_TYPE)CV_DESCALE((src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] * coeffs[2]), yuv_shift);
-#endif
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 c0 = r0.s04;
-            const float2 c1 = r0.s15;
-            const float2 c2 = r0.s26;
-
-            const float2 Y = c0 * coeffs[bidx] + c1 * coeffs[1] + c2 * coeffs[bidx^2];
-#else
-            const int2 c0 = convert_int2(r0.s04);
-            const int2 c1 = convert_int2(r0.s15);
-            const int2 c2 = convert_int2(r0.s26);
-
-            const int2 yi = CV_DESCALE(c0 * coeffs[bidx] + c1 * coeffs[1] + c2 * coeffs[bidx^2], yuv_shift);
-            const VECTOR2 Y = SAT_CAST2(yi);
-#endif
-
-            vstore2(Y, 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 c0 = convert_int4(r0.s048c);
-            const int4 c1 = convert_int4(r0.s159d);
-            const int4 c2 = convert_int4(r0.s26ae);
-            const int4 Y = CV_DESCALE(c0 * coeffs[bidx] + c1 * coeffs[1] + c2 * coeffs[bidx^2], yuv_shift);
-
-            vstore4(SAT_CAST4(Y), 0, dst_ptr);
-#endif
-        }
-#endif //pixels_per_work_item
-#endif //INTEL_DEVICE
-    }
-}
-
-__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step,
-                       __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                       int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
-
-        DATA_TYPE val = src[src_idx];
-        dst[dst_idx] = val;
-        dst[dst_idx + 1] = val;
-        dst[dst_idx + 2] = val;
-#if dcn == 4
-        dst[dst_idx + 3] = MAX_NUM;
-#endif
-    }
-}
-
-///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
-
-__constant float c_RGB2YUVCoeffs_f[5]  = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
-__constant int   c_RGB2YUVCoeffs_i[5]  = { B2Y, G2Y, R2Y, 8061, 14369 };
-
-__kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#ifdef DEPTH_5
-        __constant float * coeffs = c_RGB2YUVCoeffs_f;
-#else
-        __constant int * coeffs = c_RGB2YUVCoeffs_i;
-        const int delta = HALF_MAX * (1 << yuv_shift);
-#endif
-
-#if (1 == pixels_per_work_item)
-        {
-            const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
-
-#ifdef DEPTH_5
-            float Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
-            float U = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
-            float V = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
-#else
-            int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
-            int U = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
-            int V = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
-#endif
-
-            dst_ptr[0] = SAT_CAST( Y );
-            dst_ptr[1] = SAT_CAST( U );
-            dst_ptr[2] = SAT_CAST( V );
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 c0 = r0.s04;
-            const float2 c1 = r0.s15;
-            const float2 c2 = r0.s26;
-
-            const float2 Y = (bidx == 0) ? (c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0]) : (c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2]);
-            const float2 U = (bidx == 0) ? ((c2 - Y) * coeffs[3] + HALF_MAX) : ((c0 - Y) * coeffs[3] + HALF_MAX);
-            const float2 V = (bidx == 0) ? ((c0 - Y) * coeffs[4] + HALF_MAX) : ((c2 - Y) * coeffs[4] + HALF_MAX);
-#else
-            const int2 c0 = convert_int2(r0.s04);
-            const int2 c1 = convert_int2(r0.s15);
-            const int2 c2 = convert_int2(r0.s26);
-
-            const int2 yi = (bidx == 0) ? CV_DESCALE(c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0], yuv_shift) : CV_DESCALE(c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2], yuv_shift);
-            const int2 ui = (bidx == 0) ? CV_DESCALE((c2 - yi) * coeffs[3] + delta, yuv_shift) : CV_DESCALE((c0 - yi) * coeffs[3] + delta, yuv_shift);
-            const int2 vi = (bidx == 0) ? CV_DESCALE((c0 - yi) * coeffs[4] + delta, yuv_shift) : CV_DESCALE((c2 - yi) * coeffs[4] + delta, yuv_shift);
-
-            const VECTOR2 Y = SAT_CAST2(yi);
-            const VECTOR2 U = SAT_CAST2(ui);
-            const VECTOR2 V = SAT_CAST2(vi);
-#endif
-
-            vstore8((VECTOR8)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0), 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 c0 = convert_int4(r0.s048c);
-            const int4 c1 = convert_int4(r0.s159d);
-            const int4 c2 = convert_int4(r0.s26ae);
-
-            const int4 yi = (bidx == 0) ? CV_DESCALE(c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0], yuv_shift) : CV_DESCALE(c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2], yuv_shift);
-            const int4 ui = (bidx == 0) ? CV_DESCALE((c2 - yi) * coeffs[3] + delta, yuv_shift) : CV_DESCALE((c0 - yi) * coeffs[3] + delta, yuv_shift);
-            const int4 vi = (bidx == 0) ? CV_DESCALE((c0 - yi) * coeffs[4] + delta, yuv_shift) : CV_DESCALE((c2 - yi) * coeffs[4] + delta, yuv_shift);
-
-            const VECTOR4 Y = SAT_CAST4(yi);
-            const VECTOR4 U = SAT_CAST4(ui);
-            const VECTOR4 V = SAT_CAST4(vi);
-
-            vstore16((VECTOR16)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0, Y.s2, U.s2, V.s2, 0, Y.s3, U.s3, V.s3, 0), 0, dst_ptr);
-#endif
-        }
-#endif //pixels_per_work_item
-    }
-}
-
-__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
-__constant int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
-
-__kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#ifdef DEPTH_5
-        __constant float * coeffs = c_YUV2RGBCoeffs_f;
-#else
-        __constant int * coeffs = c_YUV2RGBCoeffs_i;
-#endif
-
-#if (1 == pixels_per_work_item)
-        {
-            const DATA_TYPE yuv[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
-
-#ifdef DEPTH_5
-            float B = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
-            float G = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
-            float R = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0];
-#else
-            int B = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift);
-            int G = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift);
-            int R = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift);
-#endif
-
-            dst_ptr[bidx]     = SAT_CAST( B );
-            dst_ptr[1]        = SAT_CAST( G );
-            dst_ptr[(bidx^2)] = SAT_CAST( R );
-#if dcn == 4
-            dst_ptr[3]         = MAX_NUM;
-#endif
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 Y = r0.s04;
-            const float2 U = r0.s15;
-            const float2 V = r0.s26;
-
-            const float2 c0 = (bidx == 0) ? (Y + (V - HALF_MAX) * coeffs[3]) : (Y + (U - HALF_MAX) * coeffs[0]);
-            const float2 c1 = Y + (V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1];
-            const float2 c2 = (bidx == 0) ? (Y + (U - HALF_MAX) * coeffs[0]) : (Y + (V - HALF_MAX) * coeffs[3]);
-#else
-            const int2 Y = convert_int2(r0.s04);
-            const int2 U = convert_int2(r0.s15);
-            const int2 V = convert_int2(r0.s26);
-
-            const int2 c0i = (bidx == 0) ? (Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift)) : (Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift));
-            const int2 c1i = Y + CV_DESCALE((V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1], yuv_shift);
-            const int2 c2i = (bidx == 0) ? (Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift)) : (Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift));
-
-            const VECTOR2 c0 = SAT_CAST2(c0i);
-            const VECTOR2 c1 = SAT_CAST2(c1i);
-            const VECTOR2 c2 = SAT_CAST2(c2i);
-#endif
-
-#if dcn == 4
-            vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, MAX_NUM, c0.s1, c1.s1, c2.s1, MAX_NUM), 0, dst_ptr);
-#else
-            vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr);
-#endif
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 Y = convert_int4(r0.s048c);
-            const int4 U = convert_int4(r0.s159d);
-            const int4 V = convert_int4(r0.s26ae);
-
-            const int4 c0i = (bidx == 0) ? (Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift)) : (Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift));
-            const int4 c1i = Y + CV_DESCALE((V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1], yuv_shift);
-            const int4 c2i = (bidx == 0) ? (Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift)) : (Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift));
-
-            const VECTOR4 c0 = SAT_CAST4(c0i);
-            const VECTOR4 c1 = SAT_CAST4(c1i);
-            const VECTOR4 c2 = SAT_CAST4(c2i);
-
-#if dcn == 4
-            vstore16((VECTOR16)(c0.s0, c1.s0, c2.s0, MAX_NUM, c0.s1, c1.s1, c2.s1, MAX_NUM, c0.s2, c1.s2, c2.s2, MAX_NUM, c0.s3, c1.s3, c2.s3, MAX_NUM), 0, dst_ptr);
-#else
-            vstore16((VECTOR16)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0, c0.s2, c1.s2, c2.s2, 0, c0.s3, c1.s3, c2.s3, 0), 0, dst_ptr);
-#endif
-#endif
-        }
-#endif  //pixels_per_work_item
-    }
-}
-
-__constant int ITUR_BT_601_CY = 1220542;
-__constant int ITUR_BT_601_CUB = 2116026;
-__constant int ITUR_BT_601_CUG = 409993;
-__constant int ITUR_BT_601_CVG = 852492;
-__constant int ITUR_BT_601_CVR = 1673527;
-__constant int ITUR_BT_601_SHIFT = 20;
-
-__kernel void YUV2RGBA_NV12(int cols, int rows, int src_step, int dst_step,
-                            __global const uchar* src, __global uchar* dst,
-                            int src_offset, int dst_offset)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (y < rows / 2 && x < cols / 2 )
-    {
-        __global const uchar* ysrc = src + mad24(y << 1, src_step, (x << 1) + src_offset);
-        __global const uchar* usrc = src + mad24(rows + y, src_step, (x << 1) + src_offset);
-        __global uchar*       dst1 = dst + mad24(y << 1, dst_step, (x << 3) + dst_offset);
-        __global uchar*       dst2 = dst + mad24((y << 1) + 1, dst_step, (x << 3) + dst_offset);
-
-        int Y1 = ysrc[0];
-        int Y2 = ysrc[1];
-        int Y3 = ysrc[src_step];
-        int Y4 = ysrc[src_step + 1];
-
-        int U  = usrc[0] - 128;
-        int V  = usrc[1] - 128;
-
-        int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V;
-        int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U;
-        int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U;
-
-        Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY;
-        dst1[2 - bidx]     = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT);
-        dst1[1]        = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT);
-        dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT);
-        dst1[3]        = 255;
-
-        Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY;
-        dst1[6 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT);
-        dst1[5]        = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT);
-        dst1[4 + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT);
-        dst1[7]        = 255;
-
-        Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY;
-        dst2[2 - bidx]     = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT);
-        dst2[1]        = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT);
-        dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT);
-        dst2[3]        = 255;
-
-        Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY;
-        dst2[6 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT);
-        dst2[5]        = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT);
-        dst2[4 + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT);
-        dst2[7]        = 255;
-    }
-}
-
-///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
-
-__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
-__constant int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
-
-__kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#ifdef DEPTH_5
-        __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
-#else
-        __constant int * coeffs = c_RGB2YCrCbCoeffs_i;
-        const int delta = HALF_MAX * (1 << yuv_shift);
-#endif
-
-#if (1 == pixels_per_work_item)
-        {
-            const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
-
-#ifdef DEPTH_5
-            float Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
-            float Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
-            float Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
-#else
-            int Y =  CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
-            int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
-            int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
-#endif
-
-            dst_ptr[0] = SAT_CAST( Y );
-            dst_ptr[1] = SAT_CAST( Cr );
-            dst_ptr[2] = SAT_CAST( Cb );
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 c0 = r0.s04;
-            const float2 c1 = r0.s15;
-            const float2 c2 = r0.s26;
-
-            const float2 Y  = (bidx == 0) ? (c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0]) : (c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2]);
-            const float2 Cr = (bidx == 0) ? ((c2 - Y) * coeffs[3] + HALF_MAX) : ((c0 - Y) * coeffs[3] + HALF_MAX);
-            const float2 Cb = (bidx == 0) ? ((c0 - Y) * coeffs[4] + HALF_MAX) : ((c2 - Y) * coeffs[4] + HALF_MAX);
-#else
-            const int2 c0 = convert_int2(r0.s04);
-            const int2 c1 = convert_int2(r0.s15);
-            const int2 c2 = convert_int2(r0.s26);
-
-            const int2 yi = (bidx == 0) ? CV_DESCALE(c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0], yuv_shift) : CV_DESCALE(c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2], yuv_shift);
-            const int2 ui = (bidx == 0) ? CV_DESCALE((c2 - yi) * coeffs[3] + delta, yuv_shift) : CV_DESCALE((c0 - yi) * coeffs[3] + delta, yuv_shift);
-            const int2 vi = (bidx == 0) ? CV_DESCALE((c0 - yi) * coeffs[4] + delta, yuv_shift) : CV_DESCALE((c2 - yi) * coeffs[4] + delta, yuv_shift);
-
-            const VECTOR2 Y  = SAT_CAST2(yi);
-            const VECTOR2 Cr = SAT_CAST2(ui);
-            const VECTOR2 Cb = SAT_CAST2(vi);
-#endif
-
-            vstore8((VECTOR8)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0), 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-            const int4 c0 = convert_int4(r0.s048c);
-            const int4 c1 = convert_int4(r0.s159d);
-            const int4 c2 = convert_int4(r0.s26ae);
-
-            const int4 yi = (bidx == 0) ? CV_DESCALE(c0 * coeffs[2] + c1 * coeffs[1] + c2 * coeffs[0], yuv_shift) : CV_DESCALE(c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2], yuv_shift);
-            const int4 ui = (bidx == 0) ? CV_DESCALE((c2 - yi) * coeffs[3] + delta, yuv_shift) : CV_DESCALE((c0 - yi) * coeffs[3] + delta, yuv_shift);
-            const int4 vi = (bidx == 0) ? CV_DESCALE((c0 - yi) * coeffs[4] + delta, yuv_shift) : CV_DESCALE((c2 - yi) * coeffs[4] + delta, yuv_shift);
-
-            const VECTOR4 Y  = SAT_CAST4(yi);
-            const VECTOR4 Cr = SAT_CAST4(ui);
-            const VECTOR4 Cb = SAT_CAST4(vi);
-
-            vstore16((VECTOR16)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0, Y.s2, Cr.s2, Cb.s2, 0, Y.s3, Cr.s3, Cb.s3, 0), 0, dst_ptr);
-#endif
-        }
-#endif //pixels_per_work_item
-    }
-}
-
-__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
-__constant int   c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 };
-
-__kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#ifdef DEPTH_5
-        __constant float * coeffs = c_YCrCb2RGBCoeffs_f;
-#else
-        __constant int * coeffs = c_YCrCb2RGBCoeffs_i;
-#endif
-
-#if (1 == pixels_per_work_item)
-        {
-            const DATA_TYPE ycrcb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
-
-#ifdef DEPTH_5
-            float B = ycrcb[0] + (ycrcb[2] - HALF_MAX) * coeffs[3];
-            float G = ycrcb[0] + (ycrcb[2] - HALF_MAX) * coeffs[2] + (ycrcb[1] - HALF_MAX) * coeffs[1];
-            float R = ycrcb[0] + (ycrcb[1] - HALF_MAX) * coeffs[0];
-#else
-            int B = ycrcb[0] + CV_DESCALE((ycrcb[2] - HALF_MAX) * coeffs[3], yuv_shift);
-            int G = ycrcb[0] + CV_DESCALE((ycrcb[2] - HALF_MAX) * coeffs[2] + (ycrcb[1] - HALF_MAX) * coeffs[1], yuv_shift);
-            int R = ycrcb[0] + CV_DESCALE((ycrcb[1] - HALF_MAX) * coeffs[0], yuv_shift);
-#endif
-
-            dst_ptr[bidx]     = SAT_CAST( B );
-            dst_ptr[1]        = SAT_CAST( G );
-            dst_ptr[(bidx^2)] = SAT_CAST( R );
-#if dcn == 4
-            dst_ptr[3]         = MAX_NUM;
-#endif
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 Y  = r0.s04;
-            const float2 Cr = r0.s15;
-            const float2 Cb = r0.s26;
-
-            const float2 c0 = (bidx == 0) ? (Y + (Cb - HALF_MAX) * coeffs[3]) : (Y + (Cr - HALF_MAX) * coeffs[0]);
-            const float2 c1 = Y + (Cb - HALF_MAX) * coeffs[2] + (Cr - HALF_MAX) * coeffs[1];
-            const float2 c2 = (bidx == 0) ? (Y + (Cr - HALF_MAX) * coeffs[0]) : (Y + (Cb - HALF_MAX) * coeffs[3]);
-#else
-            const int2 Y  = convert_int2(r0.s04);
-            const int2 Cr = convert_int2(r0.s15);
-            const int2 Cb = convert_int2(r0.s26);
-
-            const int2 c0i = (bidx == 0) ? (Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[3], yuv_shift)) : (Y + CV_DESCALE((Cr - HALF_MAX) * coeffs[0], yuv_shift));
-            const int2 c1i = Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[2] + (Cr - HALF_MAX) * coeffs[1], yuv_shift);
-            const int2 c2i = (bidx == 0) ? (Y + CV_DESCALE((Cr - HALF_MAX) * coeffs[0], yuv_shift)) : (Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[3], yuv_shift));
-
-            const VECTOR2 c0 = SAT_CAST2(c0i);
-            const VECTOR2 c1 = SAT_CAST2(c1i);
-            const VECTOR2 c2 = SAT_CAST2(c2i);
-#endif
-
-#if dcn == 4
-            vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, MAX_NUM, c0.s1, c1.s1, c2.s1, MAX_NUM), 0, dst_ptr);
-#else
-            vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr);
-#endif
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 Y  = convert_int4(r0.s048c);
-            const int4 Cr = convert_int4(r0.s159d);
-            const int4 Cb = convert_int4(r0.s26ae);
-
-            const int4 c0i = (bidx == 0) ? (Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[3], yuv_shift)) : (Y + CV_DESCALE((Cr - HALF_MAX) * coeffs[0], yuv_shift));
-            const int4 c1i = Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[2] + (Cr - HALF_MAX) * coeffs[1], yuv_shift);
-            const int4 c2i = (bidx == 0) ? (Y + CV_DESCALE((Cr - HALF_MAX) * coeffs[0], yuv_shift)) : (Y + CV_DESCALE((Cb - HALF_MAX) * coeffs[3], yuv_shift));
-
-            const VECTOR4 c0 = SAT_CAST4(c0i);
-            const VECTOR4 c1 = SAT_CAST4(c1i);
-            const VECTOR4 c2 = SAT_CAST4(c2i);
-
-#if dcn == 4
-            vstore16((VECTOR16)(c0.s0, c1.s0, c2.s0, MAX_NUM, c0.s1, c1.s1, c2.s1, MAX_NUM, c0.s2, c1.s2, c2.s2, MAX_NUM, c0.s3, c1.s3, c2.s3, MAX_NUM), 0, dst_ptr);
-#else
-            vstore16((VECTOR16)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0, c0.s2, c1.s2, c2.s2, 0, c0.s3, c1.s3, c2.s3, 0), 0, dst_ptr);
-#endif
-#endif
-        }
-#endif //pixels_per_work_item
-    }
-}
-
-///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
-
-__kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
-{
-    int dx = get_global_id(0) * pixels_per_work_item;
-    int dy = get_global_id(1);
-
-    if (dy < rows && dx < cols)
-    {
-        dx <<= 2;
-        int src_idx = mad24(dy, src_step, src_offset + dx);
-        int dst_idx = mad24(dy, dst_step, dst_offset + dx);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#if (1 == pixels_per_work_item)
-        {
-            DATA_TYPE R = src_ptr[0], G = src_ptr[1], B = src_ptr[2];
-
-#ifdef DEPTH_5
-            float X = R * coeffs[0] + G * coeffs[1] + B * coeffs[2];
-            float Y = R * coeffs[3] + G * coeffs[4] + B * coeffs[5];
-            float Z = R * coeffs[6] + G * coeffs[7] + B * coeffs[8];
-#else
-            int X = CV_DESCALE(R * coeffs[0] + G * coeffs[1] + B * coeffs[2], xyz_shift);
-            int Y = CV_DESCALE(R * coeffs[3] + G * coeffs[4] + B * coeffs[5], xyz_shift);
-            int Z = CV_DESCALE(R * coeffs[6] + G * coeffs[7] + B * coeffs[8], xyz_shift);
-#endif
-
-            dst_ptr[0] = SAT_CAST( X );
-            dst_ptr[1] = SAT_CAST( Y );
-            dst_ptr[2] = SAT_CAST( Z );
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 R = r0.s04;
-            const float2 G = r0.s15;
-            const float2 B = r0.s26;
-
-            const float2 X = R * coeffs[0] + G * coeffs[1] + B * coeffs[2];
-            const float2 Y = R * coeffs[3] + G * coeffs[4] + B * coeffs[5];
-            const float2 Z = R * coeffs[6] + G * coeffs[7] + B * coeffs[8];
-#else
-            const int2 R = convert_int2(r0.s04);
-            const int2 G = convert_int2(r0.s15);
-            const int2 B = convert_int2(r0.s26);
-
-            const int2 xi = CV_DESCALE(R * coeffs[0] + G * coeffs[1] + B * coeffs[2], xyz_shift);
-            const int2 yi = CV_DESCALE(R * coeffs[3] + G * coeffs[4] + B * coeffs[5], xyz_shift);
-            const int2 zi = CV_DESCALE(R * coeffs[6] + G * coeffs[7] + B * coeffs[8], xyz_shift);
-
-            const VECTOR2 X = SAT_CAST2(xi);
-            const VECTOR2 Y = SAT_CAST2(yi);
-            const VECTOR2 Z = SAT_CAST2(zi);
-#endif
-
-            vstore8((VECTOR8)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0), 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 R = convert_int4(r0.s048c);
-            const int4 G = convert_int4(r0.s159d);
-            const int4 B = convert_int4(r0.s26ae);
-
-            const int4 xi = CV_DESCALE(R * coeffs[0] + G * coeffs[1] + B * coeffs[2], xyz_shift);
-            const int4 yi = CV_DESCALE(R * coeffs[3] + G * coeffs[4] + B * coeffs[5], xyz_shift);
-            const int4 zi = CV_DESCALE(R * coeffs[6] + G * coeffs[7] + B * coeffs[8], xyz_shift);
-
-            const VECTOR4 X = SAT_CAST4(xi);
-            const VECTOR4 Y = SAT_CAST4(yi);
-            const VECTOR4 Z = SAT_CAST4(zi);
-
-            vstore16((VECTOR16)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0, X.s2, Y.s2, Z.s2, 0, X.s3, Y.s3, Z.s3, 0), 0, dst_ptr);
-#endif
-        }
-#endif //pixels_per_work_item
-    }
-}
-
-__kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
-{
-    int dx = get_global_id(0) * pixels_per_work_item;
-    int dy = get_global_id(1);
-
-    if (dy < rows && dx < cols)
-    {
-        dx <<= 2;
-        int src_idx = mad24(dy, src_step, src_offset + dx);
-        int dst_idx = mad24(dy, dst_step, dst_offset + dx);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#if (1 == pixels_per_work_item)
-        {
-            const DATA_TYPE X = src_ptr[0], Y = src_ptr[1], Z = src_ptr[2];
-
-#ifdef DEPTH_5
-            float B = X * coeffs[0] + Y * coeffs[1] + Z * coeffs[2];
-            float G = X * coeffs[3] + Y * coeffs[4] + Z * coeffs[5];
-            float R = X * coeffs[6] + Y * coeffs[7] + Z * coeffs[8];
-#else
-            int B = CV_DESCALE(X * coeffs[0] + Y * coeffs[1] + Z * coeffs[2], xyz_shift);
-            int G = CV_DESCALE(X * coeffs[3] + Y * coeffs[4] + Z * coeffs[5], xyz_shift);
-            int R = CV_DESCALE(X * coeffs[6] + Y * coeffs[7] + Z * coeffs[8], xyz_shift);
-#endif
-
-            dst_ptr[0] = SAT_CAST( B );
-            dst_ptr[1] = SAT_CAST( G );
-            dst_ptr[2] = SAT_CAST( R );
-#if dcn == 4
-            dst_ptr[3] = MAX_NUM;
-#endif
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const VECTOR8 r0 = vload8(0, src_ptr);
-
-#ifdef DEPTH_5
-            const float2 X = r0.s04;
-            const float2 Y = r0.s15;
-            const float2 Z = r0.s26;
-
-            float2 B = X * coeffs[0] + Y * coeffs[1] + Z * coeffs[2];
-            float2 G = X * coeffs[3] + Y * coeffs[4] + Z * coeffs[5];
-            float2 R = X * coeffs[6] + Y * coeffs[7] + Z * coeffs[8];
-#else
-            const int2 xi = convert_int2(r0.s04);
-            const int2 yi = convert_int2(r0.s15);
-            const int2 zi = convert_int2(r0.s26);
-
-            const int2 bi = CV_DESCALE(xi * coeffs[0] + yi * coeffs[1] + zi * coeffs[2], xyz_shift);
-            const int2 gi = CV_DESCALE(xi * coeffs[3] + yi * coeffs[4] + zi * coeffs[5], xyz_shift);
-            const int2 ri = CV_DESCALE(xi * coeffs[6] + yi * coeffs[7] + zi * coeffs[8], xyz_shift);
-
-            const VECTOR2 R = SAT_CAST2(ri);
-            const VECTOR2 G = SAT_CAST2(gi);
-            const VECTOR2 B = SAT_CAST2(bi);
-#endif
-
-#if dcn == 4
-            vstore8((VECTOR8)(B.s0, G.s0, R.s0, MAX_NUM, B.s1, G.s1, R.s1, MAX_NUM), 0, dst_ptr);
-#else
-            vstore8((VECTOR8)(B.s0, G.s0, R.s0, 0, B.s1, G.s1, R.s1, 0), 0, dst_ptr);
-#endif
-        }
-#elif (4 == pixels_per_work_item)
-        {
-#ifndef DEPTH_5
-            const VECTOR16 r0 = vload16(0, src_ptr);
-
-            const int4 xi = convert_int4(r0.s048c);
-            const int4 yi = convert_int4(r0.s159d);
-            const int4 zi = convert_int4(r0.s26ae);
-
-            const int4 bi = CV_DESCALE(xi * coeffs[0] + yi * coeffs[1] + zi * coeffs[2], xyz_shift);
-            const int4 gi = CV_DESCALE(xi * coeffs[3] + yi * coeffs[4] + zi * coeffs[5], xyz_shift);
-            const int4 ri = CV_DESCALE(xi * coeffs[6] + yi * coeffs[7] + zi * coeffs[8], xyz_shift);
-
-            const VECTOR4 R = SAT_CAST4(ri);
-            const VECTOR4 G = SAT_CAST4(gi);
-            const VECTOR4 B = SAT_CAST4(bi);
-
-#if dcn == 4
-            vstore16((VECTOR16)(B.s0, G.s0, R.s0, MAX_NUM, B.s1, G.s1, R.s1, MAX_NUM, B.s2, G.s2, R.s2, MAX_NUM, B.s3, G.s3, R.s3, MAX_NUM), 0, dst_ptr);
-#else
-            vstore16((VECTOR16)(B.s0, G.s0, R.s0, 0, B.s1, G.s1, R.s1, 0, B.s2, G.s2, R.s2, 0, B.s3, G.s3, R.s3, 0), 0, dst_ptr);
-#endif
-#endif
-        }
-#endif // pixels_per_work_item
-    }
-}
-
-///////////////////////////////////// RGB[A] <-> BGR[A] //////////////////////////////////////
-
-__kernel void RGB(int cols, int rows, int src_step, int dst_step,
-                  __global const DATA_TYPE * src, __global DATA_TYPE * dst,
-                  int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-#ifndef INTEL_DEVICE
-#ifdef REVERSE
-        dst[dst_idx] = src[src_idx + 2];
-        dst[dst_idx + 1] = src[src_idx + 1];
-        dst[dst_idx + 2] = src[src_idx];
-#elif defined ORDER
-        dst[dst_idx] = src[src_idx];
-        dst[dst_idx + 1] = src[src_idx + 1];
-        dst[dst_idx + 2] = src[src_idx + 2];
-#endif
-
-#if dcn == 4
-#if scn == 3
-        dst[dst_idx + 3] = MAX_NUM;
-#else
-        dst[dst_idx + 3] = src[src_idx + 3];
-#endif
-#endif
-#else //INTEL_DEVICE
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-        const VECTOR4 r0 = vload4(0, src_ptr);
-#ifdef REVERSE
-        if (3 == dcn)
-        {
-            vstore4((VECTOR4)(r0.s210, 0), 0, dst_ptr);
-        }
-        else if (3 == scn)
-        {
-            vstore4((VECTOR4)(r0.s210, MAX_NUM), 0, dst_ptr);
-        }
-        else {
-            vstore4((VECTOR4)(r0.s2103), 0, dst_ptr);
-        }
-#elif defined ORDER
-        if (3 == dcn)
-        {
-            vstore4((VECTOR4)(r0.s012, 0), 0, dst_ptr);
-        }
-        else if (3 == scn)
-        {
-            vstore4((VECTOR4)(r0.s012, MAX_NUM), 0, dst_ptr);
-        }
-        else {
-            vstore4(r0, 0, dst_ptr);
-        }
-#endif
-#endif //INTEL_DEVICE
-    }
-}
-
-///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
-
-__kernel void RGB5x52RGB(int cols, int rows, int src_step, int dst_step,
-                         __global const ushort * src, __global uchar * dst,
-                         int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
-        ushort t = src[src_idx];
-
-#if greenbits == 6
-        dst[dst_idx + bidx] = (uchar)(t << 3);
-        dst[dst_idx + 1] = (uchar)((t >> 3) & ~3);
-        dst[dst_idx + (bidx^2)] = (uchar)((t >> 8) & ~7);
-#else
-        dst[dst_idx + bidx] = (uchar)(t << 3);
-        dst[dst_idx + 1] = (uchar)((t >> 2) & ~7);
-        dst[dst_idx + (bidx^2)] = (uchar)((t >> 7) & ~7);
-#endif
-
-#if dcn == 4
-#if greenbits == 6
-        dst[dst_idx + 3] = 255;
-#else
-        dst[dst_idx + 3] = t & 0x8000 ? 255 : 0;
-#endif
-#endif
-    }
-}
-
-__kernel void RGB2RGB5x5(int cols, int rows, int src_step, int dst_step,
-                         __global const uchar * src, __global ushort * dst,
-                         int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + (x << 2));
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-#if greenbits == 6
-            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~3) << 3)|((src[src_idx + (bidx^2)]&~7) << 8));
-#elif scn == 3
-            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)|((src[src_idx + (bidx^2)]&~7) << 7));
-#else
-            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)|
-                ((src[src_idx + (bidx^2)]&~7) << 7)|(src[src_idx + 3] ? 0x8000 : 0));
-#endif
-    }
-}
-
-///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
-
-__kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step,
-                          __global const ushort * src, __global uchar * dst,
-                          int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-        int t = src[src_idx];
-
-#if greenbits == 6
-        dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
-                                         ((t >> 3) & 0xfc)*G2Y +
-                                         ((t >> 8) & 0xf8)*R2Y, yuv_shift);
-#else
-        dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
-                                         ((t >> 2) & 0xf8)*G2Y +
-                                         ((t >> 7) & 0xf8)*R2Y, yuv_shift);
-#endif
-    }
-}
-
-__kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step,
-                          __global const uchar * src, __global ushort * dst,
-                          int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-        int t = src[src_idx];
-
-#if greenbits == 6
-        dst[dst_idx] = (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
-#else
-        t >>= 3;
-        dst[dst_idx] = (ushort)(t|(t << 5)|(t << 10));
-#endif
-    }
-}
-
-///////////////////////////////////// RGB <-> HSV //////////////////////////////////////
-
-__constant int sector_data[][3] = { {1, 3, 0}, { 1, 0, 2 }, { 3, 0, 1 }, { 0, 2, 1 }, { 0, 1, 3 }, { 2, 1, 0 } };
-
-#ifdef DEPTH_0
-
-__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step,
-                      __global const uchar * src, __global uchar * dst,
-                      int src_offset, int dst_offset,
-                      __constant int * sdiv_table, __constant int * hdiv_table)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        int b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
-        int h, s, v = b;
-        int vmin = b, diff;
-        int vr, vg;
-
-        v = max( v, g );
-        v = max( v, r );
-        vmin = min( vmin, g );
-        vmin = min( vmin, r );
-
-        diff = v - vmin;
-        vr = v == r ? -1 : 0;
-        vg = v == g ? -1 : 0;
-
-        s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
-        h = (vr & (g - b)) +
-            (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
-        h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
-        h += h < 0 ? hrange : 0;
-
-        dst[dst_idx] = convert_uchar_sat_rte(h);
-        dst[dst_idx + 1] = (uchar)s;
-        dst[dst_idx + 2] = (uchar)v;
-    }
-}
-
-__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const uchar * src, __global uchar * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float h = src[src_idx], s = src[src_idx + 1]*(1/255.f), v = src[src_idx + 2]*(1/255.f);
-        float b, g, r;
-
-        if (s != 0)
-        {
-            float tab[4];
-            int sector;
-            h *= hscale;
-            if( h < 0 )
-                do h += 6; while( h < 0 );
-            else if( h >= 6 )
-                do h -= 6; while( h >= 6 );
-            sector = convert_int_sat_rtn(h);
-            h -= sector;
-            if( (unsigned)sector >= 6u )
-            {
-                sector = 0;
-                h = 0.f;
-            }
-
-            tab[0] = v;
-            tab[1] = v*(1.f - s);
-            tab[2] = v*(1.f - s*h);
-            tab[3] = v*(1.f - s*(1.f - h));
-
-            b = tab[sector_data[sector][0]];
-            g = tab[sector_data[sector][1]];
-            r = tab[sector_data[sector][2]];
-        }
-        else
-            b = g = r = v;
-
-        dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
-        dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
-        dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
-#if dcn == 4
-        dst[dst_idx + 3] = MAX_NUM;
-#endif
-    }
-}
-
-#elif defined DEPTH_5
-
-__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step,
-                      __global const float * src, __global float * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
-        float h, s, v;
-
-        float vmin, diff;
-
-        v = vmin = r;
-        if( v < g ) v = g;
-        if( v < b ) v = b;
-        if( vmin > g ) vmin = g;
-        if( vmin > b ) vmin = b;
-
-        diff = v - vmin;
-        s = diff/(float)(fabs(v) + FLT_EPSILON);
-        diff = (float)(60./(diff + FLT_EPSILON));
-        if( v == r )
-            h = (g - b)*diff;
-        else if( v == g )
-            h = (b - r)*diff + 120.f;
-        else
-            h = (r - g)*diff + 240.f;
-
-        if( h < 0 ) h += 360.f;
-
-        dst[dst_idx] = h*hscale;
-        dst[dst_idx + 1] = s;
-        dst[dst_idx + 2] = v;
-    }
-}
-
-__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const float * src, __global float * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float h = src[src_idx], s = src[src_idx + 1], v = src[src_idx + 2];
-        float b, g, r;
-
-        if (s != 0)
-        {
-            float tab[4];
-            int sector;
-            h *= hscale;
-            if(h < 0)
-                do h += 6; while (h < 0);
-            else if (h >= 6)
-                do h -= 6; while (h >= 6);
-            sector = convert_int_sat_rtn(h);
-            h -= sector;
-            if ((unsigned)sector >= 6u)
-            {
-                sector = 0;
-                h = 0.f;
-            }
-
-            tab[0] = v;
-            tab[1] = v*(1.f - s);
-            tab[2] = v*(1.f - s*h);
-            tab[3] = v*(1.f - s*(1.f - h));
-
-            b = tab[sector_data[sector][0]];
-            g = tab[sector_data[sector][1]];
-            r = tab[sector_data[sector][2]];
-        }
-        else
-            b = g = r = v;
-
-        dst[dst_idx + bidx] = b;
-        dst[dst_idx + 1] = g;
-        dst[dst_idx + (bidx^2)] = r;
-#if dcn == 4
-        dst[dst_idx + 3] = MAX_NUM;
-#endif
-    }
-}
-
-#endif
-
-///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
-
-#ifdef DEPTH_0
-
-__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step,
-                      __global const uchar * src, __global uchar * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float b = src[src_idx + bidx]*(1/255.f), g = src[src_idx + 1]*(1/255.f), r = src[src_idx + (bidx^2)]*(1/255.f);
-        float h = 0.f, s = 0.f, l;
-        float vmin, vmax, diff;
-
-        vmax = vmin = r;
-        if (vmax < g) vmax = g;
-        if (vmax < b) vmax = b;
-        if (vmin > g) vmin = g;
-        if (vmin > b) vmin = b;
-
-        diff = vmax - vmin;
-        l = (vmax + vmin)*0.5f;
-
-        if (diff > FLT_EPSILON)
-        {
-            s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
-            diff = 60.f/diff;
-
-            if( vmax == r )
-                h = (g - b)*diff;
-            else if( vmax == g )
-                h = (b - r)*diff + 120.f;
-            else
-                h = (r - g)*diff + 240.f;
-
-            if( h < 0.f ) h += 360.f;
-        }
-
-        dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
-        dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
-        dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
-    }
-}
-
-__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const uchar * src, __global uchar * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f);
-        float b, g, r;
-
-        if (s != 0)
-        {
-            float tab[4];
-
-            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
-            float p1 = 2*l - p2;
-
-            h *= hscale;
-            if( h < 0 )
-                do h += 6; while( h < 0 );
-            else if( h >= 6 )
-                do h -= 6; while( h >= 6 );
-
-            int sector = convert_int_sat_rtn(h);
-            h -= sector;
-
-            tab[0] = p2;
-            tab[1] = p1;
-            tab[2] = p1 + (p2 - p1)*(1-h);
-            tab[3] = p1 + (p2 - p1)*h;
-
-            b = tab[sector_data[sector][0]];
-            g = tab[sector_data[sector][1]];
-            r = tab[sector_data[sector][2]];
-        }
-        else
-            b = g = r = l;
-
-        dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
-        dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
-        dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
-#if dcn == 4
-        dst[dst_idx + 3] = MAX_NUM;
-#endif
-    }
-}
-
-#elif defined DEPTH_5
-
-__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step,
-                      __global const float * src, __global float * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
-        float h = 0.f, s = 0.f, l;
-        float vmin, vmax, diff;
-
-        vmax = vmin = r;
-        if (vmax < g) vmax = g;
-        if (vmax < b) vmax = b;
-        if (vmin > g) vmin = g;
-        if (vmin > b) vmin = b;
-
-        diff = vmax - vmin;
-        l = (vmax + vmin)*0.5f;
-
-        if (diff > FLT_EPSILON)
-        {
-            s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
-            diff = 60.f/diff;
-
-            if( vmax == r )
-                h = (g - b)*diff;
-            else if( vmax == g )
-                h = (b - r)*diff + 120.f;
-            else
-                h = (r - g)*diff + 240.f;
-
-            if( h < 0.f ) h += 360.f;
-        }
-
-        dst[dst_idx] = h*hscale;
-        dst[dst_idx + 1] = l;
-        dst[dst_idx + 2] = s;
-    }
-}
-
-__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step,
-                      __global const float * src, __global float * dst,
-                      int src_offset, int dst_offset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        float h = src[src_idx], l = src[src_idx + 1], s = src[src_idx + 2];
-        float b, g, r;
-
-        if (s != 0)
-        {
-            float tab[4];
-            int sector;
-
-            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
-            float p1 = 2*l - p2;
-
-            h *= hscale;
-            if( h < 0 )
-                do h += 6; while( h < 0 );
-            else if( h >= 6 )
-                do h -= 6; while( h >= 6 );
-
-            sector = convert_int_sat_rtn(h);
-            h -= sector;
-
-            tab[0] = p2;
-            tab[1] = p1;
-            tab[2] = p1 + (p2 - p1)*(1-h);
-            tab[3] = p1 + (p2 - p1)*h;
-
-            b = tab[sector_data[sector][0]];
-            g = tab[sector_data[sector][1]];
-            r = tab[sector_data[sector][2]];
-        }
-        else
-            b = g = r = l;
-
-        dst[dst_idx + bidx] = b;
-        dst[dst_idx + 1] = g;
-        dst[dst_idx + (bidx^2)] = r;
-#if dcn == 4
-        dst[dst_idx + 3] = MAX_NUM;
-#endif
-    }
-}
-
-#endif
-
-/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
-
-#ifdef DEPTH_0
-
-__kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
-                        __global const uchar * src, __global uchar * dst,
-                        int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#if (1 == pixels_per_work_item)
-        {
-            const uchar4 r0 = vload4(0, src_ptr);
-
-            dst_ptr[0] = (r0.s0 * r0.s3 + HALF_MAX) / MAX_NUM;
-            dst_ptr[1] = (r0.s1 * r0.s3 + HALF_MAX) / MAX_NUM;
-            dst_ptr[2] = (r0.s2 * r0.s3 + HALF_MAX) / MAX_NUM;
-            dst_ptr[3] = r0.s3;
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const uchar8 r0 = vload8(0, src_ptr);
-
-            const int2 v0 = convert_int2(r0.s04);
-            const int2 v1 = convert_int2(r0.s15);
-            const int2 v2 = convert_int2(r0.s26);
-            const int2 v3 = convert_int2(r0.s37);
-
-            const int2 ri = (v0 * v3 + HALF_MAX) / MAX_NUM;
-            const int2 gi = (v1 * v3 + HALF_MAX) / MAX_NUM;
-            const int2 bi = (v2 * v3 + HALF_MAX) / MAX_NUM;
-
-            const uchar2 r = convert_uchar2(ri);
-            const uchar2 g = convert_uchar2(gi);
-            const uchar2 b = convert_uchar2(bi);
-
-            vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-            const uchar16 r0 = vload16(0, src_ptr);
-
-            const int4 v0 = convert_int4(r0.s048c);
-            const int4 v1 = convert_int4(r0.s159d);
-            const int4 v2 = convert_int4(r0.s26ae);
-            const int4 v3 = convert_int4(r0.s37bf);
-
-            const int4 ri = (v0 * v3 + HALF_MAX) / MAX_NUM;
-            const int4 gi = (v1 * v3 + HALF_MAX) / MAX_NUM;
-            const int4 bi = (v2 * v3 + HALF_MAX) / MAX_NUM;
-
-            const uchar4 r = convert_uchar4(ri);
-            const uchar4 g = convert_uchar4(gi);
-            const uchar4 b = convert_uchar4(bi);
-
-            vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr);
-        }
-#endif // pixels_per_work_item
-    }
-}
-
-__kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
-                        __global const uchar * src, __global uchar * dst,
-                        int src_offset, int dst_offset)
-{
-    int x = get_global_id(0) * pixels_per_work_item;
-    int y = get_global_id(1);
-
-    if (y < rows && x < cols)
-    {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
-
-        global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
-        global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
-
-#if (1 == pixels_per_work_item)
-        {
-            const uchar4 r0 = vload4(0, src_ptr);
-            const uchar v3_half = r0.s3 / 2;
-
-            const uchar r = (r0.s3 == 0) ? 0 : (r0.s0 * MAX_NUM + v3_half) / r0.s3;
-            const uchar g = (r0.s3 == 0) ? 0 : (r0.s1 * MAX_NUM + v3_half) / r0.s3;
-            const uchar b = (r0.s3 == 0) ? 0 : (r0.s2 * MAX_NUM + v3_half) / r0.s3;
-
-            vstore4((uchar4)(r, g, b, r0.s3), 0, dst_ptr);
-        }
-#elif (2 == pixels_per_work_item)
-        {
-            const uchar8 r0 = vload8(0, src_ptr);
-
-            const int2 v0 = convert_int2(r0.s04);
-            const int2 v1 = convert_int2(r0.s15);
-            const int2 v2 = convert_int2(r0.s26);
-            const int2 v3 = convert_int2(r0.s37);
-            const int2 v3_half = v3 / 2;
-
-            const int2 ri = (v3 == 0) ? 0 : (v0 * MAX_NUM + v3_half) / v3;
-            const int2 gi = (v3 == 0) ? 0 : (v1 * MAX_NUM + v3_half) / v3;
-            const int2 bi = (v3 == 0) ? 0 : (v2 * MAX_NUM + v3_half) / v3;
-
-            const uchar2 r = convert_uchar2(ri);
-            const uchar2 g = convert_uchar2(gi);
-            const uchar2 b = convert_uchar2(bi);
-
-            vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr);
-        }
-#elif (4 == pixels_per_work_item)
-        {
-            const uchar16 r0 = vload16(0, src_ptr);
-
-            const int4 v0 = convert_int4(r0.s048c);
-            const int4 v1 = convert_int4(r0.s159d);
-            const int4 v2 = convert_int4(r0.s26ae);
-            const int4 v3 = convert_int4(r0.s37bf);
-            const int4 v3_half = v3 / 2;
-
-
-            const int4 ri = (v3 == 0) ? 0 : (v0 * MAX_NUM + v3_half) / v3;
-            const int4 gi = (v3 == 0) ? 0 : (v1 * MAX_NUM + v3_half) / v3;
-            const int4 bi = (v3 == 0) ? 0 : (v2 * MAX_NUM + v3_half) / v3;
-
-            const uchar4 r = convert_uchar4(ri);
-            const uchar4 g = convert_uchar4(gi);
-            const uchar4 b = convert_uchar4(bi);
-
-            vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr);
-        }
-#endif // pixels_per_work_item
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/featdetect_fast.cl b/modules/ocl/src/opencl/featdetect_fast.cl
deleted file mode 100644
index 44d4f44..0000000
--- a/modules/ocl/src/opencl/featdetect_fast.cl
+++ /dev/null
@@ -1,1331 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#define X_ROW 0
-#define Y_ROW 1
-#define RESPONSE_ROW 2
-#define ROWS_COUNT 3
-
-
-__constant uchar c_table[] = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-                               0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-                               0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
-                               0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
-                               0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
-                               0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
-                               0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
-                               0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
-                               0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0xc0,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                               0x80, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
-                               0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
-                               0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
-                               0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0xaa, 0xaa, 0xaa,
-                               0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
-                               0xaa, 0xaa, 0xaa, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff,
-                               0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-                               0xff, 0xff };
-
-
-// 1 -> v > x + th
-// 2 -> v < x - th
-// 0 -> x - th <= v <= x + th
-__inline int diffType(const int v, const int x, const int th)
-{
-    const int diff = x - v;
-
-    return (int)(diff < -th) + ((int)(diff > th) << 1);
-}
-
-// For OpenCL 1.1 compatibility
-__inline int popCount(int x) {
-    x = x - ((x >> 1) & 0x55555555);
-    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-    x = (x + (x >> 4)) & 0x0F0F0F0F;
-    x = x + (x >> 8);
-    x = x + (x >> 16);
-
-    return x & 0x0000003F;
-}
-
-
-void calcMask(
-    const uint C[4],
-    const int v,
-    const int th,
-    int* mask1,
-    int* mask2)
-{
-    *mask1 = 0;
-    *mask2 = 0;
-
-    int d1, d2;
-
-
-
-    d1 = diffType(v, C[0] & 0xff, th);
-    d2 = diffType(v, C[2] & 0xff, th);
-
-    if ((d1 | d2) == 0)
-        return;
-
-    *mask1 |= (d1 & 1) << 0;
-    *mask2 |= ((d1 & 2) >> 1) << 0;
-
-    *mask1 |= (d2 & 1) << 8;
-    *mask2 |= ((d2 & 2) >> 1) << 8;
-
-
-
-    d1 = diffType(v, C[1] & 0xff, th);
-    d2 = diffType(v, C[3] & 0xff, th);
-
-    if ((d1 | d2) == 0)
-        return;
-
-    *mask1 |= (d1 & 1) << 4;
-    *mask2 |= ((d1 & 2) >> 1) << 4;
-
-    *mask1 |= (d2 & 1) << 12;
-    *mask2 |= ((d2 & 2) >> 1) << 12;
-
-
-
-    d1 = diffType(v, (C[0] >> (2 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[2] >> (2 * 8)) & 0xff, th);
-
-    if ((d1 | d2) == 0)
-        return;
-
-    *mask1 |= (d1 & 1) << 2;
-    *mask2 |= ((d1 & 2) >> 1) << 2;
-
-    *mask1 |= (d2 & 1) << 10;
-    *mask2 |= ((d2 & 2) >> 1) << 10;
-
-
-
-    d1 = diffType(v, (C[1] >> (2 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[3] >> (2 * 8)) & 0xff, th);
-
-    if ((d1 | d2) == 0)
-        return;
-
-    *mask1 |= (d1 & 1) << 6;
-    *mask2 |= ((d1 & 2) >> 1) << 6;
-
-    *mask1 |= (d2 & 1) << 14;
-    *mask2 |= ((d2 & 2) >> 1) << 14;
-
-
-
-    d1 = diffType(v, (C[0] >> (1 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[2] >> (1 * 8)) & 0xff, th);
-
-    /*if ((d1 | d2) == 0)
-        return;*/
-
-    *mask1 |= (d1 & 1) << 1;
-    *mask2 |= ((d1 & 2) >> 1) << 1;
-
-    *mask1 |= (d2 & 1) << 9;
-    *mask2 |= ((d2 & 2) >> 1) << 9;
-
-
-
-    d1 = diffType(v, (C[0] >> (3 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[2] >> (3 * 8)) & 0xff, th);
-
-    /*if ((d1 | d2) == 0)
-        return;*/
-
-    *mask1 |= (d1 & 1) << 3;
-    *mask2 |= ((d1 & 2) >> 1) << 3;
-
-    *mask1 |= (d2 & 1) << 11;
-    *mask2 |= ((d2 & 2) >> 1) << 11;
-
-
-
-    d1 = diffType(v, (C[1] >> (1 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[3] >> (1 * 8)) & 0xff, th);
-
-    /*if ((d1 | d2) == 0)
-        return;*/
-
-    *mask1 |= (d1 & 1) << 5;
-    *mask2 |= ((d1 & 2) >> 1) << 5;
-
-    *mask1 |= (d2 & 1) << 13;
-    *mask2 |= ((d2 & 2) >> 1) << 13;
-
-
-
-    d1 = diffType(v, (C[1] >> (3 * 8)) & 0xff, th);
-    d2 = diffType(v, (C[3] >> (3 * 8)) & 0xff, th);
-
-    *mask1 |= (d1 & 1) << 7;
-    *mask2 |= ((d1 & 2) >> 1) << 7;
-
-    *mask1 |= (d2 & 1) << 15;
-    *mask2 |= ((d2 & 2) >> 1) << 15;
-}
-
-// 1 -> v > x + th
-// 2 -> v < x - th
-// 0 -> not a keypoint
-__inline bool isKeyPoint(int mask1, int mask2)
-{
-    // TODO: Use OpenCL's popcount() function if OpenCL version >= 1.2
-    return (popCount(mask1) > 8 && (c_table[(mask1 >> 3) - 63] & (1 << (mask1 & 7)))) ||
-           (popCount(mask2) > 8 && (c_table[(mask2 >> 3) - 63] & (1 << (mask2 & 7))));
-}
-
-int cornerScore(const uint C[4], const int v, const int threshold)
-{
-    // binary search in [threshold + 1, 255]
-
-    int min = threshold + 1;
-    int max = 255;
-
-    while (min <= max)
-    {
-        const int mid = (min + max) >> 1;
-
-        int mask1 = 0;
-        int mask2 = 0;
-
-        calcMask(C, v, mid, &mask1, &mask2);
-
-        int isKp = (int)isKeyPoint(mask1, mask2);
-
-        min = isKp * (mid + 1) + (isKp ^ 1) * min;
-        max = (isKp ^ 1) * (mid - 1) + isKp * max;
-    }
-
-    return min - 1;
-}
-
-///////////////////////////////////////////////////////////////////////////
-// calcKeypoints
-
-__kernel
-void calcKeypointsWithMask(
-    __global const uchar* img,
-    __global const uchar* mask,
-    __global int* kpLoc,
-    __global uint* score,
-    __global int* counter,
-    const int calcScore,
-    const unsigned int maxKeypoints,
-    const int threshold,
-    const int c_img_step,
-    const int c_img_rows,
-    const int c_img_cols,
-    const int c_mask_step,
-    const int c_kploc_step,
-    const int c_score_step)
-{
-    const int j = get_global_id(0) + 3;
-    const int i = get_global_id(1) + 3;
-
-    if (i < c_img_rows - 3 && j < c_img_cols - 3 && mask[i * c_mask_step + j])
-    {
-        int v;
-        uint C[4] = {0,0,0,0};
-
-        C[2] |= (uint)img[(i - 3) * c_img_step + j - 1] << 8;
-        C[2] |= (uint)img[(i - 3) * c_img_step + j];
-        C[1] |= (uint)img[(i - 3) * c_img_step + j + 1] << (3 * 8);
-
-        C[2] |= (uint)img[(i - 2) * c_img_step + j - 2] << (2 * 8);
-        C[1] |= (uint)img[(i - 2) * c_img_step + j + 2] << (2 * 8);
-
-        C[2] |= (uint)img[(i - 1) * c_img_step + j - 3] << (3 * 8);
-        C[1] |= (uint)img[(i - 1) * c_img_step + j + 3] << 8;
-
-        C[3] |= (uint)img[i * c_img_step + j - 3];
-        v     = (int) img[i * c_img_step + j];
-        C[1] |= (uint)img[i * c_img_step + j + 3];
-
-        int d1 = diffType(v, C[1] & 0xff, threshold);
-        int d2 = diffType(v, C[3] & 0xff, threshold);
-
-        if ((d1 | d2) == 0)
-            return;
-
-        C[3] |= (uint)img[(i + 1) * c_img_step + j - 3] << 8;
-        C[0] |= (uint)img[(i + 1) * c_img_step + j + 3] << (3 * 8);
-
-        C[3] |= (uint)img[(i + 2) * c_img_step + j - 2] << (2 * 8);
-        C[0] |= (uint)img[(i + 2) * c_img_step + j + 2] << (2 * 8);
-
-        C[3] |= (uint)img[(i + 3) * c_img_step + j - 1] << (3 * 8);
-        C[0] |= (uint)img[(i + 3) * c_img_step + j];
-        C[0] |= (uint)img[(i + 3) * c_img_step + j + 1] << 8;
-
-        int mask1 = 0;
-        int mask2 = 0;
-
-        calcMask(C, v, threshold, &mask1, &mask2);
-
-        if (isKeyPoint(mask1, mask2))
-        {
-            if (calcScore) score[i * c_score_step + j] = cornerScore(C, v, threshold);
-
-            uint idx = atomic_inc(counter);
-
-            if (idx < maxKeypoints)
-            {
-                kpLoc[X_ROW * c_kploc_step + idx] = j;
-                kpLoc[Y_ROW * c_kploc_step + idx] = i;
-            }
-        }
-    }
-}
-
-__kernel
-void calcKeypoints(
-    __global const uchar* img,
-    __global int* kpLoc,
-    __global uint* score,
-    __global int* counter,
-    const int calcScore,
-    const unsigned int maxKeypoints,
-    const int threshold,
-    const int c_img_step,
-    const int c_img_rows,
-    const int c_img_cols,
-    const int c_kploc_step,
-    const int c_score_step)
-{
-    const int j = get_global_id(0) + 3;
-    const int i = get_global_id(1) + 3;
-
-    if (i < c_img_rows - 3 && j < c_img_cols - 3)
-    {
-        int v;
-        uint C[4] = {0,0,0,0};
-
-        C[2] |= (uint)img[(i - 3) * c_img_step + j - 1] << 8;
-        C[2] |= (uint)img[(i - 3) * c_img_step + j];
-        C[1] |= (uint)img[(i - 3) * c_img_step + j + 1] << (3 * 8);
-
-        C[2] |= (uint)img[(i - 2) * c_img_step + j - 2] << (2 * 8);
-        C[1] |= (uint)img[(i - 2) * c_img_step + j + 2] << (2 * 8);
-
-        C[2] |= (uint)img[(i - 1) * c_img_step + j - 3] << (3 * 8);
-        C[1] |= (uint)img[(i - 1) * c_img_step + j + 3] << 8;
-
-        C[3] |= (uint)img[i * c_img_step + j - 3];
-        v     = (int) img[i * c_img_step + j];
-        C[1] |= (uint)img[i * c_img_step + j + 3];
-
-        int d1 = diffType(v, C[1] & 0xff, threshold);
-        int d2 = diffType(v, C[3] & 0xff, threshold);
-
-        if ((d1 | d2) == 0)
-            return;
-
-        C[3] |= (uint)img[(i + 1) * c_img_step + j - 3] << 8;
-        C[0] |= (uint)img[(i + 1) * c_img_step + j + 3] << (3 * 8);
-
-        C[3] |= (uint)img[(i + 2) * c_img_step + j - 2] << (2 * 8);
-        C[0] |= (uint)img[(i + 2) * c_img_step + j + 2] << (2 * 8);
-
-        C[3] |= (uint)img[(i + 3) * c_img_step + j - 1] << (3 * 8);
-        C[0] |= (uint)img[(i + 3) * c_img_step + j];
-        C[0] |= (uint)img[(i + 3) * c_img_step + j + 1] << 8;
-
-        int mask1 = 0;
-        int mask2 = 0;
-
-        calcMask(C, v, threshold, &mask1, &mask2);
-
-        if (isKeyPoint(mask1, mask2))
-        {
-            if (calcScore) score[i * c_score_step + j] = cornerScore(C, v, threshold);
-
-            uint idx = atomic_inc(counter);
-
-            if (idx < maxKeypoints)
-            {
-                kpLoc[X_ROW * c_kploc_step + idx] = j;
-                kpLoc[Y_ROW * c_kploc_step + idx] = i;
-            }
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////
-// nonmaxSupression
-
-__kernel
-void nonmaxSupression(
-    __global const int* kpLoc,
-    __global const uint* score,
-    __global float* keypoints,
-    __global int* new_counter,
-    const int counter,
-    const int c_kploc_step,
-    const int c_score_step,
-    const int c_keypoints_step)
-{
-    const int i = get_global_id(0);
-
-    if (i < counter)
-    {
-        int loc_x = kpLoc[X_ROW * c_kploc_step + i];
-        int loc_y = kpLoc[Y_ROW * c_kploc_step + i];
-
-        int s = score[loc_y * c_score_step + loc_x];
-
-        bool ismax =
-            s > score[(loc_y - 1) * c_score_step + loc_x - 1] &&
-            s > score[(loc_y - 1) * c_score_step + loc_x    ] &&
-            s > score[(loc_y - 1) * c_score_step + loc_x + 1] &&
-
-            s > score[loc_y       * c_score_step + loc_x - 1] &&
-            s > score[loc_y       * c_score_step + loc_x + 1] &&
-
-            s > score[(loc_y + 1) * c_score_step + loc_x - 1] &&
-            s > score[(loc_y + 1) * c_score_step + loc_x    ] &&
-            s > score[(loc_y + 1) * c_score_step + loc_x + 1];
-
-        if (ismax)
-        {
-            uint idx = atomic_inc(new_counter);
-
-            keypoints[X_ROW * c_keypoints_step + idx] = (float)loc_x;
-            keypoints[Y_ROW * c_keypoints_step + idx] = (float)loc_y;
-            keypoints[RESPONSE_ROW * c_keypoints_step + idx] = (float)s;
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/filter_sep_col.cl b/modules/ocl/src/opencl/filter_sep_col.cl
deleted file mode 100644
index 0d1998c..0000000
--- a/modules/ocl/src/opencl/filter_sep_col.cl
+++ /dev/null
@@ -1,118 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
-#define RADIUS 1
-#if CN ==1
-#define ALIGN (((RADIUS)+3)>>2<<2)
-#elif CN==2
-#define ALIGN (((RADIUS)+1)>>1<<1)
-#elif CN==3
-#define ALIGN (((RADIUS)+3)>>2<<2)
-#elif CN==4
-#define ALIGN (RADIUS)
-#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
-#endif
-
-/**********************************************************************************
-These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
-Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
-kernel must be in the center. ROI is not supported either.
-Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
-from LDS to calculate the result.
-The length of the convovle kernel supported is only related to the MAX size of LDS,
-which is HW related.
-Niko
-6/29/2011
-The info above maybe obsolete.
-***********************************************************************************/
-
-
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
-                        (__global const GENTYPE_SRC * restrict src,
-                         __global GENTYPE_DST * dst,
-                         const int dst_cols,
-                         const int dst_rows,
-                         const int src_whole_cols,
-                         const int src_whole_rows,
-                         const int src_step_in_pixel,
-                         //const int src_offset_x,
-                         //const int src_offset_y,
-                         const int dst_step_in_pixel,
-                         const int dst_offset_in_pixel,
-                         __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-
-    int start_addr = mad24(y, src_step_in_pixel, x);
-    int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
-
-    int i;
-    GENTYPE_SRC sum, temp[READ_TIMES_COL];
-    __local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
-
-    //read pixels from src
-    for(i = 0;i<READ_TIMES_COL;i++)
-    {
-        int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
-        current_addr = current_addr < end_addr ? current_addr : 0;
-        temp[i] = src[current_addr];
-    }
-    //save pixels to lds
-    for(i = 0;i<READ_TIMES_COL;i++)
-    {
-        LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    //read pixels from lds and calculate the result
-    sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
-    for(i=1;i<=RADIUSY;i++)
-    {
-        temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
-        temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
-        sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
-    }
-    //write the result to dst
-    if((x<dst_cols) & (y<dst_rows))
-    {
-        start_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
-        dst[start_addr] = convert_to_DST(sum);
-    }
-}
diff --git a/modules/ocl/src/opencl/filter_sep_row.cl b/modules/ocl/src/opencl/filter_sep_row.cl
deleted file mode 100644
index d745409..0000000
--- a/modules/ocl/src/opencl/filter_sep_row.cl
+++ /dev/null
@@ -1,457 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
-#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
-//#pragma OPENCL EXTENSION cl_amd_printf : enable
-#define RADIUS 1
-#if CN ==1
-#define ALIGN (((RADIUS)+3)>>2<<2)
-#elif CN==2
-#define ALIGN (((RADIUS)+1)>>1<<1)
-#elif CN==3
-#define ALIGN (((RADIUS)+3)>>2<<2)
-#elif CN==4
-#define ALIGN (RADIUS)
-#endif
-
-#ifdef BORDER_CONSTANT
-#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        x = max(min(x, maxV - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        if (x < 0) \
-            x -= ((x - maxV + 1) / maxV) * maxV; \
-        if (x >= maxV) \
-            x %= maxV; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE_(x, maxV, delta) \
-    { \
-        if (maxV == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if ( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = maxV - 1 - (x - maxV) - delta; \
-            } \
-            while (x >= maxV || x < 0); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
-#else
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-
-/**********************************************************************************
-These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
-Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
-kernel must be in the center. ROI is not supported either.
-For channels =1,2,4, each kernels read 4 elements(not 4 pixels), and for channels =3,
-the kernel read 4 pixels, save them to LDS and read the data needed from LDS to
-calculate the result.
-The length of the convovle kernel supported is related to the LSIZE0 and the MAX size
-of LDS, which is HW related.
-For channels = 1,3 the RADIUS is no more than LSIZE0*2
-For channels = 2, the RADIUS is no more than LSIZE0
-For channels = 4, arbitary RADIUS is supported unless the LDS is not enough
-Niko
-6/29/2011
-The info above maybe obsolete.
-***********************************************************************************/
-
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
-    (__global uchar * restrict src,
-     __global float * dst,
-     int dst_cols, int dst_rows,
-     int src_whole_cols, int src_whole_rows,
-     int src_step_in_pixel,
-     int src_offset_x, int src_offset_y,
-     int dst_step_in_pixel, int radiusy,
-     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
-{
-    int x = get_global_id(0)<<2;
-    int y = get_global_id(1);
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-
-    int start_x = x+src_offset_x - RADIUSX & 0xfffffffc;
-    int offset = src_offset_x - RADIUSX & 3;
-    int start_y = y + src_offset_y - radiusy;
-    int start_addr = mad24(start_y, src_step_in_pixel, start_x);
-    int i;
-    float4 sum;
-    uchar4 temp[READ_TIMES_ROW];
-
-    __local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
-#ifdef BORDER_CONSTANT
-    int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
-
-    // read pixels from src
-    for (i = 0; i < READ_TIMES_ROW; i++)
-    {
-        int current_addr = start_addr+i*LSIZE0*4;
-        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-        temp[i] = *(__global uchar4*)&src[current_addr];
-    }
-
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        temp[i].x = ELEM(start_x+i*LSIZE0*4,0,src_whole_cols,0,temp[i].x);
-        temp[i].y = ELEM(start_x+i*LSIZE0*4+1,0,src_whole_cols,0,temp[i].y);
-        temp[i].z = ELEM(start_x+i*LSIZE0*4+2,0,src_whole_cols,0,temp[i].z);
-        temp[i].w = ELEM(start_x+i*LSIZE0*4+3,0,src_whole_cols,0,temp[i].w);
-        temp[i]   = ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
-    }
-#else
-    int not_all_in_range = (start_x<0) | (start_x + READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols)| (start_y<0) | (start_y >= src_whole_rows);
-    int4 index[READ_TIMES_ROW];
-    int4 addr;
-    int s_y;
-
-    if (not_all_in_range)
-    {
-        // judge if read out of boundary
-        for (i = 0; i < READ_TIMES_ROW; i++)
-        {
-            index[i] = (int4)(start_x+i*LSIZE0*4) + (int4)(0, 1, 2, 3);
-            EXTRAPOLATE(index[i].x, src_whole_cols);
-            EXTRAPOLATE(index[i].y, src_whole_cols);
-            EXTRAPOLATE(index[i].z, src_whole_cols);
-            EXTRAPOLATE(index[i].w, src_whole_cols);
-        }
-        s_y = start_y;
-        EXTRAPOLATE(s_y, src_whole_rows);
-
-        // read pixels from src
-        for (i = 0; i<READ_TIMES_ROW; i++)
-        {
-            addr = mad24((int4)s_y,(int4)src_step_in_pixel,index[i]);
-            temp[i].x = src[addr.x];
-            temp[i].y = src[addr.y];
-            temp[i].z = src[addr.z];
-            temp[i].w = src[addr.w];
-        }
-    }
-    else
-    {
-        // read pixels from src
-        for (i = 0; i<READ_TIMES_ROW; i++)
-            temp[i] = *(__global uchar4*)&src[start_addr+i*LSIZE0*4];
-    }
-#endif
-
-    // save pixels to lds
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // read pixels from lds and calculate the result
-    sum =convert_float4(vload4(0,(__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset))*mat_kernel[RADIUSX];
-    for (i=1; i<=RADIUSX; i++)
-    {
-        temp[0] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset - i);
-        temp[1] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset + i);
-        sum += convert_float4(temp[0]) * mat_kernel[RADIUSX-i] + convert_float4(temp[1]) * mat_kernel[RADIUSX+i];
-    }
-
-    start_addr = mad24(y,dst_step_in_pixel,x);
-
-    // write the result to dst
-    if ((x+3<dst_cols) & (y<dst_rows))
-        *(__global float4*)&dst[start_addr] = sum;
-    else if ((x+2<dst_cols) && (y<dst_rows))
-    {
-        dst[start_addr] = sum.x;
-        dst[start_addr+1] = sum.y;
-        dst[start_addr+2] = sum.z;
-    }
-    else if ((x+1<dst_cols) && (y<dst_rows))
-    {
-        dst[start_addr] = sum.x;
-        dst[start_addr+1] = sum.y;
-    }
-    else if (x<dst_cols && y<dst_rows)
-        dst[start_addr] = sum.x;
-}
-
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D0
-    (__global uchar4 * restrict src,
-     __global float4 * dst,
-     int dst_cols, int dst_rows,
-     int src_whole_cols, int src_whole_rows,
-     int src_step_in_pixel,
-     int src_offset_x, int src_offset_y,
-     int dst_step_in_pixel, int radiusy,
-     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-    int start_x = x+src_offset_x-RADIUSX;
-    int start_y = y+src_offset_y-radiusy;
-    int start_addr = mad24(start_y,src_step_in_pixel,start_x);
-    int i;
-    float4 sum;
-    uchar4 temp[READ_TIMES_ROW];
-
-    __local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
-#ifdef BORDER_CONSTANT
-    int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-
-    // read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        int current_addr = start_addr+i*LSIZE0;
-        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-        temp[i] = src[current_addr];
-    }
-
-    //judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(uchar4)0,temp[i]);
-        temp[i]= ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
-    }
-#else
-    int index[READ_TIMES_ROW];
-    int s_x,s_y;
-
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        s_x = start_x+i*LSIZE0;
-        EXTRAPOLATE(s_x, src_whole_cols);
-        s_y = start_y;
-        EXTRAPOLATE(s_y, src_whole_rows);
-        index[i]=mad24(s_y,src_step_in_pixel,s_x);
-    }
-
-    //read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        temp[i] = src[index[i]];
-#endif
-
-    //save pixels to lds
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //read pixels from lds and calculate the result
-    sum =convert_float4(LDS_DAT[l_y][l_x+RADIUSX])*mat_kernel[RADIUSX];
-    for (i=1; i<=RADIUSX; i++)
-    {
-        temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
-        temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
-        sum += convert_float4(temp[0])*mat_kernel[RADIUSX-i]+convert_float4(temp[1])*mat_kernel[RADIUSX+i];
-    }
-    //write the result to dst
-    if (x<dst_cols && y<dst_rows)
-    {
-        start_addr = mad24(y,dst_step_in_pixel,x);
-        dst[start_addr] = sum;
-    }
-}
-
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D5
-    (__global float * restrict src,
-     __global float * dst,
-     int dst_cols, int dst_rows,
-     int src_whole_cols, int src_whole_rows,
-     int src_step_in_pixel,
-     int src_offset_x, int src_offset_y,
-     int dst_step_in_pixel, int radiusy,
-     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-    int start_x = x+src_offset_x-RADIUSX;
-    int start_y = y+src_offset_y-radiusy;
-    int start_addr = mad24(start_y,src_step_in_pixel,start_x);
-    int i;
-    float sum;
-    float temp[READ_TIMES_ROW];
-
-    __local float LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
-#ifdef BORDER_CONSTANT
-    int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-
-    // read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        int current_addr = start_addr+i*LSIZE0;
-        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-        temp[i] = src[current_addr];
-    }
-
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(float)0,temp[i]);
-        temp[i]= ELEM(start_y,0,src_whole_rows,(float)0,temp[i]);
-    }
-#else
-    int index[READ_TIMES_ROW];
-    int s_x,s_y;
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        s_x = start_x + i*LSIZE0, s_y = start_y;
-        EXTRAPOLATE(s_x, src_whole_cols);
-        EXTRAPOLATE(s_y, src_whole_rows);
-
-        index[i]=mad24(s_y, src_step_in_pixel, s_x);
-    }
-    // read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        temp[i] = src[index[i]];
-#endif
-
-    //save pixels to lds
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // read pixels from lds and calculate the result
-    sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
-    for (i=1; i<=RADIUSX; i++)
-    {
-        temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
-        temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
-        sum += temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i];
-    }
-
-    // write the result to dst
-    if (x<dst_cols && y<dst_rows)
-    {
-        start_addr = mad24(y,dst_step_in_pixel,x);
-        dst[start_addr] = sum;
-    }
-}
-
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D5
-    (__global float4 * restrict src,
-     __global float4 * dst,
-     int dst_cols, int dst_rows,
-     int src_whole_cols, int src_whole_rows,
-     int src_step_in_pixel,
-     int src_offset_x, int src_offset_y,
-     int dst_step_in_pixel, int radiusy,
-     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-    int start_x = x+src_offset_x-RADIUSX;
-    int start_y = y+src_offset_y-radiusy;
-    int start_addr = mad24(start_y,src_step_in_pixel,start_x);
-    int i;
-    float4 sum;
-    float4 temp[READ_TIMES_ROW];
-
-    __local float4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
-#ifdef BORDER_CONSTANT
-    int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-
-    // read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        int current_addr = start_addr+i*LSIZE0;
-        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-        temp[i] = src[current_addr];
-    }
-
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(float4)0,temp[i]);
-        temp[i]= ELEM(start_y,0,src_whole_rows,(float4)0,temp[i]);
-    }
-#else
-    int index[READ_TIMES_ROW];
-    int s_x,s_y;
-
-    // judge if read out of boundary
-    for (i = 0; i<READ_TIMES_ROW; i++)
-    {
-        s_x = start_x + i*LSIZE0, s_y = start_y;
-        EXTRAPOLATE(s_x, src_whole_cols);
-        EXTRAPOLATE(s_y, src_whole_rows);
-
-        index[i]=mad24(s_y,src_step_in_pixel,s_x);
-    }
-    // read pixels from src
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        temp[i] = src[index[i]];
-#endif
-
-    // save pixels to lds
-    for (i = 0; i<READ_TIMES_ROW; i++)
-        LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // read pixels from lds and calculate the result
-    sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
-    for (i=1; i<=RADIUSX; i++)
-    {
-        temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
-        temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
-        sum += temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i];
-    }
-
-    // write the result to dst
-    if (x<dst_cols && y<dst_rows)
-    {
-        start_addr = mad24(y,dst_step_in_pixel,x);
-        dst[start_addr] = sum;
-    }
-}
diff --git a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
deleted file mode 100644
index 81b2961..0000000
--- a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
+++ /dev/null
@@ -1,429 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Harris Gasparakis, harris.gasparakis@amd.com
-//    Xiaopeng Fu, fuxiaopeng2222@163.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef BORDER_CONSTANT
-#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        x = max(min(x, maxV - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        if (x < 0) \
-            x -= ((x - maxV + 1) / maxV) * maxV; \
-        if (x >= maxV) \
-            x %= maxV; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE_(x, maxV, delta) \
-    { \
-        if (maxV == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if ( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = maxV - 1 - (x - maxV) - delta; \
-            } \
-            while (x >= maxV || x < 0); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
-#else
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-
-__kernel void
-adaptiveBilateralFilter_C4_D0(
-    __global const uchar4 * restrict src,
-    __global uchar4 *dst,
-    float alpha,
-    int src_offset,
-    int src_whole_rows,
-    int src_whole_cols,
-    int src_step,
-    int dst_offset,
-    int dst_rows,
-    int dst_cols,
-    int dst_step,
-    __global const float* lut,
-    int lut_step)
-{
-    int col = get_local_id(0);
-    const int gX = get_group_id(0);
-    const int gY = get_group_id(1);
-
-    int src_x_off = (src_offset % src_step) >> 2;
-    int src_y_off = src_offset / src_step;
-    int dst_x_off = (dst_offset % dst_step) >> 2;
-    int dst_y_off = dst_offset / dst_step;
-
-    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
-    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
-
-    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
-    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
-
-    int posX = dst_startX - dst_x_off + col;
-    int posY = (gY * (1+EXTRA))	;
-
-    __local uchar4 data[ksY+EXTRA][THREADS];
-
-    float4 tmp_sum[1+EXTRA];
-    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
-        tmp_sum[tmpint] = (float4)(0,0,0,0);
-
-#ifdef BORDER_CONSTANT
-    bool con;
-    uchar4 ss;
-    for(int j = 0;	j < ksY+EXTRA; j++)
-    {
-        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
-        int cur_col = clamp(startX + col, 0, src_whole_cols);
-        if (con)
-            ss = src[(startY+j)*(src_step>>2) + cur_col];
-
-        data[j][col] = con ? ss : (uchar4)0;
-    }
-#else
-    for(int j= 0; j < ksY+EXTRA; j++)
-    {
-        int selected_row = startY+j, selected_col = startX+col;
-        EXTRAPOLATE(selected_row, src_whole_rows)
-        EXTRAPOLATE(selected_col, src_whole_cols)
-
-        data[j][col] = src[selected_row * (src_step>>2) + selected_col];
-    }
-#endif
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    float4 var[1+EXTRA];
-
-#if VAR_PER_CHANNEL
-    float4 weight;
-    float4 totalWeight = (float4)(0,0,0,0);
-#else
-    float weight;
-    float totalWeight = 0;
-#endif
-
-    int4 currValCenter;
-    int4 currWRTCenter;
-
-    int4 sumVal = 0;
-    int4 sumValSqr = 0;
-
-    if(col < (THREADS-(ksX-1)))
-    {
-        int4 currVal;
-        int howManyAll = (2*anX+1)*(ksY);
-
-        //find variance of all data
-        int startLMj;
-        int endLMj ;
-        // Top row: don't sum the very last element
-        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
-        {
-#if CALCVAR
-            startLMj = extraCnt;
-            endLMj =  ksY+extraCnt-1;
-            sumVal = (int4)0;
-            sumValSqr= (int4)0;
-            for(int j = startLMj; j < endLMj; j++)
-                for(int i=-anX; i<=anX; i++)
-                {
-                    currVal = convert_int4(data[j][col+anX+i]);
-
-                    sumVal += currVal;
-                    sumValSqr += mul24(currVal, currVal);
-                }
-
-            var[extraCnt] = clamp( convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ), (float4)(0.1f, 0.1f, 0.1f, 0.1f), (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL)) ;
-
-#else
-            var[extraCnt] = (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL);
-#endif
-        }
-
-        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
-        {
-
-            // top row: include the very first element, even on first time
-            startLMj = extraCnt;
-            // go all the way, unless this is the last local mem chunk,
-            // then stay within limits - 1
-            endLMj =  extraCnt + ksY;
-
-            // Top row: don't sum the very last element
-            currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
-
-            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
-            {
-                for(int i=-anX; i<=anX; i++)
-                {
-#if FIXED_WEIGHT
-#if VAR_PER_CHANNEL
-                    weight.x = 1.0f;
-                    weight.y = 1.0f;
-                    weight.z = 1.0f;
-                    weight.w = 1.0f;
-#else
-                    weight = 1.0f;
-#endif
-#else // !FIXED_WEIGHT
-                    currVal = convert_int4(data[j][col+anX+i]);
-                    currWRTCenter = currVal-currValCenter;
-
-#if ABF_GAUSSIAN
-
-#if VAR_PER_CHANNEL
-                    weight = exp( (float4)(-0.5f, -0.5f, -0.5f, -0.5f) * convert_float4(currWRTCenter * currWRTCenter) / var[extraCnt] )*
-                        (float4)(lut[lut_j*lut_step+anX+i]);
-#else
-                    weight = exp( -0.5f * (mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
-                        mul24(currWRTCenter.z, currWRTCenter.z) ) / (var[extraCnt].x+var[extraCnt].y+var[extraCnt].z) ) * lut[lut_j*lut_step+anX+i];
-#endif
-
-#else // !ABF_GAUSSIAN
-
-#if VAR_PER_CHANNEL
-                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
-                        (float4)(lut[lut_j*lut_step+anX+i]);
-#else
-                    weight = ((float)lut[lut_j*lut_step+anX+i]) /(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
-                        mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
-#endif
-
-#endif //ABF_GAUSSIAN
-
-
-
-#endif  // FIXED_WEIGHT
-
-                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
-                    totalWeight += weight;
-                }
-            }
-
-            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
-                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4_rtz( (tmp_sum[extraCnt] / (float4)totalWeight) + (float4)0.5f);
-
-#if VAR_PER_CHANNEL
-            totalWeight = (float4)(0,0,0,0);
-#else
-            totalWeight = 0.0f;
-#endif
-        }
-    }
-}
-
-
-__kernel void
-adaptiveBilateralFilter_C1_D0(
-    __global const uchar * restrict src,
-    __global uchar *dst,
-    float alpha,
-    int src_offset,
-    int src_whole_rows,
-    int src_whole_cols,
-    int src_step,
-    int dst_offset,
-    int dst_rows,
-    int dst_cols,
-    int dst_step,
-    __global const float * lut,
-    int lut_step)
-{
-    int col = get_local_id(0);
-    const int gX = get_group_id(0);
-    const int gY = get_group_id(1);
-
-    int src_x_off = (src_offset % src_step);
-    int src_y_off = src_offset / src_step;
-    int dst_x_off = (dst_offset % dst_step);
-    int dst_y_off = dst_offset / dst_step;
-
-    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
-    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
-
-    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
-    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
-
-    int posX = dst_startX - dst_x_off + col;
-    int posY = (gY * (1+EXTRA))	;
-
-    __local uchar data[ksY+EXTRA][THREADS];
-
-    float tmp_sum[1+EXTRA];
-    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
-    {
-        tmp_sum[tmpint] = (float)(0);
-    }
-
-#ifdef BORDER_CONSTANT
-    bool con;
-    uchar ss;
-    for(int j = 0;	j < ksY+EXTRA; j++)
-    {
-        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
-
-        int cur_col = clamp(startX + col, 0, src_whole_cols);
-        if(con)
-        {
-            ss = src[(startY+j)*(src_step) + cur_col];
-        }
-
-        data[j][col] = con ? ss : 0;
-    }
-#else
-    for(int j= 0; j < ksY+EXTRA; j++)
-    {
-        int selected_row = startY+j, selected_col = startX+col;
-        EXTRAPOLATE(selected_row, src_whole_rows)
-        EXTRAPOLATE(selected_col, src_whole_cols)
-
-        data[j][col] = src[selected_row * (src_step) + selected_col];
-    }
-#endif
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    float var[1+EXTRA];
-
-    float weight;
-    float totalWeight = 0;
-
-    int currValCenter;
-    int currWRTCenter;
-
-    int sumVal = 0;
-    int sumValSqr = 0;
-
-    if(col < (THREADS-(ksX-1)))
-    {
-        int currVal;
-
-        int howManyAll = (2*anX+1)*(ksY);
-
-        //find variance of all data
-        int startLMj;
-        int endLMj;
-
-        // Top row: don't sum the very last element
-        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
-        {
-#if CALCVAR
-            startLMj = extraCnt;
-            endLMj =  ksY+extraCnt-1;
-            sumVal = 0;
-            sumValSqr =0;
-            for(int j = startLMj; j < endLMj; j++)
-            {
-                for(int i=-anX; i<=anX; i++)
-                {
-                    currVal	= (uint)(data[j][col+anX+i])	;
-
-                    sumVal += currVal;
-                    sumValSqr += mul24(currVal, currVal);
-                }
-            }
-            var[extraCnt] =  clamp((float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) , 0.1f, (float)(MAX_VAR_VAL) );
-#else
-            var[extraCnt] = (float)(MAX_VAR_VAL);
-#endif
-        }
-
-        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
-        {
-
-            // top row: include the very first element, even on first time
-            startLMj = extraCnt;
-            // go all the way, unless this is the last local mem chunk,
-            // then stay within limits - 1
-            endLMj =  extraCnt + ksY;
-
-            // Top row: don't sum the very last element
-            currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
-
-            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
-            {
-                for(int i=-anX; i<=anX; i++)
-                {
-#if FIXED_WEIGHT
-                    weight = 1.0f;
-#else
-                    currVal	= (int)(data[j][col+anX+i])	;
-                    currWRTCenter = currVal-currValCenter;
-
-#if ABF_GAUSSIAN
-                    weight = exp( -0.5f * (float)mul24(currWRTCenter,currWRTCenter)/var[extraCnt]) * lut[lut_j*lut_step+anX+i] ;
-#else
-                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
-#endif
-#endif
-                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
-                    totalWeight += weight;
-                }
-            }
-
-            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
-            {
-                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = convert_uchar_rtz(tmp_sum[extraCnt]/totalWeight+0.5f);
-            }
-
-            totalWeight = 0;
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/filtering_boxFilter.cl b/modules/ocl/src/opencl/filtering_boxFilter.cl
deleted file mode 100644
index 96091ce..0000000
--- a/modules/ocl/src/opencl/filtering_boxFilter.cl
+++ /dev/null
@@ -1,376 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-#ifdef BORDER_REPLICATE
-//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
-#endif
-
-#ifdef BORDER_REFLECT
-//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
-#endif
-
-#ifdef BORDER_REFLECT_101
-//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
-#endif
-
-//blur function does not support BORDER_WRAP
-#ifdef BORDER_WRAP
-//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
-#endif
-
-#ifdef EXTRA_EXTRAPOLATION // border > src image size
-#ifdef BORDER_CONSTANT
-// None
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        x = max(min(x, maxX - 1), minX); \
-        y = max(min(y, maxY - 1), minY); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        if (x < minX) \
-            x -= ((x - maxX + 1) / maxX) * maxX; \
-        if (x >= maxX) \
-            x %= maxX; \
-        if (y < minY) \
-            y -= ((y - maxY + 1) / maxY) * maxY; \
-        if (y >= maxY) \
-            y %= maxY; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, delta) \
-    { \
-        if (maxX - minX == 1) \
-            x = minX; \
-        else \
-            do \
-            { \
-                if (x < minX) \
-                    x = -(x - minX) - 1 + delta; \
-                else \
-                    x = maxX - 1 - (x - maxX) - delta; \
-            } \
-            while (x >= maxX || x < minX); \
-        \
-        if (maxY - minY == 1) \
-            y = minY; \
-        else \
-            do \
-            { \
-                if (y < minY) \
-                    y = -(y - minY) - 1 + delta; \
-                else \
-                    y = maxY - 1 - (y - maxY) - delta; \
-            } \
-            while (y >= maxY || y < minY); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
-#elif defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-#else
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        int _row = y - minY, _col = x - minX; \
-        _row = ADDR_H(_row, 0, maxY - minY); \
-        _row = ADDR_B(_row, maxY - minY, _row); \
-        y = _row + minY; \
-        \
-        _col = ADDR_L(_col, 0, maxX - minX); \
-        _col = ADDR_R(_col, maxX - minX, _col); \
-        x = _col + minX; \
-    }
-#endif
-
-#if USE_DOUBLE
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define FPTYPE double
-#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
-#else
-#define FPTYPE float
-#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
-#endif
-
-#if DATA_DEPTH == 0
-#define BASE_TYPE uchar
-#elif DATA_DEPTH == 1
-#define BASE_TYPE char
-#elif DATA_DEPTH == 2
-#define BASE_TYPE ushort
-#elif DATA_DEPTH == 3
-#define BASE_TYPE short
-#elif DATA_DEPTH == 4
-#define BASE_TYPE int
-#elif DATA_DEPTH == 5
-#define BASE_TYPE float
-#elif DATA_DEPTH == 6
-#define BASE_TYPE double
-#else
-#error data_depth
-#endif
-
-#define __CAT(x, y) x##y
-#define CAT(x, y) __CAT(x, y)
-
-#define uchar1 uchar
-#define char1 char
-#define ushort1 ushort
-#define short1 short
-#define int1 int
-#define float1 float
-#define double1 double
-
-#define convert_uchar1_sat_rte convert_uchar_sat_rte
-#define convert_char1_sat_rte convert_char_sat_rte
-#define convert_ushort1_sat_rte convert_ushort_sat_rte
-#define convert_short1_sat_rte convert_short_sat_rte
-#define convert_int1_sat_rte convert_int_sat_rte
-#define convert_float1
-#define convert_double1
-
-#if DATA_DEPTH == 5 || DATA_DEPTH == 6
-#define CONVERT_TO_TYPE CAT(CAT(convert_, BASE_TYPE), VEC_SIZE)
-#else
-#define CONVERT_TO_TYPE CAT(CAT(CAT(convert_, BASE_TYPE), VEC_SIZE), _sat_rte)
-#endif
-
-#define VEC_SIZE DATA_CHAN
-
-#define VEC_TYPE CAT(BASE_TYPE, VEC_SIZE)
-#define TYPE VEC_TYPE
-
-#define SCALAR_TYPE CAT(FPTYPE, VEC_SIZE)
-
-#define INTERMEDIATE_TYPE CAT(FPTYPE, VEC_SIZE)
-
-struct RectCoords
-{
-    int x1, y1, x2, y2;
-};
-
-//#define DEBUG
-#ifdef DEBUG
-#define DEBUG_ONLY(x) x
-#define ASSERT(condition) do { if (!(condition)) { printf("BUG in boxFilter kernel (global=%d,%d): " #condition "\n", get_global_id(0), get_global_id(1)); } } while (0)
-#else
-#define DEBUG_ONLY(x)
-#define ASSERT(condition)
-#endif
-
-
-inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global TYPE *src, const unsigned int srcStepBytes, const struct RectCoords srcCoords
-#ifdef BORDER_CONSTANT
-               , SCALAR_TYPE borderValue
-#endif
-    )
-{
-#ifdef BORDER_ISOLATED
-    if(pos.x >= srcCoords.x1 && pos.y >= srcCoords.y1 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-#else
-    if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-#endif
-    {
-        __global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
-        return CONVERT_TO_FPTYPE(*ptr);
-    }
-    else
-    {
-#ifdef BORDER_CONSTANT
-        return borderValue;
-#else
-        int selected_col = pos.x;
-        int selected_row = pos.y;
-
-        EXTRAPOLATE(selected_col, selected_row,
-#ifdef BORDER_ISOLATED
-                srcCoords.x1, srcCoords.y1,
-#else
-                0, 0,
-#endif
-                srcCoords.x2, srcCoords.y2
-         );
-
-        // debug border mapping
-        //printf("pos=%d,%d --> %d, %d\n", pos.x, pos.y, selected_col, selected_row);
-
-        pos = (int2)(selected_col, selected_row);
-        if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-        {
-            __global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
-            return CONVERT_TO_FPTYPE(*ptr);
-        }
-        else
-        {
-            // for debug only
-            DEBUG_ONLY(printf("BUG in boxFilter kernel\n"));
-            return (FPTYPE)(0.0f);
-        }
-#endif
-    }
-}
-
-// INPUT PARAMETER: BLOCK_SIZE_Y (via defines)
-
-__kernel
-__attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
-void boxFilter(__global TYPE *src, const unsigned int srcStepBytes, const int4 srcRC,
-               __global TYPE *dst, const unsigned int dstStepBytes, const int4 dstRC,
-#ifdef BORDER_CONSTANT
-               SCALAR_TYPE borderValue,
-#endif
-               FPTYPE alpha
-               )
-{
-    const struct RectCoords srcCoords = {srcRC.s0, srcRC.s1, srcRC.s2, srcRC.s3}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
-    const struct RectCoords dstCoords = {dstRC.s0, dstRC.s1, dstRC.s2, dstRC.s3};
-
-    const int x = get_local_id(0) + (LOCAL_SIZE - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X;
-    const int y = get_global_id(1) * BLOCK_SIZE_Y;
-
-    const int local_id = get_local_id(0);
-
-    INTERMEDIATE_TYPE data[KERNEL_SIZE_Y];
-    __local INTERMEDIATE_TYPE sumOfCols[LOCAL_SIZE];
-
-    int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y);
-    for(int sy = 0; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
-    {
-        data[sy] = readSrcPixel(srcPos, src, srcStepBytes, srcCoords
-#ifdef BORDER_CONSTANT
-                , borderValue
-#endif
-                );
-    }
-
-    INTERMEDIATE_TYPE tmp_sum = 0;
-    for(int sy = 0; sy < KERNEL_SIZE_Y; sy++)
-    {
-        tmp_sum += (data[sy]);
-    }
-
-    sumOfCols[local_id] = tmp_sum;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int2 pos = (int2)(dstCoords.x1 + x, dstCoords.y1 + y);
-    __global TYPE* dstPtr = (__global TYPE*)((__global char*)dst + pos.x * sizeof(TYPE) + pos.y * dstStepBytes); // Pointer can be out of bounds!
-
-    int sy_index = 0; // current index in data[] array
-    int stepsY = min(dstCoords.y2 - pos.y, BLOCK_SIZE_Y);
-    ASSERT(stepsY > 0);
-    for (; ;)
-    {
-        ASSERT(pos.y < dstCoords.y2);
-
-        if(local_id >= ANCHOR_X && local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X) &&
-            pos.x >= dstCoords.x1 && pos.x < dstCoords.x2)
-        {
-            ASSERT(pos.y >= dstCoords.y1 && pos.y < dstCoords.y2);
-
-            INTERMEDIATE_TYPE total_sum = 0;
-#pragma unroll
-            for (int sx = 0; sx < KERNEL_SIZE_X; sx++)
-            {
-                total_sum += sumOfCols[local_id + sx - ANCHOR_X];
-            }
-            *dstPtr = CONVERT_TO_TYPE(((INTERMEDIATE_TYPE)alpha) * total_sum);
-        }
-
-#if BLOCK_SIZE_Y == 1
-        break;
-#else
-        if (--stepsY == 0)
-            break;
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        tmp_sum = sumOfCols[local_id]; // TODO FIX IT: workaround for BUG in OpenCL compiler
-        // only works with scalars: ASSERT(fabs(tmp_sum - sumOfCols[local_id]) < (INTERMEDIATE_TYPE)1e-6);
-        tmp_sum -= data[sy_index];
-
-        data[sy_index] = readSrcPixel(srcPos, src, srcStepBytes, srcCoords
-#ifdef BORDER_CONSTANT
-                , borderValue
-#endif
-                );
-        srcPos.y++;
-
-        tmp_sum += data[sy_index];
-        sumOfCols[local_id] = tmp_sum;
-
-        sy_index = (sy_index + 1 < KERNEL_SIZE_Y) ? sy_index + 1 : 0;
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        // next line
-        DEBUG_ONLY(pos.y++);
-        dstPtr = (__global TYPE*)((__global char*)dstPtr + dstStepBytes); // Pointer can be out of bounds!
-#endif // BLOCK_SIZE_Y == 1
-    }
-}
diff --git a/modules/ocl/src/opencl/filtering_filter2D.cl b/modules/ocl/src/opencl/filtering_filter2D.cl
deleted file mode 100644
index fb7dca5..0000000
--- a/modules/ocl/src/opencl/filtering_filter2D.cl
+++ /dev/null
@@ -1,374 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef BORDER_REPLICATE
-//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
-#endif
-
-#ifdef BORDER_REFLECT
-//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
-#endif
-
-#ifdef BORDER_REFLECT_101
-//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
-#endif
-
-//blur function does not support BORDER_WRAP
-#ifdef BORDER_WRAP
-//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
-#endif
-
-#ifdef EXTRA_EXTRAPOLATION // border > src image size
-#ifdef BORDER_CONSTANT
-// None
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        x = max(min(x, maxX - 1), minX); \
-        y = max(min(y, maxY - 1), minY); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        if (x < minX) \
-            x -= ((x - maxX + 1) / maxX) * maxX; \
-        if (x >= maxX) \
-            x %= maxX; \
-        if (y < minY) \
-            y -= ((y - maxY + 1) / maxY) * maxY; \
-        if (y >= maxY) \
-            y %= maxY; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, delta) \
-    { \
-        if (maxX - minX == 1) \
-            x = minX; \
-        else \
-            do \
-            { \
-                if (x < minX) \
-                    x = -(x - minX) - 1 + delta; \
-                else \
-                    x = maxX - 1 - (x - maxX) - delta; \
-            } \
-            while (x >= maxX || x < minX); \
-        \
-        if (maxY - minY == 1) \
-            y = minY; \
-        else \
-            do \
-            { \
-                if (y < minY) \
-                    y = -(y - minY) - 1 + delta; \
-                else \
-                    y = maxY - 1 - (y - maxY) - delta; \
-            } \
-            while (y >= maxY || y < minY); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
-#elif defined(BORDER_REFLECT_101)
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-#else
-#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
-    { \
-        int _row = y - minY, _col = x - minX; \
-        _row = ADDR_H(_row, 0, maxY - minY); \
-        _row = ADDR_B(_row, maxY - minY, _row); \
-        y = _row + minY; \
-        \
-        _col = ADDR_L(_col, 0, maxX - minX); \
-        _col = ADDR_R(_col, maxX - minX, _col); \
-        x = _col + minX; \
-    }
-#endif
-
-#if USE_DOUBLE
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define FPTYPE double
-#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
-#else
-#define FPTYPE float
-#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
-#endif
-
-#if DATA_DEPTH == 0
-#define BASE_TYPE uchar
-#elif DATA_DEPTH == 1
-#define BASE_TYPE char
-#elif DATA_DEPTH == 2
-#define BASE_TYPE ushort
-#elif DATA_DEPTH == 3
-#define BASE_TYPE short
-#elif DATA_DEPTH == 4
-#define BASE_TYPE int
-#elif DATA_DEPTH == 5
-#define BASE_TYPE float
-#elif DATA_DEPTH == 6
-#define BASE_TYPE double
-#else
-#error data_depth
-#endif
-
-#define __CAT(x, y) x##y
-#define CAT(x, y) __CAT(x, y)
-
-#define uchar1 uchar
-#define char1 char
-#define ushort1 ushort
-#define short1 short
-#define int1 int
-#define float1 float
-#define double1 double
-
-#define convert_uchar1_sat_rte convert_uchar_sat_rte
-#define convert_char1_sat_rte convert_char_sat_rte
-#define convert_ushort1_sat_rte convert_ushort_sat_rte
-#define convert_short1_sat_rte convert_short_sat_rte
-#define convert_int1_sat_rte convert_int_sat_rte
-#define convert_float1
-#define convert_double1
-
-#if DATA_DEPTH == 5 || DATA_DEPTH == 6
-#define CONVERT_TO_TYPE CAT(CAT(convert_, BASE_TYPE), VEC_SIZE)
-#else
-#define CONVERT_TO_TYPE CAT(CAT(CAT(convert_, BASE_TYPE), VEC_SIZE), _sat_rte)
-#endif
-
-#define VEC_SIZE DATA_CHAN
-
-#define VEC_TYPE CAT(BASE_TYPE, VEC_SIZE)
-#define TYPE VEC_TYPE
-
-#define SCALAR_TYPE CAT(FPTYPE, VEC_SIZE)
-
-#define INTERMEDIATE_TYPE CAT(FPTYPE, VEC_SIZE)
-
-struct RectCoords
-{
-    int x1, y1, x2, y2;
-};
-
-//#define DEBUG
-#ifdef DEBUG
-#define DEBUG_ONLY(x) x
-#define ASSERT(condition) do { if (!(condition)) { printf("BUG in boxFilter kernel (global=%d,%d): " #condition "\n", get_global_id(0), get_global_id(1)); } } while (0)
-#else
-#define DEBUG_ONLY(x) (void)0
-#define ASSERT(condition) (void)0
-#endif
-
-
-inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global TYPE *src, const unsigned int srcStepBytes, const struct RectCoords srcCoords
-#ifdef BORDER_CONSTANT
-               , SCALAR_TYPE borderValue
-#endif
-    )
-{
-#ifdef BORDER_ISOLATED
-    if(pos.x >= srcCoords.x1 && pos.y >= srcCoords.y1 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-#else
-    if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-#endif
-    {
-        __global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
-        return CONVERT_TO_FPTYPE(*ptr);
-    }
-    else
-    {
-#ifdef BORDER_CONSTANT
-        return borderValue;
-#else
-        int selected_col = pos.x;
-        int selected_row = pos.y;
-
-        EXTRAPOLATE(selected_col, selected_row,
-#ifdef BORDER_ISOLATED
-                srcCoords.x1, srcCoords.y1,
-#else
-                0, 0,
-#endif
-                srcCoords.x2, srcCoords.y2
-         );
-
-        // debug border mapping
-        //printf("pos=%d,%d --> %d, %d\n", pos.x, pos.y, selected_col, selected_row);
-
-        pos = (int2)(selected_col, selected_row);
-        if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
-        {
-            __global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
-            return CONVERT_TO_FPTYPE(*ptr);
-        }
-        else
-        {
-            // for debug only
-            DEBUG_ONLY(printf("BUG in boxFilter kernel\n"));
-            return (FPTYPE)(0.0f);
-        }
-#endif
-    }
-}
-
-// INPUT PARAMETER: BLOCK_SIZE_Y (via defines)
-
-__kernel
-__attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
-void filter2D(__global TYPE *src, const unsigned int srcStepBytes, const int4 srcRC,
-              __global TYPE *dst, const unsigned int dstStepBytes, const int4 dstRC,
-#ifdef BORDER_CONSTANT
-              SCALAR_TYPE borderValue,
-#endif
-              __constant FPTYPE* kernelData // transposed: [KERNEL_SIZE_X][KERNEL_SIZE_Y2_ALIGNED]
-              )
-{
-    const struct RectCoords srcCoords = {srcRC.s0, srcRC.s1, srcRC.s2, srcRC.s3}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
-    struct RectCoords dstCoords = {dstRC.s0, dstRC.s1, dstRC.s2, dstRC.s3};
-
-    const int local_id = get_local_id(0);
-    const int x = local_id + (LOCAL_SIZE - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X;
-    const int y = get_global_id(1) * BLOCK_SIZE_Y;
-
-    INTERMEDIATE_TYPE data[KERNEL_SIZE_Y];
-    __local INTERMEDIATE_TYPE sumOfCols[LOCAL_SIZE];
-
-    int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y);
-
-    int2 pos = (int2)(dstCoords.x1 + x, dstCoords.y1 + y);
-    __global TYPE* dstPtr = (__global TYPE*)((__global char*)dst + pos.x * sizeof(TYPE) + pos.y * dstStepBytes); // Pointer can be out of bounds!
-    bool writeResult = (local_id >= ANCHOR_X && local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X) &&
-                        pos.x >= dstCoords.x1 && pos.x < dstCoords.x2);
-
-#if BLOCK_SIZE_Y > 1
-    bool readAllpixels = true;
-    int sy_index = 0; // current index in data[] array
-
-    dstCoords.y2 = min(dstCoords.y2, pos.y + BLOCK_SIZE_Y);
-    for (;
-         pos.y < dstCoords.y2;
-         pos.y++,
-         dstPtr = (__global TYPE*)((__global char*)dstPtr + dstStepBytes))
-#endif
-    {
-        ASSERT(pos.y < dstCoords.y2);
-
-        for (
-#if BLOCK_SIZE_Y > 1
-            int sy = readAllpixels ? 0 : -1; sy < (readAllpixels ? KERNEL_SIZE_Y : 0);
-#else
-            int sy = 0, sy_index = 0; sy < KERNEL_SIZE_Y;
-#endif
-            sy++, srcPos.y++)
-        {
-            data[sy + sy_index] = readSrcPixel(srcPos, src, srcStepBytes, srcCoords
-#ifdef BORDER_CONSTANT
-                    , borderValue
-#endif
-                    );
-        }
-
-        INTERMEDIATE_TYPE total_sum = 0;
-        for (int sx = 0; sx < KERNEL_SIZE_X; sx++)
-        {
-            {
-                __constant FPTYPE* k = &kernelData[KERNEL_SIZE_Y2_ALIGNED * sx
-#if BLOCK_SIZE_Y > 1
-                                                   + KERNEL_SIZE_Y - sy_index
-#endif
-                                                   ];
-                INTERMEDIATE_TYPE tmp_sum = 0;
-                for (int sy = 0; sy < KERNEL_SIZE_Y; sy++)
-                {
-                    tmp_sum += data[sy] * k[sy];
-                }
-
-                sumOfCols[local_id] = tmp_sum;
-                barrier(CLK_LOCAL_MEM_FENCE);
-            }
-
-            int id = local_id + sx - ANCHOR_X;
-            if (id >= 0 && id < LOCAL_SIZE)
-               total_sum += sumOfCols[id];
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        if (writeResult)
-        {
-            ASSERT(pos.y >= dstCoords.y1 && pos.y < dstCoords.y2);
-            *dstPtr = CONVERT_TO_TYPE(total_sum);
-        }
-
-#if BLOCK_SIZE_Y > 1
-        readAllpixels = false;
-#if BLOCK_SIZE_Y > KERNEL_SIZE_Y
-        sy_index = (sy_index + 1 <= KERNEL_SIZE_Y) ? sy_index + 1 : 1;
-#else
-        sy_index++;
-#endif
-#endif // BLOCK_SIZE_Y == 1
-    }
-}
diff --git a/modules/ocl/src/opencl/filtering_morph.cl b/modules/ocl/src/opencl/filtering_morph.cl
deleted file mode 100644
index c402ff7..0000000
--- a/modules/ocl/src/opencl/filtering_morph.cl
+++ /dev/null
@@ -1,228 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Zero Lin, zero.lin@amd.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-
-#ifdef ERODE
-#define MORPH_OP(A,B) min((A),(B))
-#endif
-#ifdef DILATE
-#define MORPH_OP(A,B) max((A),(B))
-#endif
-//BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii
-#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
-#ifndef GENTYPE
-
-__kernel void morph_C1_D0(__global const uchar * restrict src,
-                          __global uchar *dst,
-                          int src_offset_x, int src_offset_y,
-                          int cols, int rows,
-                          int src_step_in_pixel, int dst_step_in_pixel,
-                          __constant uchar * mat_kernel,
-                          int src_whole_cols, int src_whole_rows,
-                          int dst_offset_in_pixel)
-{
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-    int x = get_group_id(0)*4*LSIZE0;
-    int y = get_group_id(1)*LSIZE1;
-    int start_x = x+src_offset_x-RADIUSX & 0xfffffffc;
-    int end_x = x + src_offset_x+LSIZE0*4+RADIUSX & 0xfffffffc;
-    int width = (end_x -start_x+4)>>2;
-    int offset = src_offset_x-RADIUSX & 3;
-    int start_y = y+src_offset_y-RADIUSY;
-    int point1 = mad24(l_y,LSIZE0,l_x);
-    int point2 = point1 + LSIZE0*LSIZE1;
-    int tl_x = (point1 % width)<<2;
-    int tl_y = point1 / width;
-    int tl_x2 = (point2 % width)<<2;
-    int tl_y2 = point2 / width;
-    int cur_x = start_x + tl_x;
-    int cur_y = start_y + tl_y;
-    int cur_x2 = start_x + tl_x2;
-    int cur_y2 = start_y + tl_y2;
-    int start_addr = mad24(cur_y,src_step_in_pixel,cur_x);
-    int start_addr2 = mad24(cur_y2,src_step_in_pixel,cur_x2);
-    uchar4 temp0,temp1;
-    __local uchar4 LDS_DAT[2*LSIZE1*LSIZE0];
-
-    int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-    //read pixels from src
-    start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
-    start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
-    temp0 = *(__global uchar4*)&src[start_addr];
-    temp1 = *(__global uchar4*)&src[start_addr2];
-    //judge if read out of boundary
-    temp0.x= ELEM(cur_x,0,src_whole_cols,VAL,temp0.x);
-    temp0.y= ELEM(cur_x+1,0,src_whole_cols,VAL,temp0.y);
-    temp0.z= ELEM(cur_x+2,0,src_whole_cols,VAL,temp0.z);
-    temp0.w= ELEM(cur_x+3,0,src_whole_cols,VAL,temp0.w);
-    temp0= ELEM(cur_y,0,src_whole_rows,(uchar4)VAL,temp0);
-
-    temp1.x= ELEM(cur_x2,0,src_whole_cols,VAL,temp1.x);
-    temp1.y= ELEM(cur_x2+1,0,src_whole_cols,VAL,temp1.y);
-    temp1.z= ELEM(cur_x2+2,0,src_whole_cols,VAL,temp1.z);
-    temp1.w= ELEM(cur_x2+3,0,src_whole_cols,VAL,temp1.w);
-    temp1= ELEM(cur_y2,0,src_whole_rows,(uchar4)VAL,temp1);
-
-    LDS_DAT[point1] = temp0;
-    LDS_DAT[point2] = temp1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    uchar4 res = (uchar4)VAL;
-
-    for(int i=0; i<2*RADIUSY+1; i++)
-        for(int j=0; j<2*RADIUSX+1; j++)
-        {
-            res =
-#ifndef RECTKERNEL
-                mat_kernel[i*(2*RADIUSX+1)+j] ?
-#endif
-                MORPH_OP(res,vload4(0,(__local uchar*)&LDS_DAT[mad24((l_y+i),width,l_x)]+offset+j))
-#ifndef RECTKERNEL
-                :res
-#endif
-                ;
-        }
-
-    int gidx = get_global_id(0)<<2;
-    int gidy = get_global_id(1);
-    int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
-
-    if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
-    {
-        *(__global uchar4*)&dst[out_addr] = res;
-    }
-    else
-    {
-        if(gidx+3<cols && gidy<rows)
-        {
-            dst[out_addr] = res.x;
-            dst[out_addr+1] = res.y;
-            dst[out_addr+2] = res.z;
-            dst[out_addr+3] = res.w;
-        }
-        else if(gidx+2<cols && gidy<rows)
-        {
-            dst[out_addr] = res.x;
-            dst[out_addr+1] = res.y;
-            dst[out_addr+2] = res.z;
-        }
-        else if(gidx+1<cols && gidy<rows)
-        {
-            dst[out_addr] = res.x;
-            dst[out_addr+1] = res.y;
-        }
-        else if(gidx<cols && gidy<rows)
-        {
-            dst[out_addr] = res.x;
-        }
-    }
-}
-
-#else
-
-__kernel void morph(__global const GENTYPE * restrict src,
-                    __global GENTYPE *dst,
-                    int src_offset_x, int src_offset_y,
-                    int cols, int rows,
-                    int src_step_in_pixel, int dst_step_in_pixel,
-                    __constant uchar * mat_kernel,
-                    int src_whole_cols, int src_whole_rows,
-                    int dst_offset_in_pixel)
-{
-    int l_x = get_local_id(0);
-    int l_y = get_local_id(1);
-    int x = get_group_id(0)*LSIZE0;
-    int y = get_group_id(1)*LSIZE1;
-    int start_x = x+src_offset_x-RADIUSX;
-    int end_x = x + src_offset_x+LSIZE0+RADIUSX;
-    int width = end_x -(x+src_offset_x-RADIUSX)+1;
-    int start_y = y+src_offset_y-RADIUSY;
-    int point1 = mad24(l_y,LSIZE0,l_x);
-    int point2 = point1 + LSIZE0*LSIZE1;
-    int tl_x = point1 % width;
-    int tl_y = point1 / width;
-    int tl_x2 = point2 % width;
-    int tl_y2 = point2 / width;
-    int cur_x = start_x + tl_x;
-    int cur_y = start_y + tl_y;
-    int cur_x2 = start_x + tl_x2;
-    int cur_y2 = start_y + tl_y2;
-    int start_addr = mad24(cur_y,src_step_in_pixel,cur_x);
-    int start_addr2 = mad24(cur_y2,src_step_in_pixel,cur_x2);
-    GENTYPE temp0,temp1;
-    __local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];
-
-    int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-    //read pixels from src
-    start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
-    start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
-    temp0 = src[start_addr];
-    temp1 = src[start_addr2];
-    //judge if read out of boundary
-    temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0);
-    temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0);
-
-    temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1);
-    temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1);
-
-    LDS_DAT[point1] = temp0;
-    LDS_DAT[point2] = temp1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    GENTYPE res = (GENTYPE)VAL;
-    for(int i=0; i<2*RADIUSY+1; i++)
-        for(int j=0; j<2*RADIUSX+1; j++)
-        {
-            res =
-#ifndef RECTKERNEL
-                mat_kernel[i*(2*RADIUSX+1)+j] ?
-#endif
-                MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)])
-#ifndef RECTKERNEL
-                :res
-#endif
-                ;
-        }
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
-    if(gidx<cols && gidy<rows)
-    {
-        dst[out_addr] = res;
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl b/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
deleted file mode 100644
index c6555bf..0000000
--- a/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
+++ /dev/null
@@ -1,185 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef BORDER_CONSTANT
-//CCCCCC|abcdefgh|CCCCCCC
-#define EXTRAPOLATE(x, maxV)
-#elif defined BORDER_REPLICATE
-//aaaaaa|abcdefgh|hhhhhhh
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = max(min((x), (maxV) - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-//cdefgh|abcdefgh|abcdefg
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = ( (x) + (maxV) ) % (maxV); \
-    }
-#elif defined BORDER_REFLECT
-//fedcba|abcdefgh|hgfedcb
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
-    }
-#elif defined BORDER_REFLECT_101
-//gfedcb|abcdefgh|gfedcba
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
-    }
-#else
-#error No extrapolation method
-#endif
-
-#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
-
-#ifdef BORDER_CONSTANT
-//CCCCCC|abcdefgh|CCCCCCC
-#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
-#else
-#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
-#endif
-
-#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
-
-//horizontal and vertical filter kernels
-//should be defined on host during compile time to avoid overhead
-__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
-__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
-
-__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
-        (
-        __global uchar* Src,
-        const uint      SrcPitch,
-        const int       srcOffsetX,
-        const int       srcOffsetY,
-        __global uchar* Dst,
-        const int       DstOffset,
-        const uint      DstPitch,
-        int             width,
-        int             height,
-        int             dstWidth,
-        int             dstHeight
-        )
-{
-    //RADIUSX, RADIUSY are filter dimensions
-    //BLK_X, BLK_Y are local wrogroup sizes
-    //all these should be defined on host during compile time
-    //first lsmem array for source pixels used in first pass,
-    //second lsmemDy for storing first pass results
-    __local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
-    __local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];
-
-    //get local and global ids - used as image and local memory array indexes
-    int lix = get_local_id(0);
-    int liy = get_local_id(1);
-
-    int x = (int)get_global_id(0);
-    int y = (int)get_global_id(1);
-
-    //calculate pixel position in source image taking image offset into account
-    int srcX = x + srcOffsetX - RADIUSX;
-    int srcY = y + srcOffsetY - RADIUSY;
-    int xb = srcX;
-    int yb = srcY;
-
-    //extrapolate coordinates, if needed
-    //and read my own source pixel into local memory
-    //with account for extra border pixels, which will be read by starting workitems
-    int clocY = liy;
-    int cSrcY = srcY;
-    do
-    {
-        int yb = cSrcY;
-        EXTRAPOLATE(yb, (height));
-
-        int clocX = lix;
-        int cSrcX = srcX;
-        do
-        {
-            int xb = cSrcX;
-            EXTRAPOLATE(xb,(width));
-            lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
-
-            clocX += BLK_X;
-            cSrcX += BLK_X;
-        }
-        while(clocX < BLK_X+(RADIUSX*2));
-
-        clocY += BLK_Y;
-        cSrcY += BLK_Y;
-    }
-    while(clocY < BLK_Y+(RADIUSY*2));
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //do vertical filter pass
-    //and store intermediate results to second local memory array
-    int i;
-    WORKTYPE sum = 0.0f;
-    int clocX = lix;
-    do
-    {
-        sum = 0.0f;
-        for(i=0; i<=2*RADIUSY; i++)
-            sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
-        lsmemDy[liy][clocX] = sum;
-        clocX += BLK_X;
-    }
-    while(clocX < BLK_X+(RADIUSX*2));
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //if this pixel happened to be out of image borders because of global size rounding,
-    //then just return
-    if( x >= dstWidth || y >=dstHeight )  return;
-
-    //do second horizontal filter pass
-    //and calculate final result
-    sum = 0.0f;
-    for(i=0; i<=2*RADIUSX; i++)
-        sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);
-
-    //store result into destination image
-    DST(x,y) = CONVERT_DSTTYPE(sum);
-}
diff --git a/modules/ocl/src/opencl/haarobjectdetect.cl b/modules/ocl/src/opencl/haarobjectdetect.cl
deleted file mode 100644
index d6e5fb9..0000000
--- a/modules/ocl/src/opencl/haarobjectdetect.cl
+++ /dev/null
@@ -1,596 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Wang Weiyan, wangweiyanster@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Nathan, liujun@multicorewareinc.com
-//    Peng Xiao, pengxiao@outlook.com
-//    Erping Pang, erping@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#define CV_HAAR_FEATURE_MAX           3
-
-#define calc_sum(rect,offset)        (sum[(rect).p0+offset] - sum[(rect).p1+offset] - sum[(rect).p2+offset] + sum[(rect).p3+offset])
-#define calc_sum1(rect,offset,i)     (sum[(rect).p0[i]+offset] - sum[(rect).p1[i]+offset] - sum[(rect).p2[i]+offset] + sum[(rect).p3[i]+offset])
-
-typedef int   sumtype;
-typedef float sqsumtype;
-
-#ifndef STUMP_BASED
-#define STUMP_BASED 1
-#endif
-
-typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
-{
-    int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64)));
-    float weight[CV_HAAR_FEATURE_MAX];
-    float threshold;
-    float alpha[3] __attribute__((aligned (16)));
-    int left __attribute__((aligned (4)));
-    int right __attribute__((aligned (4)));
-}
-GpuHidHaarTreeNode;
-
-
-//typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier
-//{
-//    int count __attribute__((aligned (4)));
-//    GpuHidHaarTreeNode* node __attribute__((aligned (8)));
-//    float* alpha __attribute__((aligned (8)));
-//}
-//GpuHidHaarClassifier;
-
-
-typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier
-{
-    int  count __attribute__((aligned (4)));
-    float threshold __attribute__((aligned (4)));
-    int two_rects __attribute__((aligned (4)));
-    int reserved0 __attribute__((aligned (8)));
-    int reserved1 __attribute__((aligned (8)));
-    int reserved2 __attribute__((aligned (8)));
-    int reserved3 __attribute__((aligned (8)));
-}
-GpuHidHaarStageClassifier;
-
-
-//typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
-//{
-//    int  count __attribute__((aligned (4)));
-//    int  is_stump_based __attribute__((aligned (4)));
-//    int  has_tilted_features __attribute__((aligned (4)));
-//    int  is_tree __attribute__((aligned (4)));
-//    int pq0 __attribute__((aligned (4)));
-//    int pq1 __attribute__((aligned (4)));
-//    int pq2 __attribute__((aligned (4)));
-//    int pq3 __attribute__((aligned (4)));
-//    int p0 __attribute__((aligned (4)));
-//    int p1 __attribute__((aligned (4)));
-//    int p2 __attribute__((aligned (4)));
-//    int p3 __attribute__((aligned (4)));
-//    float inv_window_area __attribute__((aligned (4)));
-//} GpuHidHaarClassifierCascade;
-
-
-#ifdef PACKED_CLASSIFIER
-// this code is scalar, one pixel -> one workitem
-__kernel void gpuRunHaarClassifierCascadePacked(
-    global const GpuHidHaarStageClassifier * stagecascadeptr,
-    global const int4 * info,
-    global const GpuHidHaarTreeNode * nodeptr,
-    global const int * restrict sum,
-    global const float * restrict sqsum,
-    volatile global int4 * candidate,
-    const int pixelstep,
-    const int loopcount,
-    const int start_stage,
-    const int split_stage,
-    const int end_stage,
-    const int startnode,
-    const int splitnode,
-    const int4 p,
-    const int4 pq,
-    const float correction,
-    global const int* pNodesPK,
-    global const int4* pWGInfo
-    )
-
-{
-    int     gid = (int)get_group_id(0);
-    int     lid_x = (int)get_local_id(0);
-    int     lid_y = (int)get_local_id(1);
-    int     lid = lid_y*LSx+lid_x;
-    int4    WGInfo = pWGInfo[WGSTART+gid];
-    int     GroupX = (WGInfo.y >> 16)&0xFFFF;
-    int     GroupY = (WGInfo.y >> 0 )& 0xFFFF;
-    int     Width  = (WGInfo.x >> 16)&0xFFFF;
-    int     Height = (WGInfo.x >> 0 )& 0xFFFF;
-    int     ImgOffset = WGInfo.z;
-    float   ScaleFactor = as_float(WGInfo.w);
-
-#define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
-#define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
-#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
-
-    local int SumL[DATA_SIZE];
-
-    // read input data window into local mem
-    for(int i = 0; i<DATA_SIZE; i+=(LSx*LSy))
-    {
-        int     index = i+lid; // index in shared local memory
-        if(index<DATA_SIZE)
-        {// calc global x,y coordinat and read data from there
-            int     x = min(GroupX + (index % (DATA_SIZE_X)),Width-1);
-            int     y = min(GroupY + (index / (DATA_SIZE_X)),Height-1);
-            SumL[index] = sum[ImgOffset+y*pixelstep+x];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // calc variance_norm_factor for all stages
-    float   variance_norm_factor;
-    int     nodecounter= startnode;
-    int4    info1 = p;
-    int4    info2 = pq;
-
-    // calc processed ROI coordinate in local mem
-    int     xl = lid_x*PIXEL_STEP;
-    int     yl = lid_y*PIXEL_STEP;
-
-    {// calc variance_norm_factor for all stages
-        int     OffsetLocal =          yl * DATA_SIZE_X +         xl;
-        int     OffsetGlobal = (GroupY+yl)* pixelstep   + (GroupX+xl);
-
-        // add shift to get position on scaled image
-        OffsetGlobal += ImgOffset;
-
-        float   mean =
-            SumL[info1.y*DATA_SIZE_X+info1.x+OffsetLocal] -
-            SumL[info1.y*DATA_SIZE_X+info1.z+OffsetLocal] -
-            SumL[info1.w*DATA_SIZE_X+info1.x+OffsetLocal] +
-            SumL[info1.w*DATA_SIZE_X+info1.z+OffsetLocal];
-        float sq =
-            sqsum[info2.y*pixelstep+info2.x+OffsetGlobal] -
-            sqsum[info2.y*pixelstep+info2.z+OffsetGlobal] -
-            sqsum[info2.w*pixelstep+info2.x+OffsetGlobal] +
-            sqsum[info2.w*pixelstep+info2.z+OffsetGlobal];
-
-        mean *= correction;
-        sq *= correction;
-
-        variance_norm_factor = sq - mean * mean;
-        variance_norm_factor = (variance_norm_factor >=0.f) ? sqrt(variance_norm_factor) : 1.f;
-    }// end calc variance_norm_factor for all stages
-
-    int result = (1.0f>0.0f);
-    for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
-    {// iterate until candidate is valid
-        float   stage_sum = 0.0f;
-        __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
-            ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
-        int     lcl_off = (yl*DATA_SIZE_X)+(xl);
-        int stagecount = stageinfo->count;
-        float stagethreshold = stageinfo->threshold;
-        for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
-        {
-        // simple macro to extract shorts from int
-#define M0(_t) ((_t)&0xFFFF)
-#define M1(_t) (((_t)>>16)&0xFFFF)
-            // load packed node data from global memory (L3) into registers
-            global const int4* pN = (__global int4*)(pNodesPK+nodecounter*NODE_SIZE);
-            int4    n0 = pN[0];
-            int4    n1 = pN[1];
-            int4    n2 = pN[2];
-            float   nodethreshold  = as_float(n2.y) * variance_norm_factor;
-            // calc sum of intensity pixels according to classifier node information
-            float classsum =
-                (SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
-                (SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
-                (SumL[M0(n1.x)+lcl_off] - SumL[M1(n1.x)+lcl_off] - SumL[M0(n1.y)+lcl_off] + SumL[M1(n1.y)+lcl_off]) * as_float(n2.x);
-            //accumulate stage responce
-            stage_sum += (classsum >= nodethreshold) ? as_float(n2.w) : as_float(n2.z);
-        }
-        result = (stage_sum >= stagethreshold);
-    }// next stage if needed
-
-    if(result)
-    {// all stages will be passed and there is a detected face on the tested position
-        int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
-        if(index<OUTPUTSZ)
-        {
-            int     x = GroupX+xl;
-            int     y = GroupY+yl;
-            int4 candidate_result;
-            candidate_result.x = convert_int_rtn(x*ScaleFactor);
-            candidate_result.y = convert_int_rtn(y*ScaleFactor);
-            candidate_result.z = convert_int_rtn(ScaleFactor*WND_SIZE_X);
-            candidate_result.w = convert_int_rtn(ScaleFactor*WND_SIZE_Y);
-            candidate[index] = candidate_result;
-        }
-    }
-}//end gpuRunHaarClassifierCascade
-#else
-
-__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
-    global GpuHidHaarStageClassifier * stagecascadeptr,
-    global int4 * info,
-    global GpuHidHaarTreeNode * nodeptr,
-    global const int * restrict sum1,
-    global const float * restrict sqsum1,
-    global int4 * candidate,
-    const int pixelstep,
-    const int loopcount,
-    const int start_stage,
-    const int split_stage,
-    const int end_stage,
-    const int startnode,
-    const int splitnode,
-    const int4 p,
-    const int4 pq,
-    const float correction)
-{
-    int grpszx = get_local_size(0);
-    int grpszy = get_local_size(1);
-    int grpnumx = get_num_groups(0);
-    int grpidx = get_group_id(0);
-    int lclidx = get_local_id(0);
-    int lclidy = get_local_id(1);
-
-    int lcl_sz = mul24(grpszx,grpszy);
-    int lcl_id = mad24(lclidy,grpszx,lclidx);
-
-    __local int lclshare[1024];
-    __local int* lcldata = lclshare;//for save win data
-    __local int* glboutindex = lcldata + 28*28;//for save global out index
-    __local int* lclcount = glboutindex + 1;//for save the numuber of temp pass pixel
-    __local int* lcloutindex = lclcount + 1;//for save info of temp pass pixel
-    __local float* partialsum = (__local float*)(lcloutindex + (lcl_sz<<1));
-    glboutindex[0]=0;
-    int outputoff = mul24(grpidx,256);
-
-    //assume window size is 20X20
-#define WINDOWSIZE 20+1
-    //make sure readwidth is the multiple of 4
-    //ystep =1, from host code
-    int readwidth = ((grpszx-1 + WINDOWSIZE+3)>>2)<<2;
-    int readheight = grpszy-1+WINDOWSIZE;
-    int read_horiz_cnt = readwidth >> 2;//each read int4
-    int total_read = mul24(read_horiz_cnt,readheight);
-    int read_loop = (total_read + lcl_sz - 1) >> 6;
-    candidate[outputoff+(lcl_id<<2)] = (int4)0;
-    candidate[outputoff+(lcl_id<<2)+1] = (int4)0;
-    candidate[outputoff+(lcl_id<<2)+2] = (int4)0;
-    candidate[outputoff+(lcl_id<<2)+3] = (int4)0;
-    for(int scalei = 0; scalei <loopcount; scalei++)
-    {
-        int4 scaleinfo1= info[scalei];
-        int height = scaleinfo1.x & 0xffff;
-        int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16;
-        int totalgrp = scaleinfo1.y & 0xffff;
-        int imgoff = scaleinfo1.z;
-        float factor = as_float(scaleinfo1.w);
-
-        __global const int * sum = sum1 + imgoff;
-        __global const float * sqsum = sqsum1 + imgoff;
-        for(int grploop=grpidx; grploop<totalgrp; grploop+=grpnumx)
-        {
-            int grpidy = grploop / grpnumperline;
-            int grpidx = grploop - mul24(grpidy, grpnumperline);
-            int x = mad24(grpidx,grpszx,lclidx);
-            int y = mad24(grpidy,grpszy,lclidy);
-            int grpoffx = x-lclidx;
-            int grpoffy = y-lclidy;
-
-            for(int i=0; i<read_loop; i++)
-            {
-                int pos_id = mad24(i,lcl_sz,lcl_id);
-                pos_id = pos_id < total_read ? pos_id : 0;
-
-                int lcl_y = pos_id / read_horiz_cnt;
-                int lcl_x = pos_id - mul24(lcl_y, read_horiz_cnt);
-
-                int glb_x = grpoffx + (lcl_x<<2);
-                int glb_y = grpoffy + lcl_y;
-
-                int glb_off = mad24(min(glb_y, height + WINDOWSIZE - 1),pixelstep,glb_x);
-                int4 data = *(__global int4*)&sum[glb_off];
-                int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2);
-
-                vstore4(data, 0, &lcldata[lcl_off]);
-            }
-
-            lcloutindex[lcl_id] = 0;
-            lclcount[0] = 0;
-            int result = 1;
-            int nodecounter= startnode;
-            float mean, variance_norm_factor;
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            int lcl_off = mad24(lclidy,readwidth,lclidx);
-            int4 cascadeinfo1, cascadeinfo2;
-            cascadeinfo1 = p;
-            cascadeinfo2 = pq;
-
-            cascadeinfo1.x +=lcl_off;
-            cascadeinfo1.z +=lcl_off;
-            mean = (lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.x)] - lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.z)] -
-                    lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
-                    *correction;
-
-            int p_offset = mad24(y, pixelstep, x);
-
-            cascadeinfo2.x +=p_offset;
-            cascadeinfo2.z +=p_offset;
-            variance_norm_factor =sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.x)] - sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.z)] -
-                                    sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
-
-            variance_norm_factor = variance_norm_factor * correction - mean * mean;
-            variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
-
-            for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
-            {
-                float stage_sum = 0.f;
-                __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
-                    ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
-                int stagecount = stageinfo->count;
-                float stagethreshold = stageinfo->threshold;
-                for(int nodeloop = 0; nodeloop < stagecount; )
-                {
-                    __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
-                        (((__global uchar*)nodeptr) + nodecounter * sizeof(GpuHidHaarTreeNode));
-
-                    int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
-                    int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
-                    int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
-                    float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
-                    float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
-
-                    float nodethreshold  = w.w * variance_norm_factor;
-
-                    info1.x +=lcl_off;
-                    info1.z +=lcl_off;
-                    info2.x +=lcl_off;
-                    info2.z +=lcl_off;
-
-                    float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
-                                        lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
-
-                    classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
-                                    lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
-
-                    info3.x +=lcl_off;
-                    info3.z +=lcl_off;
-                    classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
-                                    lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
-
-                    bool passThres = classsum >= nodethreshold;
-#if STUMP_BASED
-                    stage_sum += passThres ? alpha3.y : alpha3.x;
-                    nodecounter++;
-                    nodeloop++;
-#else
-                    bool isRootNode = (nodecounter & 1) == 0;
-                    if(isRootNode)
-                    {
-                        if( (passThres && currentnodeptr->right) ||
-                            (!passThres && currentnodeptr->left))
-                        {
-                            nodecounter ++;
-                        }
-                        else
-                        {
-                            stage_sum += alpha3.x;
-                            nodecounter += 2;
-                            nodeloop ++;
-                        }
-                    }
-                    else
-                    {
-                        stage_sum += passThres ? alpha3.z : alpha3.y;
-                        nodecounter ++;
-                        nodeloop ++;
-                    }
-#endif
-                }
-
-                result = (stage_sum >= stagethreshold) ? 1 : 0;
-            }
-            if(factor < 2)
-            {
-                if(result && lclidx %2 ==0 && lclidy %2 ==0 )
-                {
-                    int queueindex = atomic_inc(lclcount);
-                    lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
-                    lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor);
-                }
-            }
-            else
-            {
-                if(result)
-                {
-                    int queueindex = atomic_inc(lclcount);
-                    lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
-                    lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor);
-                }
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int queuecount  = lclcount[0];
-            barrier(CLK_LOCAL_MEM_FENCE);
-            nodecounter = splitnode;
-            for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
-            {
-                lclcount[0]=0;
-                barrier(CLK_LOCAL_MEM_FENCE);
-
-                //int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
-                __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
-                    ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
-                int stagecount = stageinfo->count;
-                float stagethreshold = stageinfo->threshold;
-
-                int perfscale = queuecount > 4 ? 3 : 2;
-                int queuecount_loop = (queuecount + (1<<perfscale)-1) >> perfscale;
-                int lcl_compute_win = lcl_sz >> perfscale;
-                int lcl_compute_win_id = (lcl_id >>(6-perfscale));
-                int lcl_loops = (stagecount + lcl_compute_win -1) >> (6-perfscale);
-                int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
-                for(int queueloop=0; queueloop<queuecount_loop; queueloop++)
-                {
-                    float stage_sum = 0.f;
-                    int temp_coord = lcloutindex[lcl_compute_win_id<<1];
-                    float variance_norm_factor = as_float(lcloutindex[(lcl_compute_win_id<<1)+1]);
-                    int queue_pixel = mad24(((temp_coord  & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
-
-                    if(lcl_compute_win_id < queuecount)
-                    {
-                        int tempnodecounter = lcl_compute_id;
-                        float part_sum = 0.f;
-                        const int stump_factor = STUMP_BASED ? 1 : 2;
-                        int root_offset = 0;
-                        for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stagecount;)
-                        {
-                            __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
-                                    (((__global uchar*)nodeptr) + sizeof(GpuHidHaarTreeNode) * ((nodecounter + tempnodecounter) * stump_factor + root_offset));
-
-                            int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
-                            int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
-                            int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
-                            float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
-                            float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
-                            float nodethreshold  = w.w * variance_norm_factor;
-
-                            info1.x +=queue_pixel;
-                            info1.z +=queue_pixel;
-                            info2.x +=queue_pixel;
-                            info2.z +=queue_pixel;
-
-                            float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
-                                                lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
-
-
-                            classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
-                                            lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
-
-                            info3.x +=queue_pixel;
-                            info3.z +=queue_pixel;
-                            classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
-                                            lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
-
-                            bool passThres = classsum >= nodethreshold;
-#if STUMP_BASED
-                            part_sum += passThres ? alpha3.y : alpha3.x;
-                            tempnodecounter += lcl_compute_win;
-                            lcl_loop++;
-#else
-                            if(root_offset == 0)
-                            {
-                                if( (passThres && currentnodeptr->right) ||
-                                    (!passThres && currentnodeptr->left))
-                                {
-                                    root_offset = 1;
-                                }
-                                else
-                                {
-                                    part_sum += alpha3.x;
-                                    tempnodecounter += lcl_compute_win;
-                                    lcl_loop++;
-                                }
-                            }
-                            else
-                            {
-                                part_sum += passThres ? alpha3.z : alpha3.y;
-                                tempnodecounter += lcl_compute_win;
-                                lcl_loop++;
-                                root_offset = 0;
-                            }
-#endif
-                        }//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
-                        partialsum[lcl_id]=part_sum;
-                    }
-                    barrier(CLK_LOCAL_MEM_FENCE);
-                    if(lcl_compute_win_id < queuecount)
-                    {
-                        for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
-                        {
-                            stage_sum += partialsum[lcl_id+i];
-                        }
-                        if(stage_sum >= stagethreshold && (lcl_compute_id==0))
-                        {
-                            int queueindex = atomic_inc(lclcount);
-                            lcloutindex[queueindex<<1] = temp_coord;
-                            lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
-                        }
-                        lcl_compute_win_id +=(1<<perfscale);
-                    }
-                    barrier(CLK_LOCAL_MEM_FENCE);
-                }//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
-
-                queuecount = lclcount[0];
-                barrier(CLK_LOCAL_MEM_FENCE);
-                nodecounter += stagecount;
-            }//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
-
-            if(lcl_id<queuecount)
-            {
-                int temp = lcloutindex[lcl_id<<1];
-                int x = mad24(grpidx,grpszx,temp & 0xffff);
-                int y = mad24(grpidy,grpszy,((temp & (int)0xffff0000) >> 16));
-                temp = glboutindex[0];
-                int4 candidate_result;
-                candidate_result.zw = (int2)convert_int_rte(factor*20.f);
-                candidate_result.x = convert_int_rte(x*factor);
-                candidate_result.y = convert_int_rte(y*factor);
-                atomic_inc(glboutindex);
-
-                int i = outputoff+temp+lcl_id;
-                if(candidate[i].z == 0)
-                {
-                    candidate[i] = candidate_result;
-                }
-                else
-                {
-                    for(i=i+1;;i++)
-                    {
-                        if(candidate[i].z == 0)
-                        {
-                            candidate[i] = candidate_result;
-                            break;
-                        }
-                    }
-                }
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }//end for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
-    }//end for(int scalei = 0; scalei <loopcount; scalei++)
-}
-#endif
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
deleted file mode 100644
index 09a2676..0000000
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ /dev/null
@@ -1,323 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wu Xinglong, wxl370@126.com
-//    Sen Liu, swjtuls1987@126.com
-//    Peng Xiao, pengxiao@outlook.com
-//    Erping Pang, erping@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#define CV_HAAR_FEATURE_MAX           3
-typedef int   sumtype;
-typedef float sqsumtype;
-
-typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
-{
-    int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64)));
-    float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
-    float threshold /*__attribute__((aligned (4)))*/;
-    float alpha[3] __attribute__((aligned(16)));
-    int left __attribute__((aligned(4)));
-    int right __attribute__((aligned(4)));
-}
-GpuHidHaarTreeNode;
-//typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier
-//{
-//    int count __attribute__((aligned(4)));
-//    GpuHidHaarTreeNode *node __attribute__((aligned(8)));
-//    float *alpha __attribute__((aligned(8)));
-//}
-//GpuHidHaarClassifier;
-typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier
-{
-    int  count __attribute__((aligned(4)));
-    float threshold __attribute__((aligned(4)));
-    int two_rects __attribute__((aligned(4)));
-    int reserved0 __attribute__((aligned(8)));
-    int reserved1 __attribute__((aligned(8)));
-    int reserved2 __attribute__((aligned(8)));
-    int reserved3 __attribute__((aligned(8)));
-}
-GpuHidHaarStageClassifier;
-//typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade
-//{
-//    int  count __attribute__((aligned(4)));
-//    int  is_stump_based __attribute__((aligned(4)));
-//    int  has_tilted_features __attribute__((aligned(4)));
-//    int  is_tree __attribute__((aligned(4)));
-//    int pq0 __attribute__((aligned(4)));
-//    int pq1 __attribute__((aligned(4)));
-//    int pq2 __attribute__((aligned(4)));
-//    int pq3 __attribute__((aligned(4)));
-//    int p0 __attribute__((aligned(4)));
-//    int p1 __attribute__((aligned(4)));
-//    int p2 __attribute__((aligned(4)));
-//    int p3 __attribute__((aligned(4)));
-//    float inv_window_area __attribute__((aligned(4)));
-//} GpuHidHaarClassifierCascade;
-
-__kernel void gpuRunHaarClassifierCascade_scaled2(
-    global GpuHidHaarStageClassifier *stagecascadeptr_,
-    global int4 *info,
-    global GpuHidHaarTreeNode *nodeptr_,
-    global const int *restrict sum,
-    global const float *restrict sqsum,
-    global int4 *candidate,
-    const int rows,
-    const int cols,
-    const int step,
-    const int loopcount,
-    const int start_stage,
-    const int split_stage,
-    const int end_stage,
-    const int startnode,
-    global int4 *p,
-    global float *correction,
-    const int nodecount)
-{
-    int grpszx = get_local_size(0);
-    int grpszy = get_local_size(1);
-    int grpnumx = get_num_groups(0);
-    int grpidx = get_group_id(0);
-    int lclidx = get_local_id(0);
-    int lclidy = get_local_id(1);
-    int lcl_id = mad24(lclidy, grpszx, lclidx);
-    __local int glboutindex[1];
-    __local int lclcount[1];
-    __local int lcloutindex[64];
-    glboutindex[0] = 0;
-    int outputoff = mul24(grpidx, 256);
-    candidate[outputoff + (lcl_id << 2)] = (int4)0;
-    candidate[outputoff + (lcl_id << 2) + 1] = (int4)0;
-    candidate[outputoff + (lcl_id << 2) + 2] = (int4)0;
-    candidate[outputoff + (lcl_id << 2) + 3] = (int4)0;
-    int max_idx = rows * cols - 1;
-    for (int scalei = 0; scalei < loopcount; scalei++)
-    {
-        int4 scaleinfo1 = info[scalei];
-        int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
-        int totalgrp = scaleinfo1.y & 0xffff;
-        float factor = as_float(scaleinfo1.w);
-        float correction_t = correction[scalei];
-        float ystep = max(2.0f, factor);
-
-        for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx)
-        {
-            int4 cascadeinfo = p[scalei];
-            int grpidy = grploop / grpnumperline;
-            int grpidx = grploop - mul24(grpidy, grpnumperline);
-            int ix = mad24(grpidx, grpszx, lclidx);
-            int iy = mad24(grpidy, grpszy, lclidy);
-            int x = round(ix * ystep);
-            int y = round(iy * ystep);
-            lcloutindex[lcl_id] = 0;
-            lclcount[0] = 0;
-            int nodecounter;
-            float mean, variance_norm_factor;
-            //if((ix < width) && (iy < height))
-            {
-                const int p_offset = mad24(y, step, x);
-                cascadeinfo.x += p_offset;
-                cascadeinfo.z += p_offset;
-                mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
-                - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
-                        sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
-                + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
-                       * correction_t;
-                variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
-                - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
-                                       sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
-                + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
-                variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
-                variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
-                bool result = true;
-                nodecounter = startnode + nodecount * scalei;
-                for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
-                {
-                    float stage_sum = 0.f;
-                    __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
-                        (((__global uchar*)stagecascadeptr_)+stageloop*sizeof(GpuHidHaarStageClassifier));
-                    int stagecount = stageinfo->count;
-                    for (int nodeloop = 0; nodeloop < stagecount;)
-                    {
-                        __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
-                            (((__global uchar*)nodeptr_) + nodecounter * sizeof(GpuHidHaarTreeNode));
-                        int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
-                        int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
-                        int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
-                        float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
-                        float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
-                        float nodethreshold  = w.w * variance_norm_factor;
-
-                        info1.x += p_offset;
-                        info1.z += p_offset;
-                        info2.x += p_offset;
-                        info2.z += p_offset;
-                        info3.x += p_offset;
-                        info3.z += p_offset;
-                        float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)]
-                        - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
-                                          sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)]
-                        + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
-                        classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)]
-                        - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
-                                     sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)]
-                        + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
-                        classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)]
-                        - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
-                                     sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
-                        + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
-
-                        bool passThres = (classsum >= nodethreshold) ? 1 : 0;
-
-#if STUMP_BASED
-                        stage_sum += passThres ? alpha3.y : alpha3.x;
-                        nodecounter++;
-                        nodeloop++;
-#else
-                        bool isRootNode = (nodecounter & 1) == 0;
-                        if(isRootNode)
-                        {
-                            if( (passThres && currentnodeptr->right) ||
-                                (!passThres && currentnodeptr->left))
-                            {
-                                nodecounter ++;
-                            }
-                            else
-                            {
-                                stage_sum += alpha3.x;
-                                nodecounter += 2;
-                                nodeloop ++;
-                            }
-                        }
-                        else
-                        {
-                            stage_sum += (passThres ? alpha3.z : alpha3.y);
-                            nodecounter ++;
-                            nodeloop ++;
-                        }
-#endif
-                    }
-
-                    result = (stage_sum >= stageinfo->threshold) ? 1 : 0;
-                }
-
-                barrier(CLK_LOCAL_MEM_FENCE);
-
-                if (result)
-                {
-                    int queueindex = atomic_inc(lclcount);
-                    lcloutindex[queueindex] = (y << 16) | x;
-                }
-                barrier(CLK_LOCAL_MEM_FENCE);
-                int queuecount = lclcount[0];
-
-                if (lcl_id < queuecount)
-                {
-                    int temp = lcloutindex[lcl_id];
-                    int x = temp & 0xffff;
-                    int y = (temp & (int)0xffff0000) >> 16;
-                    temp = atomic_inc(glboutindex);
-                    int4 candidate_result;
-                    candidate_result.zw = (int2)convert_int_rte(factor * 20.f);
-                    candidate_result.x = x;
-                    candidate_result.y = y;
-
-                    int i = outputoff+temp+lcl_id;
-                    if(candidate[i].z == 0)
-                    {
-                        candidate[i] = candidate_result;
-                    }
-                    else
-                    {
-                        for(i=i+1;;i++)
-                        {
-                            if(candidate[i].z == 0)
-                            {
-                                candidate[i] = candidate_result;
-                                break;
-                            }
-                        }
-                    }
-                }
-
-                barrier(CLK_LOCAL_MEM_FENCE);
-            }
-        }
-    }
-}
-__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, const int nodenum)
-{
-    const int counter = get_global_id(0);
-    int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
-    GpuHidHaarTreeNode t1 = *(__global GpuHidHaarTreeNode*)
-        (((__global uchar*)orinode) + counter * sizeof(GpuHidHaarTreeNode));
-    __global GpuHidHaarTreeNode* pNew = (__global GpuHidHaarTreeNode*)
-        (((__global uchar*)newnode) + (counter + nodenum) * sizeof(GpuHidHaarTreeNode));
-
-    #pragma unroll
-    for (i = 0; i < 3; i++)
-    {
-        tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
-        tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f);
-        tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f);
-        tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f);
-    }
-
-    t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
-
-    #pragma unroll
-    for (i = 0; i < 3; i++)
-    {
-        pNew->p[i][0] = tr_x[i];
-        pNew->p[i][1] = tr_y[i];
-        pNew->p[i][2] = tr_x[i] + tr_w[i];
-        pNew->p[i][3] = tr_y[i] + tr_h[i];
-        pNew->weight[i] = t1.weight[i] * weight_scale;
-    }
-
-    pNew->left = t1.left;
-    pNew->right = t1.right;
-    pNew->threshold = t1.threshold;
-    pNew->alpha[0] = t1.alpha[0];
-    pNew->alpha[1] = t1.alpha[1];
-    pNew->alpha[2] = t1.alpha[2];
-}
diff --git a/modules/ocl/src/opencl/imgproc_bilateral.cl b/modules/ocl/src/opencl/imgproc_bilateral.cl
deleted file mode 100644
index cb317a0..0000000
--- a/modules/ocl/src/opencl/imgproc_bilateral.cl
+++ /dev/null
@@ -1,145 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Rock Li, Rock.li@amd.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-
-__kernel void bilateral_C1_D0(__global uchar *dst,
-        __global const uchar *src,
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,
-        const int radius,
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,
-        const int src_cols,
-        __constant float *color_weight,
-        __constant float *space_weight,
-        __constant int *space_ofs)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < dst_rows && x < dst_cols)
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float sum = 0.f, wsum = 0.f;
-
-        int val0 = (int)src[src_index];
-        for(int k = 0; k < maxk; k++ )
-        {
-            int val = (int)src[src_index + space_ofs[k]];
-            float w = space_weight[k] * color_weight[abs(val - val0)];
-            sum += (float)(val) * w;
-            wsum += w;
-        }
-        dst[dst_index] = convert_uchar_rtz(sum / wsum + 0.5f);
-    }
-}
-
-__kernel void bilateral2_C1_D0(__global uchar *dst,
-        __global const uchar *src,
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,
-        const int radius,
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,
-        const int src_cols,
-        __constant float *color_weight,
-        __constant float *space_weight,
-        __constant int *space_ofs)
-{
-    int x = get_global_id(0) << 2;
-    int y = get_global_id(1);
-
-    if (y < dst_rows && x < dst_cols)
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float4 sum = (float4)(0.f), wsum = (float4)(0.f);
-
-        int4 val0 = convert_int4(vload4(0,src + src_index));
-        for(int k = 0; k < maxk; k++ )
-        {
-            int4 val = convert_int4(vload4(0,src+src_index + space_ofs[k]));
-            float4 w = (float4)(space_weight[k]) * (float4)(color_weight[abs(val.x - val0.x)], color_weight[abs(val.y - val0.y)],
-                color_weight[abs(val.z - val0.z)], color_weight[abs(val.w - val0.w)]);
-            sum += convert_float4(val) * w;
-            wsum += w;
-        }
-        *(__global uchar4*)(dst+dst_index) = convert_uchar4_rtz(sum/wsum+0.5f);
-    }
-}
-
-__kernel void bilateral_C4_D0(__global uchar4 *dst,
-        __global const uchar4 *src,
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,
-        const int radius,
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,
-        const int src_cols,
-        __constant float *color_weight,
-        __constant float *space_weight,
-        __constant int *space_ofs)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < dst_rows && x < dst_cols)
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float4 sum = (float4)0.f;
-        float wsum = 0.f;
-
-        int4 val0 = convert_int4(src[src_index]);
-        for(int k = 0; k < maxk; k++ )
-        {
-            int4 val = convert_int4(src[src_index + space_ofs[k]]);
-            float w = space_weight[k] * color_weight[abs(val.x - val0.x) + abs(val.y - val0.y) + abs(val.z - val0.z)];
-            sum += convert_float4(val) * (float4)w;
-            wsum += w;
-        }
-
-        wsum = 1.f / wsum;
-        dst[dst_index] = convert_uchar4_rtz(sum * (float4)wsum + (float4)0.5f);
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl
deleted file mode 100644
index 4fc1792..0000000
--- a/modules/ocl/src/opencl/imgproc_calcHarris.cl
+++ /dev/null
@@ -1,214 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if defined (DOUBLE_SUPPORT) && defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#define FPTYPE double
-#else
-#define FPTYPE float
-#endif
-
-#ifdef BORDER_CONSTANT
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        x = max(min(x, maxV - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        if (x < 0) \
-            x -= ((x - maxV + 1) / maxV) * maxV; \
-        if (x >= maxV) \
-            x %= maxV; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT101)
-#define EXTRAPOLATE_(x, maxV, delta) \
-    { \
-        if (maxV == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if ( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = maxV - 1 - (x - maxV) - delta; \
-            } \
-            while (x >= maxV || x < 0); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
-#else
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-
-#define THREADS 256
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////calcHarris////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void calcHarris(__global const float *Dx, __global const float *Dy, __global float *dst,
-                         int dx_offset, int dx_whole_rows, int dx_whole_cols, int dx_step,
-                         int dy_offset, int dy_whole_rows, int dy_whole_cols, int dy_step,
-                         int dst_offset, int dst_rows, int dst_cols, int dst_step, float k)
-{
-    int col = get_local_id(0);
-    int gX = get_group_id(0);
-    int gY = get_group_id(1);
-    int gly = get_global_id(1);
-
-    int dx_x_off = (dx_offset % dx_step) >> 2;
-    int dx_y_off = dx_offset / dx_step;
-    int dy_x_off = (dy_offset % dy_step) >> 2;
-    int dy_y_off = dy_offset / dy_step;
-    int dst_x_off = (dst_offset % dst_step) >> 2;
-    int dst_y_off = dst_offset / dst_step;
-
-    int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off;
-    int dx_startY = (gY << 1) - anY + dx_y_off;
-    int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off;
-    int dy_startY = (gY << 1) - anY + dy_y_off;
-    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
-    int dst_startY = (gY << 1) + dst_y_off;
-
-    float dx_data[ksY+1],dy_data[ksY+1], data[3][ksY+1];
-    __local FPTYPE temp[6][THREADS];
-
-#ifdef BORDER_CONSTANT
-    for (int i=0; i < ksY+1; i++)
-    {
-        bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
-        int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
-        float dx_s = dx_con ? Dx[indexDx] : 0.0f;
-        dx_data[i] = dx_s;
-
-        bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
-        int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
-        dy_data[i] = dy_s;
-
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#else
-    int clamped_col = min(2*dst_cols, col);
-    for (int i=0; i < ksY+1; i++)
-    {
-        int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col;
-        EXTRAPOLATE(dx_selected_row, dx_whole_rows)
-        EXTRAPOLATE(dx_selected_col, dx_whole_cols)
-        dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
-
-        int dy_selected_row = dy_startY+i, dy_selected_col = dy_startX+clamped_col;
-        EXTRAPOLATE(dy_selected_row, dy_whole_rows)
-        EXTRAPOLATE(dy_selected_col, dy_whole_cols)
-        dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
-
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#endif
-    FPTYPE sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
-    for (int i=1; i < ksY; i++)
-    {
-        sum0 += data[0][i];
-        sum1 += data[1][i];
-        sum2 += data[2][i];
-    }
-
-    FPTYPE sum01 = sum0 + data[0][0];
-    FPTYPE sum02 = sum0 + data[0][ksY];
-    temp[0][col] = sum01;
-    temp[1][col] = sum02;
-    FPTYPE sum11 = sum1 + data[1][0];
-    FPTYPE sum12 = sum1 + data[1][ksY];
-    temp[2][col] = sum11;
-    temp[3][col] = sum12;
-    FPTYPE sum21 = sum2 + data[2][0];
-    FPTYPE sum22 = sum2 + data[2][ksY];
-    temp[4][col] = sum21;
-    temp[5][col] = sum22;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (col < (THREADS- (ksX - 1)))
-    {
-        col += anX;
-        int posX = dst_startX - dst_x_off + col - anX;
-        int posY = (gly << 1);
-        int till = (ksX + 1)%2;
-        float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-        for (int k=0; k<6; k++)
-        {
-            FPTYPE temp_sum = 0;
-            for (int i=-anX; i<=anX - till; i++)
-            {
-                temp_sum += temp[k][col+i];
-            }
-            tmp_sum[k] = temp_sum;
-        }
-
-        if (posX < dst_cols && (posY) < dst_rows)
-        {
-            dst[(dst_startY+0) * (dst_step>>2)+ dst_startX + col - anX] =
-                    tmp_sum[0] * tmp_sum[4] - tmp_sum[2] * tmp_sum[2] - k * (tmp_sum[0] + tmp_sum[4]) * (tmp_sum[0] + tmp_sum[4]);
-        }
-        if (posX < dst_cols && (posY + 1) < dst_rows)
-        {
-            dst[(dst_startY+1) * (dst_step>>2)+ dst_startX + col - anX] =
-                    tmp_sum[1] * tmp_sum[5] - tmp_sum[3] * tmp_sum[3] - k * (tmp_sum[1] + tmp_sum[5]) * (tmp_sum[1] + tmp_sum[5]);
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
deleted file mode 100644
index 7cb4c8f..0000000
--- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+++ /dev/null
@@ -1,204 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef BORDER_CONSTANT
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        x = max(min(x, maxV - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        if (x < 0) \
-            x -= ((x - maxV + 1) / maxV) * maxV; \
-        if (x >= maxV) \
-            x %= maxV; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT101)
-#define EXTRAPOLATE_(x, maxV, delta) \
-    { \
-        if (maxV == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if ( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = maxV - 1 - (x - maxV) - delta; \
-            } \
-            while (x >= maxV || x < 0); \
-    }
-#ifdef BORDER_REFLECT
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
-#else
-#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
-#endif
-#else
-#error No extrapolation method
-#endif
-
-#define THREADS 256
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////calcHarris////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy, __global float *dst,
-                              int dx_offset, int dx_whole_rows, int dx_whole_cols, int dx_step,
-                              int dy_offset, int dy_whole_rows, int dy_whole_cols, int dy_step,
-                              int dst_offset, int dst_rows, int dst_cols, int dst_step, float k)
-{
-    int col = get_local_id(0);
-    int gX = get_group_id(0);
-    int gY = get_group_id(1);
-    int gly = get_global_id(1);
-
-    int dx_x_off = (dx_offset % dx_step) >> 2;
-    int dx_y_off = dx_offset / dx_step;
-    int dy_x_off = (dy_offset % dy_step) >> 2;
-    int dy_y_off = dy_offset / dy_step;
-    int dst_x_off = (dst_offset % dst_step) >> 2;
-    int dst_y_off = dst_offset / dst_step;
-
-    int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off;
-    int dx_startY = (gY << 1) - anY + dx_y_off;
-    int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off;
-    int dy_startY = (gY << 1) - anY + dy_y_off;
-    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
-    int dst_startY = (gY << 1) + dst_y_off;
-
-    float dx_data[ksY+1], dy_data[ksY+1], data[3][ksY+1];
-    __local float temp[6][THREADS];
-
-#ifdef BORDER_CONSTANT
-    for (int i=0; i < ksY+1; i++)
-    {
-        bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
-        int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
-        float dx_s = dx_con ? Dx[indexDx] : 0.0f;
-        dx_data[i] = dx_s;
-
-        bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
-        int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
-        dy_data[i] = dy_s;
-
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#else
-    int clamped_col = min(dst_cols, col);
-    for (int i=0; i < ksY+1; i++)
-    {
-        int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col;
-        EXTRAPOLATE(dx_selected_row, dx_whole_rows)
-        EXTRAPOLATE(dx_selected_col, dx_whole_cols)
-        dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
-
-        int dy_selected_row = dy_startY+i, dy_selected_col = dy_startX+clamped_col;
-        EXTRAPOLATE(dy_selected_row, dy_whole_rows)
-        EXTRAPOLATE(dy_selected_col, dy_whole_cols)
-        dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
-
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#endif
-    float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
-    for (int i=1; i < ksY; i++)
-    {
-        sum0 += (data[0][i]);
-        sum1 += (data[1][i]);
-        sum2 += (data[2][i]);
-    }
-
-    float sum01 = sum0 + (data[0][0]);
-    float sum02 = sum0 + (data[0][ksY]);
-    temp[0][col] = sum01;
-    temp[1][col] = sum02;
-    float sum11 = sum1 + (data[1][0]);
-    float sum12 = sum1 + (data[1][ksY]);
-    temp[2][col] = sum11;
-    temp[3][col] = sum12;
-    float sum21 = sum2 + (data[2][0]);
-    float sum22 = sum2 + (data[2][ksY]);
-    temp[4][col] = sum21;
-    temp[5][col] = sum22;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(col < (THREADS-(ksX-1)))
-    {
-        col += anX;
-        int posX = dst_startX - dst_x_off + col - anX;
-        int posY = (gly << 1);
-        int till = (ksX + 1)%2;
-        float tmp_sum[6] = { 0.0f, 0.0f , 0.0f, 0.0f, 0.0f, 0.0f };
-        for (int k=0; k<6; k++)
-            for (int i=-anX; i<=anX - till; i++)
-                tmp_sum[k] += temp[k][col+i];
-
-        if(posX < dst_cols && (posY) < dst_rows)
-        {
-            float a = tmp_sum[0] * 0.5f;
-            float b = tmp_sum[2];
-            float c = tmp_sum[4] * 0.5f;
-            dst[(dst_startY+0) * (dst_step>>2)+ dst_startX + col - anX] = (float)((a+c) - sqrt((a-c)*(a-c) + b*b));
-        }
-        if (posX < dst_cols && (posY + 1) < dst_rows)
-        {
-            float a = tmp_sum[1] * 0.5f;
-            float b = tmp_sum[3];
-            float c = tmp_sum[5] * 0.5f;
-            dst[(dst_startY+1) * (dst_step>>2)+ dst_startX + col - anX] = (float)((a+c) - sqrt((a-c)*(a-c) + b*b));
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_canny.cl b/modules/ocl/src/opencl/imgproc_canny.cl
deleted file mode 100644
index 2ddfdae..0000000
--- a/modules/ocl/src/opencl/imgproc_canny.cl
+++ /dev/null
@@ -1,721 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef L2GRAD
-inline float calc(int x, int y)
-{
-    return sqrt((float)(x * x + y * y));
-}
-#else
-inline float calc(int x, int y)
-{
-    return (float)abs(x) + abs(y);
-}
-#endif //
-
-// Smoothing perpendicular to the derivative direction with a triangle filter
-// only support 3x3 Sobel kernel
-// h (-1) =  1, h (0) =  2, h (1) =  1
-// h'(-1) = -1, h'(0) =  0, h'(1) =  1
-// thus sobel 2D operator can be calculated as:
-// h'(x, y) = h'(x)h(y) for x direction
-//
-// src		input 8bit single channel image data
-// dx_buf	output dx buffer
-// dy_buf	output dy buffer
-__kernel
-void
-__attribute__((reqd_work_group_size(16,16,1)))
-calcSobelRowPass
-(
-    __global const uchar * src,
-    __global int * dx_buf,
-    __global int * dy_buf,
-    int rows,
-    int cols,
-    int src_step,
-    int src_offset,
-    int dx_buf_step,
-    int dx_buf_offset,
-    int dy_buf_step,
-    int dy_buf_offset
-)
-{
-    dx_buf_step   /= sizeof(*dx_buf);
-    dx_buf_offset /= sizeof(*dx_buf);
-    dy_buf_step   /= sizeof(*dy_buf);
-    dy_buf_offset /= sizeof(*dy_buf);
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    int lidx = get_local_id(0);
-    int lidy = get_local_id(1);
-
-    __local int smem[16][18];
-
-    smem[lidy][lidx + 1] =
-        src[gidx + min(gidy, rows - 1) * src_step + src_offset];
-    if(lidx == 0)
-    {
-        smem[lidy][0]  =
-            src[max(gidx - 1,  0)        + min(gidy, rows - 1) * src_step + src_offset];
-        smem[lidy][17] =
-            src[min(gidx + 16, cols - 1) + min(gidy, rows - 1) * src_step + src_offset];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(gidy < rows && gidx < cols)
-    {
-        dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
-            -smem[lidy][lidx] + smem[lidy][lidx + 2];
-        dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
-            smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
-    }
-}
-
-// calculate the magnitude of the filter pass combining both x and y directions
-// This is the buffered version(3x3 sobel)
-//
-// dx_buf		dx buffer, calculated from calcSobelRowPass
-// dy_buf		dy buffer, calculated from calcSobelRowPass
-// dx			direvitive in x direction output
-// dy			direvitive in y direction output
-// mag			magnitude direvitive of xy output
-__kernel
-void
-__attribute__((reqd_work_group_size(16,16,1)))
-calcMagnitude_buf
-(
-    __global const int * dx_buf,
-    __global const int * dy_buf,
-    __global int * dx,
-    __global int * dy,
-    __global float * mag,
-    int rows,
-    int cols,
-    int dx_buf_step,
-    int dx_buf_offset,
-    int dy_buf_step,
-    int dy_buf_offset,
-    int dx_step,
-    int dx_offset,
-    int dy_step,
-    int dy_offset,
-    int mag_step,
-    int mag_offset
-)
-{
-    dx_buf_step    /= sizeof(*dx_buf);
-    dx_buf_offset  /= sizeof(*dx_buf);
-    dy_buf_step    /= sizeof(*dy_buf);
-    dy_buf_offset  /= sizeof(*dy_buf);
-    dx_step    /= sizeof(*dx);
-    dx_offset  /= sizeof(*dx);
-    dy_step    /= sizeof(*dy);
-    dy_offset  /= sizeof(*dy);
-    mag_step   /= sizeof(*mag);
-    mag_offset /= sizeof(*mag);
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    int lidx = get_local_id(0);
-    int lidy = get_local_id(1);
-
-    __local int sdx[18][16];
-    __local int sdy[18][16];
-
-    sdx[lidy + 1][lidx] =
-        dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset];
-    sdy[lidy + 1][lidx] =
-        dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset];
-    if(lidy == 0)
-    {
-        sdx[0][lidx]  =
-            dx_buf[gidx + min(max(gidy-1,0),rows-1) * dx_buf_step + dx_buf_offset];
-        sdx[17][lidx] =
-            dx_buf[gidx + min(gidy + 16, rows - 1)  * dx_buf_step + dx_buf_offset];
-
-        sdy[0][lidx]  =
-            dy_buf[gidx + min(max(gidy-1,0),rows-1) * dy_buf_step + dy_buf_offset];
-        sdy[17][lidx] =
-            dy_buf[gidx + min(gidy + 16, rows - 1)  * dy_buf_step + dy_buf_offset];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(gidx < cols && gidy < rows)
-    {
-        int x =  sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
-        int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
-
-        dx[gidx + gidy * dx_step + dx_offset] = x;
-        dy[gidx + gidy * dy_step + dy_offset] = y;
-
-        mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
-    }
-}
-
-// calculate the magnitude of the filter pass combining both x and y directions
-// This is the non-buffered version(non-3x3 sobel)
-//
-// dx_buf		dx buffer, calculated from calcSobelRowPass
-// dy_buf		dy buffer, calculated from calcSobelRowPass
-// dx			direvitive in x direction output
-// dy			direvitive in y direction output
-// mag			magnitude direvitive of xy output
-__kernel
-void calcMagnitude
-(
-    __global const int * dx,
-    __global const int * dy,
-    __global float * mag,
-    int rows,
-    int cols,
-    int dx_step,
-    int dx_offset,
-    int dy_step,
-    int dy_offset,
-    int mag_step,
-    int mag_offset
-)
-{
-    dx_step    /= sizeof(*dx);
-    dx_offset  /= sizeof(*dx);
-    dy_step    /= sizeof(*dy);
-    dy_offset  /= sizeof(*dy);
-    mag_step   /= sizeof(*mag);
-    mag_offset /= sizeof(*mag);
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    if(gidy < rows && gidx < cols)
-    {
-        mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] =
-            calc(
-                dx[gidx + gidy * dx_step + dx_offset],
-                dy[gidx + gidy * dy_step + dy_offset]
-            );
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// 0.4142135623730950488016887242097 is tan(22.5)
-#define CANNY_SHIFT 15
-
-#ifdef DOUBLE_SUPPORT
-    #define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
-#else
-    #define TG22        (int)(0.4142135623730950488016887242097f*(1<<CANNY_SHIFT) + 0.5f)
-#endif
-
-//First pass of edge detection and non-maximum suppression
-// edgetype is set to for each pixel:
-// 0 - below low thres, not an edge
-// 1 - maybe an edge
-// 2 - is an edge, either magnitude is greater than high thres, or
-//     Given estimates of the image gradients, a search is then carried out
-//     to determine if the gradient magnitude assumes a local maximum in the gradient direction.
-//     if the rounded gradient angle is zero degrees (i.e. the edge is in the north-south direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the west and east directions,
-//     if the rounded gradient angle is 90 degrees (i.e. the edge is in the east-west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north and south directions,
-//     if the rounded gradient angle is 135 degrees (i.e. the edge is in the north east-south west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north west and south east directions,
-//     if the rounded gradient angle is 45 degrees (i.e. the edge is in the north west-south east direction)the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north east and south west directions.
-//
-// dx, dy		direvitives of x and y direction
-// mag			magnitudes calculated from calcMagnitude function
-// map			output containing raw edge types
-__kernel
-void
-__attribute__((reqd_work_group_size(16,16,1)))
-calcMap
-(
-    __global const int * dx,
-    __global const int * dy,
-    __global const float * mag,
-    __global int * map,
-    int rows,
-    int cols,
-    float low_thresh,
-    float high_thresh,
-    int dx_step,
-    int dx_offset,
-    int dy_step,
-    int dy_offset,
-    int mag_step,
-    int mag_offset,
-    int map_step,
-    int map_offset
-)
-{
-    dx_step    /= sizeof(*dx);
-    dx_offset  /= sizeof(*dx);
-    dy_step    /= sizeof(*dy);
-    dy_offset  /= sizeof(*dy);
-    mag_step   /= sizeof(*mag);
-    mag_offset /= sizeof(*mag);
-    map_step   /= sizeof(*map);
-    map_offset /= sizeof(*map);
-
-    mag += mag_offset;
-    map += map_offset;
-
-    __local float smem[18][18];
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    int lidx = get_local_id(0);
-    int lidy = get_local_id(1);
-
-    int grp_idx = get_global_id(0) & 0xFFFFF0;
-    int grp_idy = get_global_id(1) & 0xFFFFF0;
-
-    int tid = lidx + lidy * 16;
-    int lx = tid % 18;
-    int ly = tid / 18;
-    if(ly < 14)
-    {
-        smem[ly][lx] =
-            mag[grp_idx + lx + min(grp_idy + ly, rows - 1) * mag_step];
-    }
-    if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
-    {
-        smem[ly + 14][lx] =
-            mag[grp_idx + lx + min(grp_idy + ly + 14, rows -1) * mag_step];
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(gidy < rows && gidx < cols)
-    {
-        int x = dx[gidx + gidy * dx_step];
-        int y = dy[gidx + gidy * dy_step];
-        const int s = (x ^ y) < 0 ? -1 : 1;
-        const float m = smem[lidy + 1][lidx + 1];
-        x = abs(x);
-        y = abs(y);
-
-        // 0 - the pixel can not belong to an edge
-        // 1 - the pixel might belong to an edge
-        // 2 - the pixel does belong to an edge
-        int edge_type = 0;
-        if(m > low_thresh)
-        {
-            const int tg22x = x * TG22;
-            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
-            y <<= CANNY_SHIFT;
-            if(y < tg22x)
-            {
-                if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
-                {
-                    edge_type = 1 + (int)(m > high_thresh);
-                }
-            }
-            else if (y > tg67x)
-            {
-                if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
-                {
-                    edge_type = 1 + (int)(m > high_thresh);
-                }
-            }
-            else
-            {
-                if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
-                {
-                    edge_type = 1 + (int)(m > high_thresh);
-                }
-            }
-        }
-        map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
-    }
-}
-
-#undef CANNY_SHIFT
-#undef TG22
-
-struct PtrStepSz {
-    __global int *ptr;
-    int step;
-    int rows, cols;
-};
-inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))); }
-inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))) = value; }
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// do Hysteresis for pixel whose edge type is 1
-//
-// If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and
-// marked as edge. Each thread will iterate for 16 times to connect local edges.
-// Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will
-// be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel.
-//
-// map		raw edge type results calculated from calcMap.
-// st		the potiential edge points found in this kernel call
-// counter	the number of potiential edge points
-__kernel
-void
-__attribute__((reqd_work_group_size(16,16,1)))
-edgesHysteresisLocal
-(
-    __global int * map_ptr,
-    __global ushort2 * st,
-    __global unsigned int * counter,
-    int rows,
-    int cols,
-    int map_step,
-    int map_offset
-)
-{
-#if 0
-    map_step   /= sizeof(*map);
-    map_offset /= sizeof(*map);
-
-    const __global int* map = map_ptr + map_offset;
-
-    __local int smem[18][18];
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    int lidx = get_local_id(0);
-    int lidy = get_local_id(1);
-
-    int grp_idx = get_global_id(0) & 0xFFFFF0;
-    int grp_idy = get_global_id(1) & 0xFFFFF0;
-
-    int tid = lidx + lidy * 16;
-    int lx = tid % 18;
-    int ly = tid / 18;
-    if(ly < 14)
-    {
-        smem[ly][lx] =
-            map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
-    }
-    if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
-    {
-        smem[ly + 14][lx] =
-            map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(gidy < rows && gidx < cols)
-    {
-        int n;
-
-        #pragma unroll
-        for (int k = 0; k < 16; ++k)
-        {
-            n = 0;
-
-            if (smem[lidy + 1][lidx + 1] == 1)
-            {
-                n += smem[lidy    ][lidx    ] == 2;
-                n += smem[lidy    ][lidx + 1] == 2;
-                n += smem[lidy    ][lidx + 2] == 2;
-
-                n += smem[lidy + 1][lidx    ] == 2;
-                n += smem[lidy + 1][lidx + 2] == 2;
-
-                n += smem[lidy + 2][lidx    ] == 2;
-                n += smem[lidy + 2][lidx + 1] == 2;
-                n += smem[lidy + 2][lidx + 2] == 2;
-            }
-
-            if (n > 0)
-                smem[lidy + 1][lidx + 1] = 2;
-        }
-
-        const int e = smem[lidy + 1][lidx + 1];
-        map[gidx + 1 + (gidy + 1) * map_step] = e;
-
-        n = 0;
-        if(e == 2)
-        {
-            n += smem[lidy    ][lidx    ] == 1;
-            n += smem[lidy    ][lidx + 1] == 1;
-            n += smem[lidy    ][lidx + 2] == 1;
-
-            n += smem[lidy + 1][lidx    ] == 1;
-            n += smem[lidy + 1][lidx + 2] == 1;
-
-            n += smem[lidy + 2][lidx    ] == 1;
-            n += smem[lidy + 2][lidx + 1] == 1;
-            n += smem[lidy + 2][lidx + 2] == 1;
-        }
-
-        if(n > 0)
-        {
-            unsigned int ind = atomic_inc(counter);
-            st[ind] = (ushort2)(gidx + 1, gidy + 1);
-        }
-    }
-#else
-    struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows + 1, cols + 1};
-
-    __local int smem[18][18];
-
-    int2 blockIdx = (int2)(get_group_id(0), get_group_id(1));
-    int2 blockDim = (int2)(get_local_size(0), get_local_size(1));
-    int2 threadIdx = (int2)(get_local_id(0), get_local_id(1));
-
-    const int x = blockIdx.x * blockDim.x + threadIdx.x;
-    const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-    smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0;
-    if (threadIdx.y == 0)
-        smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0;
-    if (threadIdx.y == blockDim.y - 1)
-        smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0;
-    if (threadIdx.x == 0)
-        smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0;
-    if (threadIdx.x == blockDim.x - 1)
-        smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0;
-    if (threadIdx.x == 0 && threadIdx.y == 0)
-        smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0;
-    if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
-        smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? get(map, y - 1, x + 1) : 0;
-    if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
-        smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? get(map, y + 1, x - 1) : 0;
-    if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
-        smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? get(map, y + 1, x + 1) : 0;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (x >= cols || y >= rows)
-        return;
-
-    int n;
-
-    #pragma unroll
-    for (int k = 0; k < 16; ++k)
-    {
-        n = 0;
-
-        if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
-        {
-            n += smem[threadIdx.y    ][threadIdx.x    ] == 2;
-            n += smem[threadIdx.y    ][threadIdx.x + 1] == 2;
-            n += smem[threadIdx.y    ][threadIdx.x + 2] == 2;
-
-            n += smem[threadIdx.y + 1][threadIdx.x    ] == 2;
-            n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
-
-            n += smem[threadIdx.y + 2][threadIdx.x    ] == 2;
-            n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
-            n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
-        }
-
-        if (n > 0)
-            smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
-    }
-
-    const int e = smem[threadIdx.y + 1][threadIdx.x + 1];
-
-    set(map, y, x, e);
-
-    n = 0;
-
-    if (e == 2)
-    {
-        n += smem[threadIdx.y    ][threadIdx.x    ] == 1;
-        n += smem[threadIdx.y    ][threadIdx.x + 1] == 1;
-        n += smem[threadIdx.y    ][threadIdx.x + 2] == 1;
-
-        n += smem[threadIdx.y + 1][threadIdx.x    ] == 1;
-        n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
-
-        n += smem[threadIdx.y + 2][threadIdx.x    ] == 1;
-        n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
-        n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
-    }
-
-    if (n > 0)
-    {
-        const int ind = atomic_inc(counter);
-        st[ind] = (ushort2)(x + 1, y + 1);
-    }
-#endif
-}
-
-__constant int c_dx[8] = {-1,  0,  1, -1, 1, -1, 0, 1};
-__constant int c_dy[8] = {-1, -1, -1,  0, 0,  1, 1, 1};
-
-
-#define stack_size 512
-__kernel
-void
-__attribute__((reqd_work_group_size(128,1,1)))
-edgesHysteresisGlobal
-(
-    __global int * map,
-    __global ushort2 * st1,
-    __global ushort2 * st2,
-    __global int * counter,
-    int rows,
-    int cols,
-    int count,
-    int map_step,
-    int map_offset
-)
-{
-    map_step   /= sizeof(*map);
-    map_offset /= sizeof(*map);
-
-    map += map_offset;
-
-    int lidx = get_local_id(0);
-
-    int grp_idx = get_group_id(0);
-    int grp_idy = get_group_id(1);
-
-    __local unsigned int s_counter;
-    __local unsigned int s_ind;
-
-    __local ushort2 s_st[stack_size];
-
-    if(lidx == 0)
-    {
-        s_counter = 0;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx);
-
-    if(ind < count)
-    {
-        ushort2 pos = st1[ind];
-        if (lidx < 8)
-        {
-            pos.x += c_dx[lidx];
-            pos.y += c_dy[lidx];
-            if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && map[pos.x + pos.y * map_step] == 1)
-            {
-                map[pos.x + pos.y * map_step] = 2;
-
-                ind = atomic_inc(&s_counter);
-
-                s_st[ind] = pos;
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        while (s_counter > 0 && s_counter <= stack_size - get_local_size(0))
-        {
-            const int subTaskIdx = lidx >> 3;
-            const int portion = min(s_counter, (uint)(get_local_size(0)>> 3));
-
-            if (subTaskIdx < portion)
-                pos = s_st[s_counter - 1 - subTaskIdx];
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            if (lidx == 0)
-                s_counter -= portion;
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            if (subTaskIdx < portion)
-            {
-                pos.x += c_dx[lidx & 7];
-                pos.y += c_dy[lidx & 7];
-                if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && map[pos.x + pos.y * map_step] == 1)
-                {
-                    map[pos.x + pos.y * map_step] = 2;
-
-                    ind = atomic_inc(&s_counter);
-
-                    s_st[ind] = pos;
-                }
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        if (s_counter > 0)
-        {
-            if (lidx == 0)
-            {
-                ind = atomic_add(counter, s_counter);
-                s_ind = ind - s_counter;
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            ind = s_ind;
-
-            for (int i = lidx; i < (int)s_counter; i += get_local_size(0))
-            {
-                st2[ind + i] = s_st[i];
-            }
-        }
-    }
-}
-#undef stack_size
-
-//Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
-// map		edge type mappings
-// dst		edge output
-__kernel
-void getEdges
-(
-    __global const int * map,
-    __global uchar * dst,
-    int rows,
-    int cols,
-    int map_step,
-    int map_offset,
-    int dst_step,
-    int dst_offset
-)
-{
-    map_step   /= sizeof(*map);
-    map_offset /= sizeof(*map);
-
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    if(gidy < rows && gidx < cols)
-    {
-        dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl
deleted file mode 100644
index 71a6f89..0000000
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ /dev/null
@@ -1,255 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef WAVE_SIZE
-#define WAVE_SIZE 1
-#endif
-
-inline int calc_lut(__local int* smem, int val, int tid)
-{
-    smem[tid] = val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid == 0)
-        for (int i = 1; i < 256; ++i)
-            smem[i] += smem[i - 1];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    return smem[tid];
-}
-
-#ifdef CPU
-inline void reduce(volatile __local int* smem, int val, int tid)
-{
-    smem[tid] = val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 128)
-        smem[tid] = val += smem[tid + 128];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 64)
-        smem[tid] = val += smem[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 32)
-        smem[tid] += smem[tid + 32];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 16)
-        smem[tid] += smem[tid + 16];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 8)
-        smem[tid] += smem[tid + 8];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 4)
-        smem[tid] += smem[tid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 2)
-        smem[tid] += smem[tid + 2];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 1)
-        smem[256] = smem[tid] + smem[tid + 1];
-    barrier(CLK_LOCAL_MEM_FENCE);
-}
-
-#else
-
-inline void reduce(__local volatile int* smem, int val, int tid)
-{
-    smem[tid] = val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 128)
-        smem[tid] = val += smem[tid + 128];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 64)
-        smem[tid] = val += smem[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 32)
-    {
-        smem[tid] += smem[tid + 32];
-#if WAVE_SIZE < 32
-    } barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 16)
-    {
-#endif
-        smem[tid] += smem[tid + 16];
-#if WAVE_SIZE < 16
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 8)
-    {
-#endif
-        smem[tid] += smem[tid + 8];
-        smem[tid] += smem[tid + 4];
-        smem[tid] += smem[tid + 2];
-        smem[tid] += smem[tid + 1];
-    }
-}
-#endif
-
-__kernel void calcLut(__global __const uchar * src, __global uchar * lut,
-                      const int srcStep, const int dstStep,
-                      const int2 tileSize, const int tilesX,
-                      const int clipLimit, const float lutScale,
-                      const int src_offset, const int dst_offset)
-{
-    __local int smem[512];
-
-    int tx = get_group_id(0);
-    int ty = get_group_id(1);
-    int tid = get_local_id(1) * get_local_size(0)
-                             + get_local_id(0);
-
-    smem[tid] = 0;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
-    {
-        __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
-        for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
-        {
-            const int data = srcPtr[j];
-            atomic_inc(&smem[data]);
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int tHistVal = smem[tid];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (clipLimit > 0)
-    {
-        // clip histogram bar
-        int clipped = 0;
-        if (tHistVal > clipLimit)
-        {
-            clipped = tHistVal - clipLimit;
-            tHistVal = clipLimit;
-        }
-
-        // find number of overall clipped samples
-        reduce(smem, clipped, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
-#ifdef CPU
-        clipped = smem[256];
-#else
-        clipped = smem[0];
-#endif
-
-        // broadcast evaluated value
-
-        __local int totalClipped;
-
-        if (tid == 0)
-            totalClipped = clipped;
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        // redistribute clipped samples evenly
-
-        int redistBatch = totalClipped / 256;
-        tHistVal += redistBatch;
-
-        int residual = totalClipped - redistBatch * 256;
-        if (tid < residual)
-            ++tHistVal;
-    }
-
-    const int lutVal = calc_lut(smem, tHistVal, tid);
-    uint ires = (uint)convert_int_rte(lutScale * lutVal);
-    lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
-        convert_uchar(clamp(ires, (uint)0, (uint)255));
-}
-
-__kernel void transform(__global __const uchar * src,
-                        __global uchar * dst,
-                        __global uchar * lut,
-                        const int srcStep, const int dstStep, const int lutStep,
-                        const int cols, const int rows,
-                        const int2 tileSize,
-                        const int tilesX, const int tilesY,
-                        const int src_offset, const int dst_offset, int lut_offset)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (x >= cols || y >= rows)
-        return;
-
-    const float tyf = (convert_float(y) / tileSize.y) - 0.5f;
-    int ty1 = convert_int_rtn(tyf);
-    int ty2 = ty1 + 1;
-    const float ya = tyf - ty1;
-    ty1 = max(ty1, 0);
-    ty2 = min(ty2, tilesY - 1);
-
-    const float txf = (convert_float(x) / tileSize.x) - 0.5f;
-    int tx1 = convert_int_rtn(txf);
-    int tx2 = tx1 + 1;
-    const float xa = txf - tx1;
-    tx1 = max(tx1, 0);
-    tx2 = min(tx2, tilesX - 1);
-
-    const int srcVal = src[mad24(y, srcStep, x + src_offset)];
-
-    float res = 0;
-
-    res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));
-    res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));
-    res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));
-    res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));
-
-    uint ires = (uint)convert_int_rte(res);
-    dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
-}
diff --git a/modules/ocl/src/opencl/imgproc_columnsum.cl b/modules/ocl/src/opencl/imgproc_columnsum.cl
deleted file mode 100644
index 6b596a3..0000000
--- a/modules/ocl/src/opencl/imgproc_columnsum.cl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Chunpeng Zhang chunpeng@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-////////////////////////////////////////////////////////////////////
-///////////////////////// columnSum ////////////////////////////////
-////////////////////////////////////////////////////////////////////
-
-__kernel void columnSum_C1_D5(__global float * src, __global float * dst,
-    int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
-{
-    const int x = get_global_id(0);
-
-    if (x < cols)
-    {
-        int srcIdx = x + src_offset;
-        int dstIdx = x + dst_offset;
-
-        float sum = 0;
-
-        for (int y = 0; y < rows; ++y)
-        {
-            sum += src[srcIdx];
-            dst[dstIdx] = sum;
-            srcIdx += src_step;
-            dstIdx += dst_step;
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_convolve.cl b/modules/ocl/src/opencl/imgproc_convolve.cl
deleted file mode 100644
index b8f9742..0000000
--- a/modules/ocl/src/opencl/imgproc_convolve.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-/************************************** convolve **************************************/
-
-__kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst,
-                          int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight,
-                          int src_offset, int dst_offset, int koffset)
-{
-    __local float smem[16 + 2 * 8][16 + 2 * 8];
-
-    int x = get_local_id(0);
-    int y = get_local_id(1);
-    int gx = get_global_id(0);
-    int gy = get_global_id(1);
-
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-    smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
-
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-    smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
-
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-    smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
-
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-    smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (gx < cols && gy < rows)
-    {
-        float res = 0;
-
-        for (int i = 0; i < kHeight; ++i)
-            for (int j = 0; j < kWidth; ++j)
-                res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j + koffset];
-
-        dst[gy * dst_step + gx + dst_offset] = res;
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_copymakeboder.cl b/modules/ocl/src/opencl/imgproc_copymakeboder.cl
deleted file mode 100644
index ac149a4..0000000
--- a/modules/ocl/src/opencl/imgproc_copymakeboder.cl
+++ /dev/null
@@ -1,134 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Zero Lin zero.lin@amd.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#ifdef BORDER_CONSTANT
-#define EXTRAPOLATE(x, y, v) v = scalar;
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        x = max(min(x, src_cols - 1), 0); \
-        y = max(min(y, src_rows - 1), 0); \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        if (x < 0) \
-            x -= ((x - src_cols + 1) / src_cols) * src_cols; \
-        if (x >= src_cols) \
-            x %= src_cols; \
-        \
-        if (y < 0) \
-            y -= ((y - src_rows + 1) / src_rows) * src_rows; \
-        if( y >= src_rows ) \
-            y %= src_rows; \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#ifdef BORDER_REFLECT
-#define DELTA int delta = 0
-#else
-#define DELTA int delta = 1
-#endif
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        DELTA; \
-        if (src_cols == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = src_cols - 1 - (x - src_cols) - delta; \
-            } \
-            while (x >= src_cols || x < 0); \
-        \
-        if (src_rows == 1) \
-            y = 0; \
-        else \
-            do \
-            { \
-                if( y < 0 ) \
-                    y = -y - 1 + delta; \
-                else \
-                    y = src_rows - 1 - (y - src_rows) - delta; \
-            } \
-            while (y >= src_rows || y < 0); \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#else
-#error No extrapolation method
-#endif
-
-#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
-
-__kernel void copymakeborder
-                        (__global const GENTYPE *src,
-                         __global GENTYPE *dst,
-                         int dst_cols, int dst_rows,
-                         int src_cols, int src_rows,
-                         int src_step, int src_offset,
-                         int dst_step, int dst_offset,
-                         int top, int left, GENTYPE scalar)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int src_x = x - left;
-        int src_y = y - top;
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-
-        if (NEED_EXTRAPOLATION(src_x, src_y))
-            EXTRAPOLATE(src_x, src_y, dst[dst_index])
-        else
-        {
-            int src_index = mad24(src_y, src_step, src_x + src_offset);
-            dst[dst_index] = src[src_index];
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_gftt.cl b/modules/ocl/src/opencl/imgproc_gftt.cl
deleted file mode 100644
index 9cd5767..0000000
--- a/modules/ocl/src/opencl/imgproc_gftt.cl
+++ /dev/null
@@ -1,129 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef WITH_MASK
-#define WITH_MASK 0
-#endif
-
-//macro to read eigenvalue matrix
-#define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]
-
-__kernel
-    void findCorners
-    (
-        __global const char*    eig,
-        const int               eig_pitch,
-        __global const char*    mask,
-        __global float2*        corners,
-        const int               mask_strip,// in pixels
-        __global const float*   pMinMax,
-        const float             qualityLevel,
-        const int               rows,
-        const int               cols,
-        const int               max_count,
-        __global int*           g_counter
-    )
-{
-    float threshold = qualityLevel*pMinMax[1];
-    const int j = get_global_id(0);
-    const int i = get_global_id(1);
-
-    if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1
-#if WITH_MASK
-        && mask[i * mask_strip + j] != 0
-#endif
-        )
-    {
-        const float val = GET_SRC_32F(j, i);
-
-        if (val > threshold)
-        {
-            float maxVal = val;
-            maxVal = fmax(GET_SRC_32F(j - 1, i - 1), maxVal);
-            maxVal = fmax(GET_SRC_32F(j    , i - 1), maxVal);
-            maxVal = fmax(GET_SRC_32F(j + 1, i - 1), maxVal);
-
-            maxVal = fmax(GET_SRC_32F(j - 1, i), maxVal);
-            maxVal = fmax(GET_SRC_32F(j + 1, i), maxVal);
-
-            maxVal = fmax(GET_SRC_32F(j - 1, i + 1), maxVal);
-            maxVal = fmax(GET_SRC_32F(j    , i + 1), maxVal);
-            maxVal = fmax(GET_SRC_32F(j + 1, i + 1), maxVal);
-
-            if (val == maxVal)
-            {
-                const int ind = atomic_inc(g_counter);
-
-                if (ind < max_count)
-                {// pack and store eigenvalue and its coordinates
-                    corners[ind].x = val;
-                    corners[ind].y = as_float(j|(i<<16));
-                }
-            }
-        }
-    }
-}
-#undef GET_SRC_32F
-
-
-// this is simple short serial kernel that makes some short reduction and initialization work
-// it makes HOST like work to avoid additional sync with HOST to do this short work
-// data - input/output float2.
-//      input data are sevral (min,max) pairs
-//      output data is one reduced (min,max) pair
-// g_counter - counter that have to be initialized by 0 for next findCorner call.
-__kernel void arithm_op_minMax_final(__global float * data, int groupnum,__global int * g_counter)
-{
-    g_counter[0] = 0;
-    float minVal = data[0];
-    float maxVal = data[groupnum];
-    for(int i=1;i<groupnum;++i)
-    {
-        minVal = min(minVal,data[i]);
-        maxVal = max(maxVal,data[i+groupnum]);
-    }
-    data[0] = minVal;
-    data[1] = maxVal;
-}
diff --git a/modules/ocl/src/opencl/imgproc_histogram.cl b/modules/ocl/src/opencl/imgproc_histogram.cl
deleted file mode 100644
index bac9a6b..0000000
--- a/modules/ocl/src/opencl/imgproc_histogram.cl
+++ /dev/null
@@ -1,279 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Xu Pang, pangxu010@163.com
-//    Wenju He, wenju@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-#define PARTIAL_HISTOGRAM256_COUNT     (256)
-#define HISTOGRAM256_BIN_COUNT         (256)
-
-#define HISTOGRAM256_WORK_GROUP_SIZE     (256)
-#define HISTOGRAM256_LOCAL_MEM_SIZE      (HISTOGRAM256_BIN_COUNT)
-
-#define NBANKS (16)
-#define NBANKS_BIT (4)
-
-
-__kernel __attribute__((reqd_work_group_size(HISTOGRAM256_BIN_COUNT,1,1)))void calc_sub_hist_D0(
-                                                                      __global const uint4* src,
-                                          int src_step, int src_offset,
-                                                                      __global int* globalHist,
-                                                                      int dataCount,  int cols,
-                                          int inc_x, int inc_y,
-                                          int hist_step)
-{
-        __local int subhist[(HISTOGRAM256_BIN_COUNT << NBANKS_BIT)]; // NBINS*NBANKS
-        int gid = get_global_id(0);
-        int lid = get_local_id(0);
-        int gx  = get_group_id(0);
-        int gsize = get_global_size(0);
-        int lsize  = get_local_size(0);
-        const int shift = 8;
-        const int mask = HISTOGRAM256_BIN_COUNT-1;
-        int offset = (lid & (NBANKS-1));// lid % NBANKS
-        uint4 data, temp1, temp2, temp3, temp4;
-        src += src_offset;
-
-        //clear LDS
-        for(int i=0, idx=lid; i<(NBANKS >> 2); i++, idx += lsize)
-        {
-            subhist[idx] = 0;
-            subhist[idx+=lsize] = 0;
-            subhist[idx+=lsize] = 0;
-            subhist[idx+=lsize] = 0;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        //read and scatter
-        int y = gid/cols;
-        int x = gid - mul24(y, cols);
-        for(int idx=gid; idx<dataCount; idx+=gsize)
-        {
-              data = src[mad24(y, src_step, x)];
-              temp1 = ((data & mask) << NBANKS_BIT) + offset;
-              data >>= shift;
-              temp2 = ((data & mask) << NBANKS_BIT) + offset;
-              data >>= shift;
-              temp3 = ((data & mask) << NBANKS_BIT) + offset;
-              data >>= shift;
-              temp4 = ((data & mask) << NBANKS_BIT) + offset;
-
-              atomic_inc(subhist + temp1.x);
-              atomic_inc(subhist + temp1.y);
-              atomic_inc(subhist + temp1.z);
-              atomic_inc(subhist + temp1.w);
-
-              atomic_inc(subhist + temp2.x);
-              atomic_inc(subhist + temp2.y);
-              atomic_inc(subhist + temp2.z);
-              atomic_inc(subhist + temp2.w);
-
-              atomic_inc(subhist + temp3.x);
-              atomic_inc(subhist + temp3.y);
-              atomic_inc(subhist + temp3.z);
-              atomic_inc(subhist + temp3.w);
-
-              atomic_inc(subhist + temp4.x);
-              atomic_inc(subhist + temp4.y);
-              atomic_inc(subhist + temp4.z);
-              atomic_inc(subhist + temp4.w);
-
-              x += inc_x;
-              int off = ((x>=cols) ? -1 : 0);
-              x = mad24(off, cols, x);
-              y += inc_y - off;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        //reduce local banks to single histogram per workgroup
-        int bin1=0, bin2=0, bin3=0, bin4=0;
-        for(int i=0; i<NBANKS; i+=4)
-        {
-             bin1 += subhist[(lid << NBANKS_BIT) + i];
-             bin2 += subhist[(lid << NBANKS_BIT) + i+1];
-             bin3 += subhist[(lid << NBANKS_BIT) + i+2];
-             bin4 += subhist[(lid << NBANKS_BIT) + i+3];
-        }
-
-        globalHist[mad24(gx, hist_step, lid)] = bin1+bin2+bin3+bin4;
-}
-
-__kernel void __attribute__((reqd_work_group_size(1,HISTOGRAM256_BIN_COUNT,1)))
-calc_sub_hist_border_D0(__global const uchar* src, int src_step, int src_offset,
-                        __global int* globalHist, int left_col, int cols,
-                        int rows, int hist_step)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-        int lidy = get_local_id(1);
-        int gx = get_group_id(0);
-        int gy = get_group_id(1);
-        int gn = get_num_groups(0);
-        int rowIndex = mad24(gy, gn, gx);
-//        rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
-
-        __local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE];
-        subhist[lidy] = 0;
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
-        if(gidy<rows)
-        {
-            int src_index = src_offset + mad24(gidy, src_step, gidx);
-            int p = (int)src[src_index];
-//	    p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
-            atomic_inc(subhist + p);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];
-}
-
-__kernel __attribute__((reqd_work_group_size(256,1,1)))void merge_hist(__global int* buf,
-                __global int* hist,
-                int src_step)
-{
-    int lx = get_local_id(0);
-    int gx = get_group_id(0);
-
-    int sum = 0;
-
-    for(int i = lx; i < PARTIAL_HISTOGRAM256_COUNT; i += HISTOGRAM256_WORK_GROUP_SIZE)
-        sum += buf[ mad24(i, src_step, gx)];
-
-    __local int data[HISTOGRAM256_WORK_GROUP_SIZE];
-    data[lx] = sum;
-
-    for(int stride = HISTOGRAM256_WORK_GROUP_SIZE /2; stride > 0; stride >>= 1)
-    {
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lx < stride)
-            data[lx] += data[lx + stride];
-    }
-
-    if(lx == 0)
-        hist[gx] = data[0];
-}
-
-__kernel __attribute__((reqd_work_group_size(256,1,1)))
-void calLUT(__global uchar * dst, __constant int * hist, int total)
-{
-    int lid = get_local_id(0);
-    __local int sumhist[HISTOGRAM256_BIN_COUNT];
-    __local float scale;
-
-    sumhist[lid] = hist[lid];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (lid == 0)
-    {
-        int sum = 0, i = 0;
-        while (!sumhist[i])
-            ++i;
-
-        if (total == sumhist[i])
-        {
-            scale = 1;
-            for (int j = 0; j < HISTOGRAM256_BIN_COUNT; ++j)
-                sumhist[i] = i;
-        }
-        else
-        {
-            scale = 255.f/(total - sumhist[i]);
-
-            for (sumhist[i++] = 0; i < HISTOGRAM256_BIN_COUNT; i++)
-            {
-                sum += sumhist[i];
-                sumhist[i] = sum;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    dst[lid]= convert_uchar_sat_rte(convert_float(sumhist[lid])*scale);
-}
-
-/*
-///////////////////////////////equalizeHist//////////////////////////////////////////////////
-__kernel __attribute__((reqd_work_group_size(256,1,1)))void equalizeHist(
-                            __global uchar * src,
-                            __global uchar * dst,
-                            __constant int * hist,
-                            int srcstep,
-                            int srcoffset,
-                            int dststep,
-                            int dstoffset,
-                            int width,
-                            int height,
-                            float scale,
-                            int inc_x,
-                            int inc_y)
-{
-    int gidx = get_global_id(0);
-    int lid = get_local_id(0);
-    int glb_size = get_global_size(0);
-    src+=srcoffset;
-    dst+=dstoffset;
-    __local int sumhist[HISTOGRAM256_BIN_COUNT];
-    __local uchar lut[HISTOGRAM256_BIN_COUNT+1];
-
-    sumhist[lid]=hist[lid];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(lid==0)
-    {
-        int sum = 0;
-        for(int i=0;i<HISTOGRAM256_BIN_COUNT;i++)
-        {
-            sum+=sumhist[i];
-            sumhist[i]=sum;
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    lut[lid]= convert_uchar_sat(convert_float(sumhist[lid])*scale);
-    lut[0]=0;
-    int pos_y = gidx / width;
-    int pos_x = gidx - mul24(pos_y, width);
-
-    for(int pos = gidx; pos < mul24(width,height); pos += glb_size)
-    {
-        int inaddr = mad24(pos_y,srcstep,pos_x);
-        int outaddr = mad24(pos_y,dststep,pos_x);
-        dst[outaddr] = lut[src[inaddr]];
-        pos_x +=inc_x;
-        int off = (pos_x >= width ? -1 : 0);
-        pos_x =  mad24(off,width,pos_x);
-        pos_y += inc_y - off;
-    }
-}
-*/
diff --git a/modules/ocl/src/opencl/imgproc_hough.cl b/modules/ocl/src/opencl/imgproc_hough.cl
deleted file mode 100644
index fd1c5b9..0000000
--- a/modules/ocl/src/opencl/imgproc_hough.cl
+++ /dev/null
@@ -1,280 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
-
-////////////////////////////////////////////////////////////////////////
-// buildPointList
-
-#define PIXELS_PER_THREAD 16
-
-// TODO: add offset to support ROI
-__kernel void buildPointList(__global const uchar* src,
-                             int cols,
-                             int rows,
-                             int step,
-                             __global unsigned int* list,
-                             __global int* counter)
-{
-    __local unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
-    __local int s_qsize[4];
-    __local int s_globStart[4];
-
-    const int x = get_group_id(0) * get_local_size(0) * PIXELS_PER_THREAD + get_local_id(0);
-    const int y = get_global_id(1);
-
-    if (get_local_id(0) == 0)
-        s_qsize[get_local_id(1)] = 0;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (y < rows)
-    {
-        // fill the queue
-        __global const uchar* srcRow = &src[y * step];
-        for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < cols; ++i, xx += get_local_size(0))
-        {
-            if (srcRow[xx])
-            {
-                const unsigned int val = (y << 16) | xx;
-                const int qidx = atomic_add(&s_qsize[get_local_id(1)], 1);
-                s_queues[get_local_id(1)][qidx] = val;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // let one work-item reserve the space required in the global list
-    if (get_local_id(0) == 0 && get_local_id(1) == 0)
-    {
-        // find how many items are stored in each list
-        int totalSize = 0;
-        for (int i = 0; i < get_local_size(1); ++i)
-        {
-            s_globStart[i] = totalSize;
-            totalSize += s_qsize[i];
-        }
-
-        // calculate the offset in the global list
-        const int globalOffset = atomic_add(counter, totalSize);
-        for (int i = 0; i < get_local_size(1); ++i)
-            s_globStart[i] += globalOffset;
-    }
-
-    barrier(CLK_GLOBAL_MEM_FENCE);
-
-    // copy local queues to global queue
-    const int qsize = s_qsize[get_local_id(1)];
-    int gidx = s_globStart[get_local_id(1)] + get_local_id(0);
-    for(int i = get_local_id(0); i < qsize; i += get_local_size(0), gidx += get_local_size(0))
-        list[gidx] = s_queues[get_local_id(1)][i];
-}
-
-////////////////////////////////////////////////////////////////////////
-// circlesAccumCenters
-
-// TODO: add offset to support ROI
-__kernel void circlesAccumCenters(__global const unsigned int* list,
-                                  const int count,
-                                  __global const int* dx,
-                                  const int dxStep,
-                                  __global const int* dy,
-                                  const int dyStep,
-                                  __global int* accum,
-                                  const int accumStep,
-                                  const int width,
-                                  const int height,
-                                  const int minRadius,
-                                  const int maxRadius,
-                                  const float idp)
-{
-    const int dxStepInPixel    = dxStep    / sizeof(int);
-    const int dyStepInPixel    = dyStep    / sizeof(int);
-    const int accumStepInPixel = accumStep / sizeof(int);
-
-    const int SHIFT = 10;
-    const int ONE = 1 << SHIFT;
-
-    // const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const int wid = get_global_id(0);
-
-    if (wid >= count)
-        return;
-
-    const unsigned int val = list[wid];
-
-    const int x = (val & 0xFFFF);
-    const int y = (val >> 16) & 0xFFFF;
-
-    const int vx = dx[mad24(y, dxStepInPixel, x)];
-    const int vy = dy[mad24(y, dyStepInPixel, x)];
-
-    if (vx == 0 && vy == 0)
-        return;
-
-    const float mag = sqrt(convert_float(vx * vx + vy * vy));
-
-    const int x0 = convert_int_rte((x * idp) * ONE);
-    const int y0 = convert_int_rte((y * idp) * ONE);
-
-    int sx = convert_int_rte((vx * idp) * ONE / mag);
-    int sy = convert_int_rte((vy * idp) * ONE / mag);
-
-    // Step from minRadius to maxRadius in both directions of the gradient
-    for (int k1 = 0; k1 < 2; ++k1)
-    {
-        int x1 = x0 + minRadius * sx;
-        int y1 = y0 + minRadius * sy;
-
-        for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
-        {
-            const int x2 = x1 >> SHIFT;
-            const int y2 = y1 >> SHIFT;
-
-            if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
-                break;
-
-            atomic_add(&accum[mad24(y2+1, accumStepInPixel, x2+1)], 1);
-        }
-
-        sx = -sx;
-        sy = -sy;
-    }
-}
-
-// ////////////////////////////////////////////////////////////////////////
-// // buildCentersList
-
-// TODO: add offset to support ROI
-__kernel void buildCentersList(__global const int* accum,
-                               const int accumCols,
-                               const int accumRows,
-                               const int accumStep,
-                               __global unsigned int* centers,
-                               const int threshold,
-                               __global int* counter)
-{
-    const int accumStepInPixel = accumStep/sizeof(int);
-
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (x < accumCols - 2 && y < accumRows - 2)
-    {
-        const int top    = accum[mad24(y,     accumStepInPixel, x + 1)];
-
-        const int left   = accum[mad24(y + 1, accumStepInPixel, x)];
-        const int cur    = accum[mad24(y + 1, accumStepInPixel, x + 1)];
-        const int right  = accum[mad24(y + 1, accumStepInPixel, x + 2)];
-
-        const int bottom = accum[mad24(y + 2, accumStepInPixel, x + 1)];;
-
-        if (cur > threshold && cur > top && cur >= bottom && cur >  left && cur >= right)
-        {
-            const unsigned int val = (y << 16) | x;
-            const int idx = atomic_add(counter, 1);
-            centers[idx] = val;
-        }
-    }
-}
-
-
-// ////////////////////////////////////////////////////////////////////////
-// // circlesAccumRadius
-
-// TODO: add offset to support ROI
-__kernel void circlesAccumRadius(__global const unsigned int* centers,
-                                 __global const unsigned int* list, const int count,
-                                 __global float4* circles, const int maxCircles,
-                                 const float dp,
-                                 const int minRadius, const int maxRadius,
-                                 const int histSize,
-                                 const int threshold,
-                                 __local int* smem,
-                                 __global int* counter)
-{
-    for (int i = get_local_id(0); i < histSize + 2; i += get_local_size(0))
-        smem[i] = 0;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    unsigned int val = centers[get_group_id(0)];
-
-    float cx = convert_float(val & 0xFFFF);
-    float cy = convert_float((val >> 16) & 0xFFFF);
-
-    cx = (cx + 0.5f) * dp;
-    cy = (cy + 0.5f) * dp;
-
-    for (int i = get_local_id(0); i < count; i += get_local_size(0))
-    {
-        val = list[i];
-
-        const int x = (val & 0xFFFF);
-        const int y = (val >> 16) & 0xFFFF;
-
-        const float rad = sqrt((cx - x) * (cx - x) + (cy - y) * (cy - y));
-        if (rad >= minRadius && rad <= maxRadius)
-        {
-            const int r = convert_int_rte(rad - minRadius);
-
-            atomic_add(&smem[r + 1], 1);
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for (int i = get_local_id(0); i < histSize; i += get_local_size(0))
-    {
-        const int curVotes = smem[i + 1];
-
-        if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
-
-        {
-            const int ind = atomic_add(counter, 1);
-            if (ind < maxCircles)
-            {
-                circles[ind] = (float4)(cx, cy, convert_float(i + minRadius), 0.0f);
-            }
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl
deleted file mode 100644
index 1d90e50..0000000
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ /dev/null
@@ -1,503 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define CONVERT(step) ((step)>>1)
-#else
-#define CONVERT(step) ((step))
-#endif
-
-#define LSIZE 256
-#define LSIZE_1 255
-#define LSIZE_2 254
-#define HF_LSIZE 128
-#define LOG_LSIZE 8
-#define LOG_NUM_BANKS 5
-#define NUM_BANKS 32
-#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
-
-
-kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TYPE *sqsum,
-                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step,int dst1_step)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int4 src_t[2], sum_t[2];
-    TYPE4 sqsum_t[2];
-    __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
-    __local int* sum_p;
-    __local TYPE* sqsum_p;
-    src_step = src_step >> 2;
-    gid = gid << 1;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : 0);
-        src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : 0);
-
-        sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
-
-        lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][bi]  +=  lm_sqsum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-
-                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        int loc_s0 = gid * dst_step  + i + lid - 1 - pre_invalid * dst_step /4, loc_s1 = loc_s0 + dst_step ;
-        int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE),loc_sq1 = loc_sq0 + CONVERT(dst1_step);
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            lm_sqsum[0][bf_loc] += sqsum_t[0];
-            lm_sqsum[1][bf_loc] += sqsum_t[1];
-            sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
-                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-            sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
-                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-
-kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__global int *sum ,
-                          __global TYPE *sqsum,int rows,int cols,int src_step,int src1_step,int sum_step,
-                          int sqsum_step,int sum_offset,int sqsum_offset)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int4 src_t[2], sum_t[2];
-    TYPE4 sqsrc_t[2],sqsum_t[2];
-    __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
-    __local int *sum_p;
-    __local TYPE *sqsum_p;
-    src_step = src_step >> 4;
-    src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
-    gid <<= 1;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
-        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid ] : (TYPE4)0;
-        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid  + 1] : (int4)0;
-        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid  + 1] : (TYPE4)0;
-
-        sum_t[0] =  (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] =  (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = sqsrc_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = sqsrc_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][bi]  +=  lm_sqsum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-
-                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(gid == 0 && (i + lid) <= rows)
-        {
-            sum[sum_offset + i + lid] = 0;
-            sqsum[sqsum_offset + i + lid] = 0;
-        }
-        if(i + lid == 0)
-        {
-            int loc0 = gid  * sum_step;
-            int loc1 = gid  * CONVERT(sqsum_step);
-            for(int k = 1; k <= 8; k++)
-            {
-                if(gid * 4 + k > cols) break;
-                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-                sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
-            }
-        }
-        int loc_s0 = sum_offset + gid  * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-        int loc_sq0 = sqsum_offset + gid  * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
-
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            lm_sqsum[0][bf_loc] += sqsum_t[0];
-            lm_sqsum[1][bf_loc] += sqsum_t[1];
-            sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k >= cols) break;
-                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-            sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + 4 + k >= cols) break;
-                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-          }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global TYPE *sqsum,
-                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step, int dst1_step)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    float4 src_t[2], sum_t[2];
-    TYPE4 sqsum_t[2];
-    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
-    __local float* sum_p;
-    __local TYPE* sqsum_p;
-    src_step = src_step >> 2;
-    gid = gid << 1;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : (float4)0);
-        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
-
-        sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
-
-        lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][bi]  +=  lm_sqsum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-
-                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
-        int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            lm_sqsum[0][bf_loc] += sqsum_t[0];
-            lm_sqsum[1][bf_loc] += sqsum_t[1];
-            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
-                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
-                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-
-kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,__global float *sum ,
-                          __global TYPE *sqsum,int rows,int cols,int src_step,int src1_step, int sum_step,
-                          int sqsum_step,int sum_offset,int sqsum_offset)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    float4 src_t[2], sum_t[2];
-    TYPE4 sqsrc_t[2],sqsum_t[2];
-    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
-    __local float *sum_p;
-    __local TYPE *sqsum_p;
-    src_step = src_step >> 4;
-    src1_step = (src1_step / sizeof(TYPE)) >> 2;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
-        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2] : (TYPE4)0;
-        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
-        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2 + 1] : (TYPE4)0;
-
-        sum_t[0] =  (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] =  (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = sqsrc_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = sqsrc_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][bi]  +=  lm_sqsum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-
-                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
-                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(gid == 0 && (i + lid) <= rows)
-        {
-            sum[sum_offset + i + lid] = 0;
-            sqsum[sqsum_offset + i + lid] = 0;
-        }
-        if(i + lid == 0)
-        {
-            int loc0 = gid * 2 * sum_step;
-            int loc1 = gid * 2 * CONVERT(sqsum_step);
-            for(int k = 1; k <= 8; k++)
-            {
-                if(gid * 8 + k > cols) break;
-                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-                sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
-            }
-        }
-        int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-        int loc_sq0 = sqsum_offset + gid * 2 * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            lm_sqsum[0][bf_loc] += sqsum_t[0];
-            lm_sqsum[1][bf_loc] += sqsum_t[1];
-            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + k >= cols) break;
-                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + 4 + k >= cols) break;
-                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_integral_sum.cl b/modules/ocl/src/opencl/imgproc_integral_sum.cl
deleted file mode 100644
index 6624061..0000000
--- a/modules/ocl/src/opencl/imgproc_integral_sum.cl
+++ /dev/null
@@ -1,412 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#define LSIZE 256
-#define LSIZE_1 255
-#define LSIZE_2 254
-#define HF_LSIZE 128
-#define LOG_LSIZE 8
-#define LOG_NUM_BANKS 5
-#define NUM_BANKS 32
-#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
-
-
-kernel void integral_sum_cols_D4(__global uchar4 *src,__global int *sum ,
-                              int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int4 src_t[2], sum_t[2];
-    __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local int* sum_p;
-    src_step = src_step >> 2;
-    gid = gid << 1;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid]) : 0);
-        src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid + 1]) : 0);
-
-        sum_t[0] =  (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
-                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-            }
-            sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
-                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-
-kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
-                              int rows,int cols,int src_step,int sum_step,
-                              int sum_offset)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    int4 src_t[2], sum_t[2];
-    __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local int *sum_p;
-    src_step = src_step >> 4;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : 0;
-        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : 0;
-
-        sum_t[0] =  (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(gid == 0 && (i + lid) <= rows)
-        {
-            sum[sum_offset + i + lid] = 0;
-        }
-        if(i + lid == 0)
-        {
-            int loc0 = gid * 2 * sum_step;
-            for(int k = 1; k <= 8; k++)
-            {
-                if(gid * 8 + k > cols) break;
-                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-            }
-        }
-
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + k >= cols) break;
-                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-            }
-            sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + 4 + k >= cols) break;
-                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-kernel void integral_sum_cols_D5(__global uchar4 *src,__global float *sum ,
-                              int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    float4 src_t[2], sum_t[2];
-    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float* sum_p;
-    src_step = src_step >> 2;
-    gid = gid << 1;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid]) : (float4)0);
-        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid + 1]) : (float4)0);
-
-        sum_t[0] =  (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
-                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-            }
-            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
-                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-
-kernel void integral_sum_rows_D5(__global float4 *srcsum,__global float *sum ,
-                              int rows,int cols,int src_step,int sum_step,
-                              int sum_offset)
-{
-    int lid = get_local_id(0);
-    int gid = get_group_id(0);
-    float4 src_t[2], sum_t[2];
-    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float *sum_p;
-    src_step = src_step >> 4;
-    for(int i = 0; i < rows; i =i + LSIZE_1)
-    {
-        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
-        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
-
-        sum_t[0] =  (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
-        lm_sum[0][bf_loc] = src_t[0];
-
-        lm_sum[1][bf_loc] = src_t[1];
-
-        int offset = 1;
-        for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi]  +=  lm_sum[lid >> 7][ai];
-            }
-            offset <<= 1;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lid < 2)
-        {
-            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
-        }
-        for(int d = 1;  d < LSIZE; d <<= 1)
-        {
-            barrier(CLK_LOCAL_MEM_FENCE);
-            offset >>= 1;
-            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
-            ai += GET_CONFLICT_OFFSET(ai);
-            bi += GET_CONFLICT_OFFSET(bi);
-
-            if((lid & 127) < d)
-            {
-                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
-                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(gid == 0 && (i + lid) <= rows)
-        {
-            sum[sum_offset + i + lid] = 0;
-        }
-        if(i + lid == 0)
-        {
-            int loc0 = gid * 2 * sum_step;
-            for(int k = 1; k <= 8; k++)
-            {
-                if(gid * 8 + k > cols) break;
-                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-            }
-        }
-
-        if(lid > 0 && (i+lid) <= rows)
-        {
-            int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-            lm_sum[0][bf_loc] += sum_t[0];
-            lm_sum[1][bf_loc] += sum_t[1];
-            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + k >= cols) break;
-                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-            }
-            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            for(int k = 0; k < 4; k++)
-            {
-                if(gid * 8 + 4 + k >= cols) break;
-                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-            }
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_median.cl b/modules/ocl/src/opencl/imgproc_median.cl
deleted file mode 100644
index 5fa7a17..0000000
--- a/modules/ocl/src/opencl/imgproc_median.cl
+++ /dev/null
@@ -1,486 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Zero Lin, zero.lin@amd.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-
-/*
-__kernel void medianFilter_C1(__global uchar * src, __global uchar * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep, int m)
-{
-    int dx = get_global_id(0)-(m>>1);
-    int dy = get_global_id(1)-(m>>1);
-
-    short histom[256];
-    for(int i=0;i<256;++i)
-        histom[i]=0;
-
-
-    for(int i=0;i<m;++i)
-    {
-        __global uchar * data = src + srcOffset + mul24(srcStep,clamp(dy + (i), 0, rows-1));
-        for(int j=dx;j<dx+m;++j)
-        {
-            histom[data[clamp(j, 0, cols-1)]]++;
-        }
-    }
-
-    int now=0;
-    int goal=(m*m+1)>>1;
-    int v;
-    for(int i=0;i<256;++i)
-    {
-        v=(now<goal?i:v);
-        now+=histom[i];
-    }
-
-    if(dy<rows && dx<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=v;
-}
-*/
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter3_C4_D0(__global uchar4 * src, __global uchar4 * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local uchar4 data[18][18];
-    __global uchar4* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -1;
-    int dy = get_global_id(1) - get_local_id(1) -1;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 9*18-1);
-
-    int dr=id/18;
-    int dc=id%18;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+9, 0, rows-1);
-    data[dr+9][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    uchar4 p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2];
-    uchar4 p3=data[y+1][x], p4=data[y+1][x+1], p5=data[y+1][x+2];
-    uchar4 p6=data[y+2][x], p7=data[y+2][x+1], p8=data[y+2][x+2];
-    uchar4 mid;
-
-    op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1);
-    op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5);
-    op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7);
-    op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
-    op(p4, p2); op(p6, p4); op(p4, p2);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter3_C1_D0(__global uchar * src, __global uchar * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local uchar data[18][18];
-    __global uchar* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -1;
-    int dy = get_global_id(1) - get_local_id(1) -1;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 9*18-1);
-
-    int dr=id/18;
-    int dc=id%18;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+9, 0, rows-1);
-    data[dr+9][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    uchar p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2];
-    uchar p3=data[y+1][x], p4=data[y+1][x+1], p5=data[y+1][x+2];
-    uchar p6=data[y+2][x], p7=data[y+2][x+1], p8=data[y+2][x+2];
-    uchar mid;
-
-    op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1);
-    op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5);
-    op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7);
-    op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
-    op(p4, p2); op(p6, p4); op(p4, p2);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter3_C1_D5(__global float * src, __global float * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local float data[18][18];
-    __global float* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -1;
-    int dy = get_global_id(1) - get_local_id(1) -1;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 9*18-1);
-
-    int dr=id/18;
-    int dc=id%18;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+9, 0, rows-1);
-    data[dr+9][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    float p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2];
-    float p3=data[y+1][x], p4=data[y+1][x+1], p5=data[y+1][x+2];
-    float p6=data[y+2][x], p7=data[y+2][x+1], p8=data[y+2][x+2];
-    float mid;
-
-    op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1);
-    op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5);
-    op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7);
-    op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
-    op(p4, p2); op(p6, p4); op(p4, p2);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter3_C4_D5(__global float4 * src, __global float4 * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local float4 data[18][18];
-    __global float4* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -1;
-    int dy = get_global_id(1) - get_local_id(1) -1;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 9*18-1);
-
-    int dr=id/18;
-    int dc=id%18;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+9, 0, rows-1);
-    data[dr+9][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    float4 p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2];
-    float4 p3=data[y+1][x], p4=data[y+1][x+1], p5=data[y+1][x+2];
-    float4 p6=data[y+2][x], p7=data[y+2][x+1], p8=data[y+2][x+2];
-    float4 mid;
-
-    op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1);
-    op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5);
-    op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7);
-    op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
-    op(p4, p2); op(p6, p4); op(p4, p2);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter5_C4_D0(__global uchar4 * src, __global uchar4 * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local uchar4 data[20][20];
-    __global uchar4* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -2;
-    int dy = get_global_id(1) - get_local_id(1) -2;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 10*20-1);
-
-    int dr=id/20;
-    int dc=id%20;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+10, 0, rows-1);
-    data[dr+10][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    uchar4 p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2], p3=data[y][x+3], p4=data[y][x+4];
-    uchar4 p5=data[y+1][x], p6=data[y+1][x+1], p7=data[y+1][x+2], p8=data[y+1][x+3], p9=data[y+1][x+4];
-    uchar4 p10=data[y+2][x], p11=data[y+2][x+1], p12=data[y+2][x+2], p13=data[y+2][x+3], p14=data[y+2][x+4];
-    uchar4 p15=data[y+3][x], p16=data[y+3][x+1], p17=data[y+3][x+2], p18=data[y+3][x+3], p19=data[y+3][x+4];
-    uchar4 p20=data[y+4][x], p21=data[y+4][x+1], p22=data[y+4][x+2], p23=data[y+4][x+3], p24=data[y+4][x+4];
-    uchar4 mid;
-
-    op(p1, p2); op(p0, p1); op(p1, p2); op(p4, p5); op(p3, p4);
-    op(p4, p5); op(p0, p3); op(p2, p5); op(p2, p3); op(p1, p4);
-    op(p1, p2); op(p3, p4); op(p7, p8); op(p6, p7); op(p7, p8);
-    op(p10, p11); op(p9, p10); op(p10, p11); op(p6, p9); op(p8, p11);
-    op(p8, p9); op(p7, p10); op(p7, p8); op(p9, p10); op(p0, p6);
-    op(p4, p10); op(p4, p6); op(p2, p8); op(p2, p4); op(p6, p8);
-    op(p1, p7); op(p5, p11); op(p5, p7); op(p3, p9); op(p3, p5);
-    op(p7, p9); op(p1, p2); op(p3, p4); op(p5, p6); op(p7, p8);
-    op(p9, p10); op(p13, p14); op(p12, p13); op(p13, p14); op(p16, p17);
-    op(p15, p16); op(p16, p17); op(p12, p15); op(p14, p17); op(p14, p15);
-    op(p13, p16); op(p13, p14); op(p15, p16); op(p19, p20); op(p18, p19);
-    op(p19, p20); op(p21, p22); op(p23, p24); op(p21, p23); op(p22, p24);
-    op(p22, p23); op(p18, p21); op(p20, p23); op(p20, p21); op(p19, p22);
-    op(p22, p24); op(p19, p20); op(p21, p22); op(p23, p24); op(p12, p18);
-    op(p16, p22); op(p16, p18); op(p14, p20); op(p20, p24); op(p14, p16);
-    op(p18, p20); op(p22, p24); op(p13, p19); op(p17, p23); op(p17, p19);
-    op(p15, p21); op(p15, p17); op(p19, p21); op(p13, p14); op(p15, p16);
-    op(p17, p18); op(p19, p20); op(p21, p22); op(p23, p24); op(p0, p12);
-    op(p8, p20); op(p8, p12); op(p4, p16); op(p16, p24); op(p12, p16);
-    op(p2, p14); op(p10, p22); op(p10, p14); op(p6, p18); op(p6, p10);
-    op(p10, p12); op(p1, p13); op(p9, p21); op(p9, p13); op(p5, p17);
-    op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
-    op(p7, p11); op(p11, p13); op(p11, p12);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter5_C1_D0(__global uchar * src, __global uchar * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local uchar data[20][20];
-    __global uchar* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -2;
-    int dy = get_global_id(1) - get_local_id(1) -2;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 10*20-1);
-
-    int dr=id/20;
-    int dc=id%20;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+10, 0, rows-1);
-    data[dr+10][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    uchar p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2], p3=data[y][x+3], p4=data[y][x+4];
-    uchar p5=data[y+1][x], p6=data[y+1][x+1], p7=data[y+1][x+2], p8=data[y+1][x+3], p9=data[y+1][x+4];
-    uchar p10=data[y+2][x], p11=data[y+2][x+1], p12=data[y+2][x+2], p13=data[y+2][x+3], p14=data[y+2][x+4];
-    uchar p15=data[y+3][x], p16=data[y+3][x+1], p17=data[y+3][x+2], p18=data[y+3][x+3], p19=data[y+3][x+4];
-    uchar p20=data[y+4][x], p21=data[y+4][x+1], p22=data[y+4][x+2], p23=data[y+4][x+3], p24=data[y+4][x+4];
-    uchar mid;
-
-    op(p1, p2); op(p0, p1); op(p1, p2); op(p4, p5); op(p3, p4);
-    op(p4, p5); op(p0, p3); op(p2, p5); op(p2, p3); op(p1, p4);
-    op(p1, p2); op(p3, p4); op(p7, p8); op(p6, p7); op(p7, p8);
-    op(p10, p11); op(p9, p10); op(p10, p11); op(p6, p9); op(p8, p11);
-    op(p8, p9); op(p7, p10); op(p7, p8); op(p9, p10); op(p0, p6);
-    op(p4, p10); op(p4, p6); op(p2, p8); op(p2, p4); op(p6, p8);
-    op(p1, p7); op(p5, p11); op(p5, p7); op(p3, p9); op(p3, p5);
-    op(p7, p9); op(p1, p2); op(p3, p4); op(p5, p6); op(p7, p8);
-    op(p9, p10); op(p13, p14); op(p12, p13); op(p13, p14); op(p16, p17);
-    op(p15, p16); op(p16, p17); op(p12, p15); op(p14, p17); op(p14, p15);
-    op(p13, p16); op(p13, p14); op(p15, p16); op(p19, p20); op(p18, p19);
-    op(p19, p20); op(p21, p22); op(p23, p24); op(p21, p23); op(p22, p24);
-    op(p22, p23); op(p18, p21); op(p20, p23); op(p20, p21); op(p19, p22);
-    op(p22, p24); op(p19, p20); op(p21, p22); op(p23, p24); op(p12, p18);
-    op(p16, p22); op(p16, p18); op(p14, p20); op(p20, p24); op(p14, p16);
-    op(p18, p20); op(p22, p24); op(p13, p19); op(p17, p23); op(p17, p19);
-    op(p15, p21); op(p15, p17); op(p19, p21); op(p13, p14); op(p15, p16);
-    op(p17, p18); op(p19, p20); op(p21, p22); op(p23, p24); op(p0, p12);
-    op(p8, p20); op(p8, p12); op(p4, p16); op(p16, p24); op(p12, p16);
-    op(p2, p14); op(p10, p22); op(p10, p14); op(p6, p18); op(p6, p10);
-    op(p10, p12); op(p1, p13); op(p9, p21); op(p9, p13); op(p5, p17);
-    op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
-    op(p7, p11); op(p11, p13); op(p11, p12);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter5_C4_D5(__global float4 * src, __global float4 * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local float4 data[20][20];
-    __global float4* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -2;
-    int dy = get_global_id(1) - get_local_id(1) -2;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 10*20-1);
-
-    int dr=id/20;
-    int dc=id%20;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+10, 0, rows-1);
-    data[dr+10][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    float4 p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2], p3=data[y][x+3], p4=data[y][x+4];
-    float4 p5=data[y+1][x], p6=data[y+1][x+1], p7=data[y+1][x+2], p8=data[y+1][x+3], p9=data[y+1][x+4];
-    float4 p10=data[y+2][x], p11=data[y+2][x+1], p12=data[y+2][x+2], p13=data[y+2][x+3], p14=data[y+2][x+4];
-    float4 p15=data[y+3][x], p16=data[y+3][x+1], p17=data[y+3][x+2], p18=data[y+3][x+3], p19=data[y+3][x+4];
-    float4 p20=data[y+4][x], p21=data[y+4][x+1], p22=data[y+4][x+2], p23=data[y+4][x+3], p24=data[y+4][x+4];
-    float4 mid;
-
-    op(p1, p2); op(p0, p1); op(p1, p2); op(p4, p5); op(p3, p4);
-    op(p4, p5); op(p0, p3); op(p2, p5); op(p2, p3); op(p1, p4);
-    op(p1, p2); op(p3, p4); op(p7, p8); op(p6, p7); op(p7, p8);
-    op(p10, p11); op(p9, p10); op(p10, p11); op(p6, p9); op(p8, p11);
-    op(p8, p9); op(p7, p10); op(p7, p8); op(p9, p10); op(p0, p6);
-    op(p4, p10); op(p4, p6); op(p2, p8); op(p2, p4); op(p6, p8);
-    op(p1, p7); op(p5, p11); op(p5, p7); op(p3, p9); op(p3, p5);
-    op(p7, p9); op(p1, p2); op(p3, p4); op(p5, p6); op(p7, p8);
-    op(p9, p10); op(p13, p14); op(p12, p13); op(p13, p14); op(p16, p17);
-    op(p15, p16); op(p16, p17); op(p12, p15); op(p14, p17); op(p14, p15);
-    op(p13, p16); op(p13, p14); op(p15, p16); op(p19, p20); op(p18, p19);
-    op(p19, p20); op(p21, p22); op(p23, p24); op(p21, p23); op(p22, p24);
-    op(p22, p23); op(p18, p21); op(p20, p23); op(p20, p21); op(p19, p22);
-    op(p22, p24); op(p19, p20); op(p21, p22); op(p23, p24); op(p12, p18);
-    op(p16, p22); op(p16, p18); op(p14, p20); op(p20, p24); op(p14, p16);
-    op(p18, p20); op(p22, p24); op(p13, p19); op(p17, p23); op(p17, p19);
-    op(p15, p21); op(p15, p17); op(p19, p21); op(p13, p14); op(p15, p16);
-    op(p17, p18); op(p19, p20); op(p21, p22); op(p23, p24); op(p0, p12);
-    op(p8, p20); op(p8, p12); op(p4, p16); op(p16, p24); op(p12, p16);
-    op(p2, p14); op(p10, p22); op(p10, p14); op(p6, p18); op(p6, p10);
-    op(p10, p12); op(p1, p13); op(p9, p21); op(p9, p13); op(p5, p17);
-    op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
-    op(p7, p11); op(p11, p13); op(p11, p12);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
-}
-#undef op
-
-#define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
-__kernel void medianFilter5_C1_D5(__global float * src, __global float * dst,  int srcOffset, int dstOffset, int cols,
-                                int rows, int srcStep, int dstStep)
-{
-
-    __local float data[20][20];
-    __global float* source=src + srcOffset;
-
-    int dx = get_global_id(0) - get_local_id(0) -2;
-    int dy = get_global_id(1) - get_local_id(1) -2;
-
-    const int id = min((int)(get_local_id(0)*16+get_local_id(1)), 10*20-1);
-
-    int dr=id/20;
-    int dc=id%20;
-    int r=clamp(dy+dr, 0, rows-1);
-    int c=clamp(dx+dc, 0, cols-1);
-
-    data[dr][dc] = source[r*srcStep + c];
-    r=clamp(dy+dr+10, 0, rows-1);
-    data[dr+10][dc] = source[r*srcStep + c];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int x =get_local_id(0);
-    int y =get_local_id(1);
-    float p0=data[y][x], p1=data[y][x+1], p2=data[y][x+2], p3=data[y][x+3], p4=data[y][x+4];
-    float p5=data[y+1][x], p6=data[y+1][x+1], p7=data[y+1][x+2], p8=data[y+1][x+3], p9=data[y+1][x+4];
-    float p10=data[y+2][x], p11=data[y+2][x+1], p12=data[y+2][x+2], p13=data[y+2][x+3], p14=data[y+2][x+4];
-    float p15=data[y+3][x], p16=data[y+3][x+1], p17=data[y+3][x+2], p18=data[y+3][x+3], p19=data[y+3][x+4];
-    float p20=data[y+4][x], p21=data[y+4][x+1], p22=data[y+4][x+2], p23=data[y+4][x+3], p24=data[y+4][x+4];
-    float mid;
-
-    op(p1, p2); op(p0, p1); op(p1, p2); op(p4, p5); op(p3, p4);
-    op(p4, p5); op(p0, p3); op(p2, p5); op(p2, p3); op(p1, p4);
-    op(p1, p2); op(p3, p4); op(p7, p8); op(p6, p7); op(p7, p8);
-    op(p10, p11); op(p9, p10); op(p10, p11); op(p6, p9); op(p8, p11);
-    op(p8, p9); op(p7, p10); op(p7, p8); op(p9, p10); op(p0, p6);
-    op(p4, p10); op(p4, p6); op(p2, p8); op(p2, p4); op(p6, p8);
-    op(p1, p7); op(p5, p11); op(p5, p7); op(p3, p9); op(p3, p5);
-    op(p7, p9); op(p1, p2); op(p3, p4); op(p5, p6); op(p7, p8);
-    op(p9, p10); op(p13, p14); op(p12, p13); op(p13, p14); op(p16, p17);
-    op(p15, p16); op(p16, p17); op(p12, p15); op(p14, p17); op(p14, p15);
-    op(p13, p16); op(p13, p14); op(p15, p16); op(p19, p20); op(p18, p19);
-    op(p19, p20); op(p21, p22); op(p23, p24); op(p21, p23); op(p22, p24);
-    op(p22, p23); op(p18, p21); op(p20, p23); op(p20, p21); op(p19, p22);
-    op(p22, p24); op(p19, p20); op(p21, p22); op(p23, p24); op(p12, p18);
-    op(p16, p22); op(p16, p18); op(p14, p20); op(p20, p24); op(p14, p16);
-    op(p18, p20); op(p22, p24); op(p13, p19); op(p17, p23); op(p17, p19);
-    op(p15, p21); op(p15, p17); op(p19, p21); op(p13, p14); op(p15, p16);
-    op(p17, p18); op(p19, p20); op(p21, p22); op(p23, p24); op(p0, p12);
-    op(p8, p20); op(p8, p12); op(p4, p16); op(p16, p24); op(p12, p16);
-    op(p2, p14); op(p10, p22); op(p10, p14); op(p6, p18); op(p6, p10);
-    op(p10, p12); op(p1, p13); op(p9, p21); op(p9, p13); op(p5, p17);
-    op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
-    op(p7, p11); op(p11, p13); op(p11, p12);
-
-    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
-        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
-}
-#undef op
diff --git a/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl b/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl
deleted file mode 100644
index 86d4e5d..0000000
--- a/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl
+++ /dev/null
@@ -1,96 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the uintel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business uinterruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-typedef float2 cfloat;
-inline cfloat cmulf(cfloat a, cfloat b)
-{
-    return (cfloat)( a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
-}
-
-inline cfloat conjf(cfloat a)
-{
-    return (cfloat)( a.x, - a.y );
-}
-
-__kernel void
-mulAndScaleSpectrumsKernel(
-    __global const cfloat* a,
-    __global const cfloat* b,
-    float scale,
-    __global cfloat* dst,
-    uint cols,
-    uint rows,
-    uint mstep
-)
-{
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-    const uint idx = mad24(y, mstep / sizeof(cfloat), x);
-    if (x < cols && y < rows)
-    {
-        cfloat v = cmulf(a[idx], b[idx]);
-        dst[idx] = (cfloat)( v.x * scale, v.y * scale );
-    }
-}
-__kernel void
-mulAndScaleSpectrumsKernel_CONJ(
-    __global const cfloat* a,
-    __global const cfloat* b,
-    float scale,
-    __global cfloat* dst,
-    uint cols,
-    uint rows,
-    uint mstep
-)
-{
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-    const uint idx = mad24(y, mstep / sizeof(cfloat), x);
-    if (x < cols && y < rows)
-    {
-        cfloat v = cmulf(a[idx], conjf(b[idx]));
-        dst[idx] = (cfloat)( v.x * scale, v.y * scale );
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
deleted file mode 100644
index e1e3ca8..0000000
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ /dev/null
@@ -1,408 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wu Zailong, bullet@yeah.net
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-enum
-{
-    INTER_BITS = 5,
-    INTER_TAB_SIZE = 1 << INTER_BITS,
-    INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE
-};
-
-#ifdef INTER_NEAREST
-#define convertToWT
-#endif
-
-#ifdef BORDER_CONSTANT
-#define EXTRAPOLATE(v2, v) v = scalar;
-#elif defined BORDER_REPLICATE
-#define EXTRAPOLATE(v2, v) \
-    { \
-        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \
-        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
-    }
-#elif defined BORDER_WRAP
-#define EXTRAPOLATE(v2, v) \
-    { \
-        if (v2.x < 0) \
-            v2.x -= ((v2.x - src_cols + 1) / src_cols) * src_cols; \
-        if (v2.x >= src_cols) \
-            v2.x %= src_cols; \
-        \
-        if (v2.y < 0) \
-            v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \
-        if( v2.y >= src_rows ) \
-            v2.y %= src_rows; \
-        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
-#ifdef BORDER_REFLECT
-#define DELTA int delta = 0
-#else
-#define DELTA int delta = 1
-#endif
-#define EXTRAPOLATE(v2, v) \
-    { \
-        DELTA; \
-        if (src_cols == 1) \
-            v2.x = 0; \
-        else \
-            do \
-            { \
-                if( v2.x < 0 ) \
-                    v2.x = -v2.x - 1 + delta; \
-                else \
-                    v2.x = src_cols - 1 - (v2.x - src_cols) - delta; \
-            } \
-            while (v2.x >= src_cols || v2.x < 0); \
-        \
-        if (src_rows == 1) \
-            v2.y = 0; \
-        else \
-            do \
-            { \
-                if( v2.y < 0 ) \
-                    v2.y = -v2.y - 1 + delta; \
-                else \
-                    v2.y = src_rows - 1 - (v2.y - src_rows) - delta; \
-            } \
-            while (v2.y >= src_rows || v2.y < 0); \
-        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
-    }
-#else
-#error No extrapolation method
-#endif
-
-#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
-
-#ifdef INTER_NEAREST
-
-__kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst,
-        __global float * map1, __global float * map2,
-        int src_offset, int dst_offset, int map1_offset, int map2_offset,
-        int src_step, int dst_step, int map1_step, int map2_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-        int map2Idx = mad24(y, map2_step, x + map2_offset);
-
-        int gx = convert_int_sat_rte(map1[map1Idx]);
-        int gy = convert_int_sat_rte(map2[map2Idx]);
-
-        if (NEED_EXTRAPOLATION(gx, gy))
-        {
-#ifndef BORDER_CONSTANT
-            int2 gxy = (int2)(gx, gy);
-#endif
-            EXTRAPOLATE(gxy, dst[dstIdx]);
-        }
-        else
-        {
-            int srcIdx = mad24(gy, src_step, gx + src_offset);
-            dst[dstIdx] = src[srcIdx];
-        }
-    }
-}
-
-__kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __global float2 * map1,
-        int src_offset, int dst_offset, int map1_offset,
-        int src_step, int dst_step, int map1_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-
-        int2 gxy = convert_int2_sat_rte(map1[map1Idx]);
-        int gx = gxy.x, gy = gxy.y;
-
-        if (NEED_EXTRAPOLATION(gx, gy))
-            EXTRAPOLATE(gxy, dst[dstIdx])
-        else
-        {
-            int srcIdx = mad24(gy, src_step, gx + src_offset);
-            dst[dstIdx] = src[srcIdx];
-        }
-    }
-}
-
-__kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __global short2 * map1,
-        int src_offset, int dst_offset, int map1_offset,
-        int src_step, int dst_step, int map1_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-
-        int2 gxy = convert_int2(map1[map1Idx]);
-        int gx = gxy.x, gy = gxy.y;
-
-        if (NEED_EXTRAPOLATION(gx, gy))
-            EXTRAPOLATE(gxy, dst[dstIdx])
-        else
-        {
-            int srcIdx = mad24(gy, src_step, gx + src_offset);
-            dst[dstIdx] = src[srcIdx];
-        }
-    }
-}
-
-__kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * dst, __global short2 * map1, __global ushort * map2,
-        int src_offset, int dst_offset, int map1_offset, int map2_offset,
-        int src_step, int dst_step, int map1_step, int map2_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-        int map2Idx = mad24(y, map2_step, x + map2_offset);
-
-        int map2Value = convert_int(map2[map2Idx]) & (INTER_TAB_SIZE2 - 1);
-        int dx = (map2Value & (INTER_TAB_SIZE - 1)) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
-        int dy = (map2Value >> INTER_BITS) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
-        int2 gxy = convert_int2(map1[map1Idx]) + (int2)(dx, dy);
-        int gx = gxy.x, gy = gxy.y;
-
-        if (NEED_EXTRAPOLATION(gx, gy))
-            EXTRAPOLATE(gxy, dst[dstIdx])
-        else
-        {
-            int srcIdx = mad24(gy, src_step, gx + src_offset);
-            dst[dstIdx] = src[srcIdx];
-        }
-    }
-}
-
-#elif INTER_LINEAR
-
-__kernel void remap_16SC2_16UC1(__global T const * restrict src, __global T * dst,
-        __global short2 * restrict map1, __global ushort * restrict map2,
-        int src_offset, int dst_offset, int map1_offset, int map2_offset,
-        int src_step, int dst_step, int map1_step, int map2_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-        int map2Idx = mad24(y, map2_step, x + map2_offset);
-
-        int2 map_dataA = convert_int2(map1[map1Idx]);
-        int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
-        int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
-        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
-
-        ushort map2Value = (ushort)(map2[map2Idx] & (INTER_TAB_SIZE2 - 1));
-        WT2 u = (WT2)(map2Value & (INTER_TAB_SIZE - 1), map2Value >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);
-
-        WT scalar = convertToWT(nVal);
-        WT a = scalar, b = scalar, c = scalar, d = scalar;
-
-        if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
-            a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataA, a);
-
-        if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
-            b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataB, b);
-
-        if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
-            c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataC, c);
-
-        if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
-            d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataD, d);
-
-        WT dst_data = a * (1 - u.x) * (1 - u.y) +
-                      b * (u.x)     * (1 - u.y) +
-                      c * (1 - u.x) * (u.y) +
-                      d * (u.x)     * (u.y);
-        dst[dstIdx] = convertToT(dst_data);
-    }
-}
-
-__kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
-        __global float * map1, __global float * map2,
-        int src_offset, int dst_offset, int map1_offset, int map2_offset,
-        int src_step, int dst_step, int map1_step, int map2_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-        int map2Idx = mad24(y, map2_step, x + map2_offset);
-
-        float2 map_data = (float2)(map1[map1Idx], map2[map2Idx]);
-
-        int2 map_dataA = convert_int2_sat_rtn(map_data);
-        int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
-        int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
-        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
-
-        float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
-        WT scalar = convertToWT(nVal);
-        WT a = scalar, b = scalar, c = scalar, d = scalar;
-
-        if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
-            a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataA, a);
-
-        if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
-            b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataB, b);
-
-        if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
-            c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataC, c);
-
-        if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
-            d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataD, d);
-
-        WT dst_data = a * (1 - u.x) * (1 - u.y) +
-                      b * (u.x)     * (1 - u.y) +
-                      c * (1 - u.x) * (u.y) +
-                      d * (u.x)     * (u.y);
-        dst[dstIdx] = convertToT(dst_data);
-    }
-}
-
-__kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
-        __global float2 * map1,
-        int src_offset, int dst_offset, int map1_offset,
-        int src_step, int dst_step, int map1_step,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        int dstIdx = mad24(y, dst_step, x + dst_offset);
-        int map1Idx = mad24(y, map1_step, x + map1_offset);
-
-        float2 map_data = map1[map1Idx];
-        int2 map_dataA = convert_int2_sat_rtn(map_data);
-        int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
-        int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
-        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
-
-        float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
-        WT scalar = convertToWT(nVal);
-        WT a = scalar, b = scalar, c = scalar, d = scalar;
-
-        if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
-            a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataA, a);
-
-        if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
-            b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataB, b);
-
-        if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
-            c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataC, c);
-
-        if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
-            d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
-        else
-            EXTRAPOLATE(map_dataD, d);
-
-        WT dst_data = a * (1 - u.x) * (1 - u.y) +
-                      b * (u.x)     * (1 - u.y) +
-                      c * (1 - u.x) * (u.y) +
-                      d * (u.x)     * (u.y);
-        dst[dstIdx] = convertToT(dst_data);
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl
deleted file mode 100644
index ebf8c71..0000000
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ /dev/null
@@ -1,405 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//	  Niko Li, newlife20080214@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-// resize kernel
-// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
-// We shall support other types later if necessary.
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#define INTER_RESIZE_COEF_BITS 11
-#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
-#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
-#define CAST_SCALE (1.0f/(1<<CAST_BITS))
-#define INC(x,l) ((x+1) >= (l) ? (x):((x)+1))
-
-#ifdef LN
-
-__kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src,
-                     int dst_offset, int src_offset,int dst_step, int src_step,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int gx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    float4  sx, u, xf;
-    int4 x, DX;
-    gx = (gx<<2) - (dst_offset&3);
-    DX = (int4)(gx, gx+1, gx+2, gx+3);
-    sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f;
-    xf = floor(sx);
-    x = convert_int4(xf);
-    u = sx - xf;
-    float sy = ((dy+0.5f) * ify - 0.5f);
-    int y = floor(sy);
-    float v = sy - y;
-
-    u = x < 0 ? 0 : u;
-    u = (x >= src_cols) ? 0 : u;
-    x = x < 0 ? 0 : x;
-    x = (x >= src_cols) ? src_cols-1 : x;
-
-    y<0 ? y=0,v=0 : y;
-    y>=src_rows ? y=src_rows-1,v=0 : y;
-
-    int4 U, U1;
-    int V, V1;
-    float4 utmp1, utmp2;
-    float vtmp;
-    float4 scale_vec = INTER_RESIZE_COEF_SCALE;
-    utmp1 = u * scale_vec;
-    utmp2 = scale_vec - utmp1;
-    U = convert_int4(rint(utmp1));
-    U1 = convert_int4(rint(utmp2));
-    vtmp = v * INTER_RESIZE_COEF_SCALE;
-    V = rint(vtmp);
-    V1= rint(INTER_RESIZE_COEF_SCALE - vtmp);
-
-    int y_ = INC(y,src_rows);
-    int4 x_;
-    x_ =  ((x+1 >= src_cols) != 0) ? x : x+1;
-
-    int4 val1, val2, val;
-    int4 sdata1, sdata2, sdata3, sdata4;
-
-    int4 pos1 = mad24((int4)y, (int4)src_step, x+(int4)src_offset);
-    int4 pos2 = mad24((int4)y, (int4)src_step, x_+(int4)src_offset);
-    int4 pos3 = mad24((int4)y_, (int4)src_step, x+(int4)src_offset);
-    int4 pos4 = mad24((int4)y_, (int4)src_step, x_+(int4)src_offset);
-
-    sdata1.s0 = src[pos1.s0];
-    sdata1.s1 = src[pos1.s1];
-    sdata1.s2 = src[pos1.s2];
-    sdata1.s3 = src[pos1.s3];
-
-    sdata2.s0 = src[pos2.s0];
-    sdata2.s1 = src[pos2.s1];
-    sdata2.s2 = src[pos2.s2];
-    sdata2.s3 = src[pos2.s3];
-
-    sdata3.s0 = src[pos3.s0];
-    sdata3.s1 = src[pos3.s1];
-    sdata3.s2 = src[pos3.s2];
-    sdata3.s3 = src[pos3.s3];
-
-    sdata4.s0 = src[pos4.s0];
-    sdata4.s1 = src[pos4.s1];
-    sdata4.s2 = src[pos4.s2];
-    sdata4.s3 = src[pos4.s3];
-
-    val1 = mul24(U1 , sdata1) + mul24(U , sdata2);
-    val2 = mul24(U1 , sdata3) + mul24(U , sdata4);
-    val = mul24((int4)V1 , val1) + mul24((int4)V , val2);
-
-    val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS);
-
-    pos4 = mad24(dy, dst_step, gx+dst_offset);
-    pos4.y++;
-    pos4.z+=2;
-    pos4.w+=3;
-    uchar4 uval = convert_uchar4_sat(val);
-        int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dst_offset&3)==0);
-    if(con)
-    {
-        *(__global uchar4*)(dst + pos4.x)=uval;
-    }
-    else
-    {
-        if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos4.x]=uval.x;
-        }
-        if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos4.y]=uval.y;
-        }
-        if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos4.z]=uval.z;
-        }
-        if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos4.w]=uval.w;
-        }
-    }
-}
-
-__kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
-                     int dst_offset, int src_offset,int dst_step, int src_step,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
-    int x = floor(sx), y = floor(sy);
-    float u = sx - x, v = sy - y;
-
-    if ( x<0 ) x=0,u=0;
-    if ( x>=src_cols ) x=src_cols-1,u=0;
-    if ( y<0 ) y=0,v=0;
-    if (y>=src_rows ) y=src_rows-1,v=0;
-
-    u = u * INTER_RESIZE_COEF_SCALE;
-    v = v * INTER_RESIZE_COEF_SCALE;
-
-    int U = rint(u);
-    int V = rint(v);
-    int U1= rint(INTER_RESIZE_COEF_SCALE - u);
-    int V1= rint(INTER_RESIZE_COEF_SCALE - v);
-
-    int y_ = INC(y,src_rows);
-    int x_ = INC(x,src_cols);
-    int4 srcpos;
-    srcpos.x = mad24(y, src_step, x+src_offset);
-    srcpos.y = mad24(y, src_step, x_+src_offset);
-    srcpos.z = mad24(y_, src_step, x+src_offset);
-    srcpos.w = mad24(y_, src_step, x_+src_offset);
-    int4 data0 = convert_int4(src[srcpos.x]);
-    int4 data1 = convert_int4(src[srcpos.y]);
-    int4 data2 = convert_int4(src[srcpos.z]);
-    int4 data3 = convert_int4(src[srcpos.w]);
-    int4 val = mul24((int4)mul24(U1, V1) ,  data0) + mul24((int4)mul24(U, V1) ,  data1)
-               +mul24((int4)mul24(U1, V) ,  data2)+mul24((int4)mul24(U, V) ,  data3);
-    int dstpos = mad24(dy, dst_step, dx+dst_offset);
-    uchar4 uval =   convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-         dst[dstpos] = uval;
-}
-
-__kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
-                     int dst_offset, int src_offset,int dst_step, int src_step,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
-    int x = floor(sx), y = floor(sy);
-    float u = sx - x, v = sy - y;
-
-    if ( x<0 ) x=0,u=0;
-    if ( x>=src_cols ) x=src_cols-1,u=0;
-    if ( y<0 ) y=0,v=0;
-    if (y>=src_rows ) y=src_rows-1,v=0;
-
-    int y_ = INC(y,src_rows);
-    int x_ = INC(x,src_cols);
-    float u1 = 1.f-u;
-    float v1 = 1.f-v;
-    int4 srcpos;
-    srcpos.x = mad24(y, src_step, x+src_offset);
-    srcpos.y = mad24(y, src_step, x_+src_offset);
-    srcpos.z = mad24(y_, src_step, x+src_offset);
-    srcpos.w = mad24(y_, src_step, x_+src_offset);
-    float data0 = src[srcpos.x];
-    float data1 = src[srcpos.y];
-    float data2 = src[srcpos.z];
-    float data3 = src[srcpos.w];
-    float val1 = u1 *  data0 +
-                u  *  data1 ;
-    float val2 = u1 *  data2 +
-                u *  data3;
-    float val = v1 * val1 + v * val2;
-    int dstpos = mad24(dy, dst_step, dx+dst_offset);
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-         dst[dstpos] = val;
-}
-
-__kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
-                     int dst_offset, int src_offset,int dst_step, int src_step,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
-    int x = floor(sx), y = floor(sy);
-    float u = sx - x, v = sy - y;
-
-    if ( x<0 ) x=0,u=0;
-    if ( x>=src_cols ) x=src_cols-1,u=0;
-    if ( y<0 ) y=0,v=0;
-    if (y>=src_rows ) y=src_rows-1,v=0;
-
-    int y_ = INC(y,src_rows);
-    int x_ = INC(x,src_cols);
-    float u1 = 1.f-u;
-    float v1 = 1.f-v;
-    int4 srcpos;
-    srcpos.x = mad24(y, src_step, x+src_offset);
-    srcpos.y = mad24(y, src_step, x_+src_offset);
-    srcpos.z = mad24(y_, src_step, x+src_offset);
-    srcpos.w = mad24(y_, src_step, x_+src_offset);
-    float4 s_data1, s_data2, s_data3, s_data4;
-    s_data1 = src[srcpos.x];
-    s_data2 = src[srcpos.y];
-    s_data3 = src[srcpos.z];
-    s_data4 = src[srcpos.w];
-    float4 val = u1 * v1 * s_data1 + u * v1 * s_data2
-              +u1 * v *s_data3 + u * v *s_data4;
-    int dstpos = mad24(dy, dst_step, dx+dst_offset);
-
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-         dst[dstpos] = val;
-}
-
-#elif defined NN
-
-__kernel void resizeNN(__global T * dst, __global T * src,
-                       int dst_offset, int src_offset, int dst_step, int src_step,
-                       int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify)
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if (dx < dst_cols && dy < dst_rows)
-    {
-        float s1 = dx * ifx, s2 = dy * ify;
-        int sx = min(convert_int_sat_rtn(s1), src_cols - 1);
-        int sy = min(convert_int_sat_rtn(s2), src_rows - 1);
-
-        int dst_index = mad24(dy, dst_step, dx + dst_offset);
-        int src_index = mad24(sy, src_step, sx + src_offset);
-
-        dst[dst_index] = src[src_index];
-    }
-}
-
-#elif defined AREA
-
-#ifdef AREA_FAST
-
-__kernel void resizeAREA_FAST(__global T * dst, __global T * src,
-                         int dst_offset, int src_offset, int dst_step, int src_step,
-                         int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
-                         __global const int * dmap_tab, __global const int * smap_tab)
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if (dx < dst_cols && dy < dst_rows)
-    {
-        int dst_index = mad24(dy, dst_step, dst_offset + dx);
-
-        __global const int * xmap_tab = dmap_tab;
-        __global const int * ymap_tab = dmap_tab + dst_cols;
-        __global const int * sxmap_tab = smap_tab;
-        __global const int * symap_tab = smap_tab + XSCALE * dst_cols;
-
-        int sx = xmap_tab[dx], sy = ymap_tab[dy];
-        WTV sum = (WTV)(0);
-
-        #pragma unroll
-        for (int y = 0; y < YSCALE; ++y)
-        {
-            int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
-            #pragma unroll
-            for (int x = 0; x < XSCALE; ++x)
-                sum += convertToWTV(src[src_index + sxmap_tab[sx + x]]);
-        }
-
-        dst[dst_index] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE));
-    }
-}
-
-#else
-
-__kernel void resizeAREA(__global T * dst, __global T * src,
-                         int dst_offset, int src_offset, int dst_step, int src_step,
-                         int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
-                         __global const int * ofs_tab, __global const int * map_tab,
-                         __global const float * alpha_tab)
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if (dx < dst_cols && dy < dst_rows)
-    {
-        int dst_index = mad24(dy, dst_step, dst_offset + dx);
-
-        __global const int * xmap_tab = map_tab;
-        __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
-        __global const float * xalpha_tab = alpha_tab;
-        __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
-        __global const int * xofs_tab = ofs_tab;
-        __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
-
-        int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
-        int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
-
-        int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
-        int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
-
-        WTV sum = (WTV)(0), buf;
-        int src_index = mad24(sy0, src_step, src_offset);
-
-        for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
-        {
-            WTV beta = (WTV)(yalpha_tab[yk]);
-            buf = (WTV)(0);
-
-            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
-            {
-                WTV alpha = (WTV)(xalpha_tab[xk]);
-                buf += convertToWTV(src[src_index + sx]) * alpha;
-            }
-            sum += buf * beta;
-        }
-
-        dst[dst_index] = convertToT(sum);
-    }
-}
-
-#endif
-
-#endif
diff --git a/modules/ocl/src/opencl/imgproc_sobel3.cl b/modules/ocl/src/opencl/imgproc_sobel3.cl
deleted file mode 100644
index 8356fce..0000000
--- a/modules/ocl/src/opencl/imgproc_sobel3.cl
+++ /dev/null
@@ -1,347 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef BORDER_CONSTANT
-//CCCCCC|abcdefgh|CCCCCCC
-#define EXTRAPOLATE(x, maxV)
-#elif defined BORDER_REPLICATE
-//aaaaaa|abcdefgh|hhhhhhh
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = max(min((x), (maxV) - 1), 0); \
-    }
-#elif defined BORDER_WRAP
-//cdefgh|abcdefgh|abcdefg
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = ( (x) + (maxV) ) % (maxV); \
-    }
-#elif defined BORDER_REFLECT
-//fedcba|abcdefgh|hgfedcb
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
-    }
-#elif defined BORDER_REFLECT_101
-//gfedcb|abcdefgh|gfedcba
-#define EXTRAPOLATE(x, maxV) \
-    { \
-        (x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
-    }
-#else
-#error No extrapolation method
-#endif
-
-#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
-
-#ifdef BORDER_CONSTANT
-//CCCCCC|abcdefgh|CCCCCCC
-#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
-#else
-#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
-#endif
-
-#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
-#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
-
-#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
-    int srcX = x + srcOffsetX - (kernel_border); \
-    int srcY = y + srcOffsetY - (kernel_border); \
-    int xb = srcX; \
-    int yb = srcY; \
-    \
-    EXTRAPOLATE(xb, (width)); \
-    EXTRAPOLATE(yb, (height)); \
-    lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
-    \
-    if(lix < ((kernel_border)*2)) \
-    { \
-        int xb = srcX+BLK_X; \
-        EXTRAPOLATE(xb,(width)); \
-        lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
-    } \
-    if(liy< ((kernel_border)*2)) \
-    { \
-        int yb = srcY+BLK_Y; \
-        EXTRAPOLATE(yb, (height)); \
-        lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
-    } \
-    if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
-    { \
-        int xb = srcX+BLK_X; \
-        int yb = srcY+BLK_Y; \
-        EXTRAPOLATE(xb,(width)); \
-        EXTRAPOLATE(yb,(height)); \
-        lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
-    }
-
-__kernel void sobel3(
-        __global uchar* Src,
-        const uint      SrcPitch,
-        const int       srcOffsetX,
-        const int       srcOffsetY,
-        __global uchar* DstX,
-        const int       DstXOffset,
-        const uint      DstXPitch,
-        __global uchar* DstY,
-        const int       DstYOffset,
-        const uint      DstYPitch,
-        int             width,
-        int             height,
-        int             dstWidth,
-        int             dstHeight,
-        float           scale
-        )
-{
-    __local float lsmem[BLK_Y+2][BLK_X+2];
-
-    int lix = get_local_id(0);
-    int liy = get_local_id(1);
-
-    int x = (int)get_global_id(0);
-    int y = (int)get_global_id(1);
-
-    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if( x >= dstWidth || y >=dstHeight )  return;
-
-    float u1 = lsmem[liy][lix];
-    float u2 = lsmem[liy][lix+1];
-    float u3 = lsmem[liy][lix+2];
-
-    float m1 = lsmem[liy+1][lix];
-    float m3 = lsmem[liy+1][lix+2];
-
-    float b1 = lsmem[liy+2][lix];
-    float b2 = lsmem[liy+2][lix+1];
-    float b3 = lsmem[liy+2][lix+2];
-
-    //calc and store dx and dy;//
-#ifdef SCHARR
-    DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
-    DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
-#else
-    DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
-    DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
-#endif
-}
-
-__kernel void sobel5(
-        __global uchar* Src,
-        const uint      SrcPitch,
-        const int       srcOffsetX,
-        const int       srcOffsetY,
-        __global uchar* DstX,
-        const int       DstXOffset,
-        const uint      DstXPitch,
-        __global uchar* DstY,
-        const int       DstYOffset,
-        const uint      DstYPitch,
-        int             width,
-        int             height,
-        int             dstWidth,
-        int             dstHeight,
-        float           scale
-        )
-{
-    __local float lsmem[BLK_Y+4][BLK_X+4];
-
-    int lix = get_local_id(0);
-    int liy = get_local_id(1);
-
-    int x = (int)get_global_id(0);
-    int y = (int)get_global_id(1);
-
-    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if( x >= dstWidth || y >=dstHeight )  return;
-
-    float t1 = lsmem[liy][lix];
-    float t2 = lsmem[liy][lix+1];
-    float t3 = lsmem[liy][lix+2];
-    float t4 = lsmem[liy][lix+3];
-    float t5 = lsmem[liy][lix+4];
-
-    float u1 = lsmem[liy+1][lix];
-    float u2 = lsmem[liy+1][lix+1];
-    float u3 = lsmem[liy+1][lix+2];
-    float u4 = lsmem[liy+1][lix+3];
-    float u5 = lsmem[liy+1][lix+4];
-
-    float m1 = lsmem[liy+2][lix];
-    float m2 = lsmem[liy+2][lix+1];
-    float m4 = lsmem[liy+2][lix+3];
-    float m5 = lsmem[liy+2][lix+4];
-
-    float l1 = lsmem[liy+3][lix];
-    float l2 = lsmem[liy+3][lix+1];
-    float l3 = lsmem[liy+3][lix+2];
-    float l4 = lsmem[liy+3][lix+3];
-    float l5 = lsmem[liy+3][lix+4];
-
-    float b1 = lsmem[liy+4][lix];
-    float b2 = lsmem[liy+4][lix+1];
-    float b3 = lsmem[liy+4][lix+2];
-    float b4 = lsmem[liy+4][lix+3];
-    float b5 = lsmem[liy+4][lix+4];
-
-    //calc and store dx and dy;//
-    DSTX(x,y) = scale *
-        mad(12.0f, m4 - m2,
-            mad(6.0f, m5 - m1,
-                mad(8.0f, u4 - u2 + l4 - l2,
-                    mad(4.0f, u5 - u1 + l5 - l1,
-                        mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
-                        )
-                    )
-                )
-            );
-
-    DSTY(x,y) = scale *
-        mad(12.0f, l3 - u3,
-            mad(6.0f, b3 - t3,
-                mad(8.0f, l2 - u2 + l4 - u4,
-                    mad(4.0f, b2 - t2 + b4 - t4,
-                        mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
-                        )
-                    )
-                )
-            );
-}
-
-__kernel void sobel7(
-        __global uchar* Src,
-        const uint      SrcPitch,
-        const int       srcOffsetX,
-        const int       srcOffsetY,
-        __global uchar* DstX,
-        const int       DstXOffset,
-        const uint      DstXPitch,
-        __global uchar* DstY,
-        const int       DstYOffset,
-        const uint      DstYPitch,
-        int             width,
-        int             height,
-        int             dstWidth,
-        int             dstHeight,
-        float           scale
-        )
-{
-    __local float lsmem[BLK_Y+6][BLK_X+6];
-
-    int lix = get_local_id(0);
-    int liy = get_local_id(1);
-
-    int x = (int)get_global_id(0);
-    int y = (int)get_global_id(1);
-
-    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if( x >= dstWidth || y >=dstHeight )  return;
-
-    float tt1 = lsmem[liy][lix];
-    float tt2 = lsmem[liy][lix+1];
-    float tt3 = lsmem[liy][lix+2];
-    float tt4 = lsmem[liy][lix+3];
-    float tt5 = lsmem[liy][lix+4];
-    float tt6 = lsmem[liy][lix+5];
-    float tt7 = lsmem[liy][lix+6];
-
-    float t1 = lsmem[liy+1][lix];
-    float t2 = lsmem[liy+1][lix+1];
-    float t3 = lsmem[liy+1][lix+2];
-    float t4 = lsmem[liy+1][lix+3];
-    float t5 = lsmem[liy+1][lix+4];
-    float t6 = lsmem[liy+1][lix+5];
-    float t7 = lsmem[liy+1][lix+6];
-
-    float u1 = lsmem[liy+2][lix];
-    float u2 = lsmem[liy+2][lix+1];
-    float u3 = lsmem[liy+2][lix+2];
-    float u4 = lsmem[liy+2][lix+3];
-    float u5 = lsmem[liy+2][lix+4];
-    float u6 = lsmem[liy+2][lix+5];
-    float u7 = lsmem[liy+2][lix+6];
-
-    float m1 = lsmem[liy+3][lix];
-    float m2 = lsmem[liy+3][lix+1];
-    float m3 = lsmem[liy+3][lix+2];
-    float m5 = lsmem[liy+3][lix+4];
-    float m6 = lsmem[liy+3][lix+5];
-    float m7 = lsmem[liy+3][lix+6];
-
-    float l1 = lsmem[liy+4][lix];
-    float l2 = lsmem[liy+4][lix+1];
-    float l3 = lsmem[liy+4][lix+2];
-    float l4 = lsmem[liy+4][lix+3];
-    float l5 = lsmem[liy+4][lix+4];
-    float l6 = lsmem[liy+4][lix+5];
-    float l7 = lsmem[liy+4][lix+6];
-
-    float b1 = lsmem[liy+5][lix];
-    float b2 = lsmem[liy+5][lix+1];
-    float b3 = lsmem[liy+5][lix+2];
-    float b4 = lsmem[liy+5][lix+3];
-    float b5 = lsmem[liy+5][lix+4];
-    float b6 = lsmem[liy+5][lix+5];
-    float b7 = lsmem[liy+5][lix+6];
-
-    float bb1 = lsmem[liy+6][lix];
-    float bb2 = lsmem[liy+6][lix+1];
-    float bb3 = lsmem[liy+6][lix+2];
-    float bb4 = lsmem[liy+6][lix+3];
-    float bb5 = lsmem[liy+6][lix+4];
-    float bb6 = lsmem[liy+6][lix+5];
-    float bb7 = lsmem[liy+6][lix+6];
-
-    //calc and store dx and dy
-    DSTX(x,y) = scale *
-        mad(100.0f, m5 - m3,
-            mad(80.0f, m6 - m2,
-                mad(20.0f, m7 - m1,
-                    mad(75.0f, u5 - u3 + l5 - l3,
-                        mad(60.0f, u6 - u2 + l6 - l2,
-                            mad(15.0f, u7 - u1 + l7 - l1,
-                                mad(30.0f, t5 - t3 + b5 - b3,
-                                    mad(24.0f, t6 - t2 + b6 - b2,
-                                        mad(6.0f, t7 - t1 + b7 - b1,
-                                            mad(5.0f, tt5 - tt3 + bb5 - bb3,
-                                                mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
-                                                )
-                                            )
-                                        )
-                                    )
-                                )
-                            )
-                        )
-                    )
-                )
-            );
-
-    DSTY(x,y) = scale *
-        mad(100.0f, l4 - u4,
-            mad(80.0f, b4 - t4,
-                mad(20.0f, bb4 - tt4,
-                    mad(75.0f, l5 - u5 + l3 - u3,
-                        mad(60.0f, b5 - t5 + b3 - t3,
-                            mad(15.0f, bb5 - tt5 + bb3 - tt3,
-                                mad(30.0f, l6 - u6 + l2 - u2,
-                                    mad(24.0f, b6 - t6 + b2 - t2,
-                                        mad(6.0f, bb6 - tt6 + bb2 - tt2,
-                                            mad(5.0f, l7 - u7 + l1 - u1,
-                                                mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
-                                                )
-                                            )
-                                        )
-                                    )
-                                )
-                            )
-                        )
-                    )
-                )
-            );
-}
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl
deleted file mode 100644
index 85631be..0000000
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ /dev/null
@@ -1,136 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#ifdef VECTORIZED
-
-__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
-                        __global T * dst, int dst_offset, int dst_step,
-                        T thresh, T max_val, int max_index, int rows, int cols)
-{
-    int gx = get_global_id(0);
-    int gy = get_global_id(1);
-
-    if (gx < cols && gy < rows)
-    {
-        gx *= VECSIZE;
-        int src_index = mad24(gy, src_step, src_offset + gx);
-        int dst_index = mad24(gy, dst_step, dst_offset + gx);
-
-#ifdef SRC_ALIGNED
-        VT sdata = *((__global VT *)(src + src_index));
-#else
-        VT sdata = VLOADN(0, src + src_index);
-#endif
-        VT vthresh = (VT)(thresh);
-
-#ifdef THRESH_BINARY
-        VT vecValue = sdata > vthresh ? (VT)max_val : (VT)(0);
-#elif defined THRESH_BINARY_INV
-        VT vecValue = sdata > vthresh ? (VT)(0) : (VT)max_val;
-#elif defined THRESH_TRUNC
-        VT vecValue = sdata > vthresh ? (VT)thresh : sdata;
-#elif defined THRESH_TOZERO
-        VT vecValue = sdata > vthresh ? sdata : (VT)(0);
-#elif defined THRESH_TOZERO_INV
-        VT vecValue = sdata > vthresh ? (VT)(0) : sdata;
-#endif
-
-        if (gx + VECSIZE <= max_index)
-#ifdef DST_ALIGNED
-            *(__global VT*)(dst + dst_index) = vecValue;
-#else
-            VSTOREN(vecValue, 0, dst + dst_index);
-#endif
-        else
-        {
-            __attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE];
-            *((VT*)array) = vecValue;
-            #pragma unroll
-            for (int i = 0; i < VECSIZE; ++i)
-                if (gx + i < max_index)
-                    dst[dst_index + i] = array[i];
-        }
-    }
-}
-
-#else
-
-__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
-                        __global T * dst, int dst_offset, int dst_step,
-                        T thresh, T max_val, int rows, int cols)
-{
-    int gx = get_global_id(0);
-    int gy = get_global_id(1);
-
-    if (gx < cols && gy < rows)
-    {
-        int src_index = mad24(gy, src_step, src_offset + gx);
-        int dst_index = mad24(gy, dst_step, dst_offset + gx);
-
-        T sdata = src[src_index];
-
-#ifdef THRESH_BINARY
-        dst[dst_index] = sdata > thresh ? max_val : (T)(0);
-#elif defined THRESH_BINARY_INV
-        dst[dst_index] = sdata > thresh ? (T)(0) : max_val;
-#elif defined THRESH_TRUNC
-        dst[dst_index] = sdata > thresh ? thresh : sdata;
-#elif defined THRESH_TOZERO
-        dst[dst_index] = sdata > thresh ? sdata : (T)(0);
-#elif defined THRESH_TOZERO_INV
-        dst[dst_index] = sdata > thresh ? (T)(0) : sdata;
-#endif
-    }
-}
-
-#endif
diff --git a/modules/ocl/src/opencl/imgproc_warpAffine.cl b/modules/ocl/src/opencl/imgproc_warpAffine.cl
deleted file mode 100644
index 27f99e0..0000000
--- a/modules/ocl/src/opencl/imgproc_warpAffine.cl
+++ /dev/null
@@ -1,761 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-//warpAffine kernel
-//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-typedef double F;
-typedef double4 F4;
-#define convert_F4 convert_double4
-#else
-typedef float F;
-typedef float4 F4;
-#define convert_F4 convert_float4
-#endif
-
-#define INTER_BITS 5
-#define INTER_TAB_SIZE (1 << INTER_BITS)
-#define INTER_SCALE 1.f/INTER_TAB_SIZE
-#define AB_BITS max(10, (int)INTER_BITS)
-#define AB_SCALE (1 << AB_BITS)
-#define INTER_REMAP_COEF_BITS 15
-#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
-
-inline void interpolateCubic( float x, float* coeffs )
-{
-    const float A = -0.75f;
-
-    coeffs[0] = ((A*(x + 1.f) - 5.0f*A)*(x + 1.f) + 8.0f*A)*(x + 1.f) - 4.0f*A;
-    coeffs[1] = ((A + 2.f)*x - (A + 3.f))*x*x + 1.f;
-    coeffs[2] = ((A + 2.f)*(1.f - x) - (A + 3.f))*(1.f - x)*(1.f - x) + 1.f;
-    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
-}
-
-
-/**********************************************8UC1*********************************************
-***********************************************************************************************/
-__kernel void warpAffineNN_C1_D0(__global uchar const * restrict src, __global uchar * dst, int src_cols, int src_rows,
-                                 int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                 int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        dx = (dx<<2) - (dst_offset&3);
-
-        int round_delta = (AB_SCALE>>1);
-
-        int4 X, Y;
-        int4 sx, sy;
-        int4 DX = (int4)(dx, dx+1, dx+2, dx+3);
-        DX = (DX << AB_BITS);
-        F4 M0DX, M3DX;
-        M0DX = M[0] * convert_F4(DX);
-        M3DX = M[3] * convert_F4(DX);
-        X = convert_int4(rint(M0DX));
-        Y = convert_int4(rint(M3DX));
-        int tmp1, tmp2;
-        tmp1 = rint((M[1]*dy + M[2]) * AB_SCALE);
-        tmp2 = rint((M[4]*dy + M[5]) * AB_SCALE);
-
-        X += tmp1 + round_delta;
-        Y += tmp2 + round_delta;
-
-        sx = convert_int4(convert_short4(X >> AB_BITS));
-        sy = convert_int4(convert_short4(Y >> AB_BITS));
-
-        __global uchar4 * d = (__global uchar4 *)(dst+dst_offset+dy*dstStep+dx);
-        uchar4 dval = *d;
-        DX = (int4)(dx, dx+1, dx+2, dx+3);
-        int4 dcon = DX >= 0 && DX < dst_cols && dy >= 0 && dy < dst_rows;
-        int4 scon = sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows;
-        int4 spos = src_offset + sy * srcStep + sx;
-        uchar4 sval;
-        sval.s0 = scon.s0 ? src[spos.s0] : 0;
-        sval.s1 = scon.s1 ? src[spos.s1] : 0;
-        sval.s2 = scon.s2 ? src[spos.s2] : 0;
-        sval.s3 = scon.s3 ? src[spos.s3] : 0;
-        dval = convert_uchar4(dcon) != (uchar4)(0,0,0,0) ? sval : dval;
-        *d = dval;
-    }
-}
-
-__kernel void warpAffineLinear_C1_D0(__global const uchar * restrict src, __global uchar * dst, int src_cols, int src_rows,
-                                     int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                     int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        dx = (dx<<2) - (dst_offset&3);
-
-        int round_delta = ((AB_SCALE >> INTER_BITS) >> 1);
-
-        int4 X, Y;
-        short4  ax, ay;
-        int4 sx, sy;
-        int4 DX = (int4)(dx, dx+1, dx+2, dx+3);
-        DX = (DX << AB_BITS);
-        F4 M0DX, M3DX;
-        M0DX = M[0] * convert_F4(DX);
-        M3DX = M[3] * convert_F4(DX);
-        X = convert_int4(rint(M0DX));
-        Y = convert_int4(rint(M3DX));
-
-        int tmp1, tmp2;
-        tmp1 = rint((M[1]*dy + M[2]) * AB_SCALE);
-        tmp2 = rint((M[4]*dy + M[5]) * AB_SCALE);
-
-        X += tmp1 + round_delta;
-        Y += tmp2 + round_delta;
-
-        X = X >> (AB_BITS - INTER_BITS);
-        Y = Y >> (AB_BITS - INTER_BITS);
-
-        sx = convert_int4(convert_short4(X >> INTER_BITS));
-        sy = convert_int4(convert_short4(Y >> INTER_BITS));
-        ax = convert_short4(X & (INTER_TAB_SIZE-1));
-        ay = convert_short4(Y & (INTER_TAB_SIZE-1));
-
-        uchar4 v0, v1, v2,v3;
-        int4 scon0, scon1, scon2, scon3;
-        int4 spos0, spos1, spos2, spos3;
-
-        scon0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows);
-        scon1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows);
-        scon2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows);
-        scon3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows);
-        spos0 = src_offset + sy * srcStep + sx;
-        spos1 = src_offset + sy * srcStep + sx + 1;
-        spos2 = src_offset + (sy+1) * srcStep + sx;
-        spos3 = src_offset + (sy+1) * srcStep + sx + 1;
-
-        v0.s0 = scon0.s0 ? src[spos0.s0] : 0;
-        v1.s0 = scon1.s0 ? src[spos1.s0] : 0;
-        v2.s0 = scon2.s0 ? src[spos2.s0] : 0;
-        v3.s0 = scon3.s0 ? src[spos3.s0] : 0;
-
-        v0.s1 = scon0.s1 ? src[spos0.s1] : 0;
-        v1.s1 = scon1.s1 ? src[spos1.s1] : 0;
-        v2.s1 = scon2.s1 ? src[spos2.s1] : 0;
-        v3.s1 = scon3.s1 ? src[spos3.s1] : 0;
-
-        v0.s2 = scon0.s2 ? src[spos0.s2] : 0;
-        v1.s2 = scon1.s2 ? src[spos1.s2] : 0;
-        v2.s2 = scon2.s2 ? src[spos2.s2] : 0;
-        v3.s2 = scon3.s2 ? src[spos3.s2] : 0;
-
-        v0.s3 = scon0.s3 ? src[spos0.s3] : 0;
-        v1.s3 = scon1.s3 ? src[spos1.s3] : 0;
-        v2.s3 = scon2.s3 ? src[spos2.s3] : 0;
-        v3.s3 = scon3.s3 ? src[spos3.s3] : 0;
-
-        short4 itab0, itab1, itab2, itab3;
-        float4 taby, tabx;
-        taby = INTER_SCALE * convert_float4(ay);
-        tabx = INTER_SCALE * convert_float4(ax);
-
-        itab0 = convert_short4_sat(( (1.0f-taby)*(1.0f-tabx) * (float4)INTER_REMAP_COEF_SCALE ));
-        itab1 = convert_short4_sat(( (1.0f-taby)*tabx * (float4)INTER_REMAP_COEF_SCALE ));
-        itab2 = convert_short4_sat(( taby*(1.0f-tabx) * (float4)INTER_REMAP_COEF_SCALE ));
-        itab3 = convert_short4_sat(( taby*tabx * (float4)INTER_REMAP_COEF_SCALE ));
-
-
-        int4 val;
-        uchar4 tval;
-        val = convert_int4(v0) * convert_int4(itab0) + convert_int4(v1) * convert_int4(itab1)
-              + convert_int4(v2) * convert_int4(itab2) + convert_int4(v3) * convert_int4(itab3);
-        tval = convert_uchar4_sat ( (val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-
-        __global uchar4 * d =(__global uchar4 *)(dst+dst_offset+dy*dstStep+dx);
-        uchar4 dval = *d;
-        DX = (int4)(dx, dx+1, dx+2, dx+3);
-        int4 dcon = DX >= 0 && DX < dst_cols && dy >= 0 && dy < dst_rows;
-        dval = convert_uchar4(dcon != 0) ? tval : dval;
-        *d = dval;
-    }
-}
-
-__kernel void warpAffineCubic_C1_D0(__global uchar * src, __global uchar * dst, int src_cols, int src_rows,
-                                    int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                    int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = ((AB_SCALE>>INTER_BITS)>>1);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        int X = X0 >> (AB_BITS - INTER_BITS);
-        int Y = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx = (short)(X >> INTER_BITS) - 1;
-        short sy = (short)(Y >> INTER_BITS) - 1;
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        uchar v[16];
-        int i, j;
-
-#pragma unroll 4
-        for(i=0; i<4;  i++)
-            for(j=0; j<4;  j++)
-            {
-                v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : 0;
-            }
-
-        short itab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-        int isum = 0;
-
-#pragma unroll 16
-        for( i=0; i<16; i++ )
-        {
-            F v = tab1y[(i>>2)] * tab1x[(i&3)];
-            isum += itab[i] = convert_short_sat( rint( v * INTER_REMAP_COEF_SCALE ) );
-        }
-
-        if( isum != INTER_REMAP_COEF_SCALE )
-        {
-            int k1, k2;
-            int diff = isum - INTER_REMAP_COEF_SCALE;
-            int Mk1=2, Mk2=2, mk1=2, mk2=2;
-            for( k1 = 2; k1 < 4; k1++ )
-                for( k2 = 2; k2 < 4; k2++ )
-                {
-                    if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
-                        mk1 = k1, mk2 = k2;
-                    else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
-                        Mk1 = k1, Mk2 = k2;
-                }
-            diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            int sum=0;
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * itab[i] ;
-            }
-            dst[dst_offset+dy*dstStep+dx] = convert_uchar_sat( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-        }
-    }
-}
-
-/**********************************************8UC4*********************************************
-***********************************************************************************************/
-
-__kernel void warpAffineNN_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
-                                 int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                 int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = (AB_SCALE >> 1);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-
-        int sx0 = (short)(X0 >> AB_BITS);
-        int sy0 = (short)(Y0 >> AB_BITS);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*(dstStep>>2)+dx]= (sx0>=0 && sx0<src_cols && sy0>=0 && sy0<src_rows) ? src[(src_offset>>2)+sy0*(srcStep>>2)+sx0] : (uchar4)0;
-    }
-}
-
-__kernel void warpAffineLinear_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
-                                     int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                     int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/INTER_TAB_SIZE/2;
-
-        src_offset = (src_offset>>2);
-        srcStep = (srcStep>>2);
-
-        int tmp = (dx << AB_BITS);
-        int X0 = rint(M[0] * tmp);
-        int Y0 = rint(M[3] * tmp);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx0 = (short)(X0 >> INTER_BITS);
-        short sy0 = (short)(Y0 >> INTER_BITS);
-        short ax0 = (short)(X0 & (INTER_TAB_SIZE-1));
-        short ay0 = (short)(Y0 & (INTER_TAB_SIZE-1));
-
-        int4 v0, v1, v2, v3;
-
-        v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? convert_int4(src[src_offset+sy0 * srcStep + sx0]) : 0;
-        v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? convert_int4(src[src_offset+sy0 * srcStep + sx0+1]) : 0;
-        v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? convert_int4(src[src_offset+(sy0+1) * srcStep + sx0]) : 0;
-        v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? convert_int4(src[src_offset+(sy0+1) * srcStep + sx0+1]) : 0;
-
-        int itab0, itab1, itab2, itab3;
-        float taby, tabx;
-        taby = 1.f/INTER_TAB_SIZE*ay0;
-        tabx = 1.f/INTER_TAB_SIZE*ax0;
-
-        itab0 = convert_short_sat(rint( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
-        itab1 = convert_short_sat(rint( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ));
-        itab2 = convert_short_sat(rint( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
-        itab3 = convert_short_sat(rint( taby*tabx * INTER_REMAP_COEF_SCALE ));
-
-        int4 val;
-        val = v0 * itab0 +  v1 * itab1 + v2 * itab2 + v3 * itab3;
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*(dstStep>>2)+dx] =  convert_uchar4_sat ( (val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-    }
-}
-
-__kernel void warpAffineCubic_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
-                                    int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                    int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = ((AB_SCALE>>INTER_BITS)>>1);
-
-        src_offset = (src_offset>>2);
-        srcStep = (srcStep>>2);
-        dst_offset = (dst_offset>>2);
-        dstStep = (dstStep>>2);
-
-        int tmp = (dx << AB_BITS);
-        int X0 = rint(M[0] * tmp);
-        int Y0 = rint(M[3] * tmp);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        int sx = (short)(X0 >> INTER_BITS) - 1;
-        int sy = (short)(Y0 >> INTER_BITS) - 1;
-        int ay = (short)(Y0 & (INTER_TAB_SIZE-1));
-        int ax = (short)(X0 & (INTER_TAB_SIZE-1));
-
-        uchar4 v[16];
-        int i,j;
-#pragma unroll 4
-        for(i=0; i<4; i++)
-            for(j=0; j<4; j++)
-            {
-                v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)])  : (uchar4)0;
-            }
-        int itab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = INTER_SCALE * ay;
-        axx = INTER_SCALE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-        int isum = 0;
-
-#pragma unroll 16
-        for( i=0; i<16; i++ )
-        {
-            float tmp;
-            tmp = tab1y[(i>>2)] * tab1x[(i&3)] * INTER_REMAP_COEF_SCALE;
-            itab[i] = rint(tmp);
-            isum += itab[i];
-        }
-
-        if( isum != INTER_REMAP_COEF_SCALE )
-        {
-            int k1, k2;
-            int diff = isum - INTER_REMAP_COEF_SCALE;
-            int Mk1=2, Mk2=2, mk1=2, mk2=2;
-
-            for( k1 = 2; k1 < 4; k1++ )
-                for( k2 = 2; k2 < 4; k2++ )
-                {
-
-                    if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
-                        mk1 = k1, mk2 = k2;
-                    else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
-                        Mk1 = k1, Mk2 = k2;
-                }
-
-            diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            int4 sum=0;
-            for ( i =0; i<16; i++ )
-            {
-                sum += convert_int4(v[i]) * itab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = convert_uchar4_sat( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-        }
-    }
-}
-
-
-/**********************************************32FC1********************************************
-***********************************************************************************************/
-
-__kernel void warpAffineNN_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-                                 int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                 int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/2;
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-
-        short sx0 = (short)(X0 >> AB_BITS);
-        short sy0 = (short)(Y0 >> AB_BITS);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*dstStep+dx]= (sx0>=0 && sx0<src_cols && sy0>=0 && sy0<src_rows) ? src[(src_offset>>2)+sy0*srcStep+sx0] : 0;
-    }
-}
-
-__kernel void warpAffineLinear_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-                                     int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                     int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/INTER_TAB_SIZE/2;
-
-        src_offset = (src_offset>>2);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx0 = (short)(X0 >> INTER_BITS);
-        short sy0 = (short)(Y0 >> INTER_BITS);
-        short ax0 = (short)(X0 & (INTER_TAB_SIZE-1));
-        short ay0 = (short)(Y0 & (INTER_TAB_SIZE-1));
-
-        float v0, v1, v2, v3;
-
-        v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : 0;
-        v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : 0;
-        v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : 0;
-        v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : 0;
-
-        float tab[4];
-        float taby[2], tabx[2];
-        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
-        taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
-        tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
-
-        tab[0] = taby[0] * tabx[0];
-        tab[1] = taby[0] * tabx[1];
-        tab[2] = taby[1] * tabx[0];
-        tab[3] = taby[1] * tabx[1];
-
-        float sum = 0;
-        sum += v0 * tab[0] +  v1 * tab[1] +  v2 * tab[2] +  v3 * tab[3];
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*dstStep+dx] = sum;
-    }
-}
-
-__kernel void warpAffineCubic_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-                                    int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                    int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/INTER_TAB_SIZE/2;
-
-        src_offset = (src_offset>>2);
-        dst_offset = (dst_offset>>2);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx = (short)(X0 >> INTER_BITS) - 1;
-        short sy = (short)(Y0 >> INTER_BITS) - 1;
-        short ay = (short)(Y0 & (INTER_TAB_SIZE-1));
-        short ax = (short)(X0 & (INTER_TAB_SIZE-1));
-
-        float v[16];
-        int i;
-
-        for(i=0; i<16;  i++)
-            v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0;
-
-        float tab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-
-#pragma unroll 4
-        for( i=0; i<16; i++ )
-        {
-            tab[i] = tab1y[(i>>2)] * tab1x[(i&3)];
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            float sum = 0;
-#pragma unroll 4
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * tab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = sum;
-
-        }
-    }
-}
-
-
-/**********************************************32FC4********************************************
-***********************************************************************************************/
-
-__kernel void warpAffineNN_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
-                                 int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                 int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/2;
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-
-        short sx0 = (short)(X0 >> AB_BITS);
-        short sy0 = (short)(Y0 >> AB_BITS);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx0>=0 && sx0<src_cols && sy0>=0 && sy0<src_rows) ? src[(src_offset>>4)+sy0*(srcStep>>2)+sx0] : (float4)0;
-    }
-}
-
-__kernel void warpAffineLinear_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
-                                     int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                     int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/INTER_TAB_SIZE/2;
-
-        src_offset = (src_offset>>4);
-        dst_offset = (dst_offset>>4);
-        srcStep = (srcStep>>2);
-        dstStep = (dstStep>>2);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx0 = (short)(X0 >> INTER_BITS);
-        short sy0 = (short)(Y0 >> INTER_BITS);
-        short ax0 = (short)(X0 & (INTER_TAB_SIZE-1));
-        short ay0 = (short)(Y0 & (INTER_TAB_SIZE-1));
-
-        float4 v0, v1, v2, v3;
-
-        v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : (float4)0;
-        v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : (float4)0;
-        v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : (float4)0;
-        v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : (float4)0;
-
-        float tab[4];
-        float taby[2], tabx[2];
-        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
-        taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
-        tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
-
-        tab[0] = taby[0] * tabx[0];
-        tab[1] = taby[0] * tabx[1];
-        tab[2] = taby[1] * tabx[0];
-        tab[3] = taby[1] * tabx[1];
-
-        float4 sum = 0;
-        sum += v0 * tab[0] +  v1 * tab[1] +  v2 * tab[2] +  v3 * tab[3];
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[dst_offset+dy*dstStep+dx] = sum;
-    }
-}
-
-__kernel void warpAffineCubic_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
-                                    int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                    int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        int round_delta = AB_SCALE/INTER_TAB_SIZE/2;
-
-        src_offset = (src_offset>>4);
-        dst_offset = (dst_offset>>4);
-        srcStep = (srcStep>>2);
-        dstStep = (dstStep>>2);
-
-        int X0 = rint(M[0] * dx * AB_SCALE);
-        int Y0 = rint(M[3] * dx * AB_SCALE);
-        X0 += rint((M[1]*dy + M[2]) * AB_SCALE) + round_delta;
-        Y0 += rint((M[4]*dy + M[5]) * AB_SCALE) + round_delta;
-        X0 = X0 >> (AB_BITS - INTER_BITS);
-        Y0 = Y0 >> (AB_BITS - INTER_BITS);
-
-        short sx = (short)(X0 >> INTER_BITS) - 1;
-        short sy = (short)(Y0 >> INTER_BITS) - 1;
-        short ay = (short)(Y0 & (INTER_TAB_SIZE-1));
-        short ax = (short)(X0 & (INTER_TAB_SIZE-1));
-
-        float4 v[16];
-        int i;
-
-        for(i=0; i<16;  i++)
-            v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float4)0;
-
-        float tab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-
-#pragma unroll 4
-        for( i=0; i<16; i++ )
-        {
-            tab[i] = tab1y[(i>>2)] * tab1x[(i&3)];
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            float4 sum = 0;
-#pragma unroll 4
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * tab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = sum;
-
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/imgproc_warpPerspective.cl b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
deleted file mode 100644
index 97f8664..0000000
--- a/modules/ocl/src/opencl/imgproc_warpPerspective.cl
+++ /dev/null
@@ -1,682 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-//wrapPerspective kernel
-//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-typedef double F;
-typedef double4 F4;
-#define convert_F4 convert_double4
-#else
-typedef float F;
-typedef float4 F4;
-#define convert_F4 convert_float4
-#endif
-
-
-#define INTER_BITS 5
-#define INTER_TAB_SIZE (1 << INTER_BITS)
-#define INTER_SCALE 1.f/INTER_TAB_SIZE
-#define AB_BITS max(10, (int)INTER_BITS)
-#define AB_SCALE (1 << AB_BITS)
-#define INTER_REMAP_COEF_BITS 15
-#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
-
-inline void interpolateCubic( float x, float* coeffs )
-{
-    const float A = -0.75f;
-
-    coeffs[0] = ((A*(x + 1.f) - 5.0f*A)*(x + 1.f) + 8.0f*A)*(x + 1.f) - 4.0f*A;
-    coeffs[1] = ((A + 2.f)*x - (A + 3.f))*x*x + 1.f;
-    coeffs[2] = ((A + 2.f)*(1.f - x) - (A + 3.f))*(1.f - x)*(1.f - x) + 1.f;
-    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
-}
-
-
-/**********************************************8UC1*********************************************
-***********************************************************************************************/
-__kernel void warpPerspectiveNN_C1_D0(__global uchar const * restrict src, __global uchar * dst, int src_cols, int src_rows,
-                                      int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                      int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        dx = (dx<<2) - (dst_offset&3);
-
-        F4 DX = (F4)(dx, dx+1, dx+2, dx+3);
-        F4 X0 = M[0]*DX + M[1]*dy + M[2];
-        F4 Y0 = M[3]*DX + M[4]*dy + M[5];
-        F4 W = M[6]*DX + M[7]*dy + M[8],one=1,zero=0;
-        W = (W!=zero) ? one/W : zero;
-        short4 X = convert_short4_sat_rte(X0*W);
-        short4 Y = convert_short4_sat_rte(Y0*W);
-        int4 sx = convert_int4(X);
-        int4 sy = convert_int4(Y);
-
-        int4 DXD = (int4)(dx, dx+1, dx+2, dx+3);
-        __global uchar4 * d = (__global uchar4 *)(dst+dst_offset+dy*dstStep+dx);
-        uchar4 dval = *d;
-        int4 dcon = DXD >= 0 && DXD < dst_cols && dy >= 0 && dy < dst_rows;
-        int4 scon = sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows;
-        int4 spos = src_offset + sy * srcStep + sx;
-        uchar4 sval;
-        sval.s0 = scon.s0 ? src[spos.s0] : 0;
-        sval.s1 = scon.s1 ? src[spos.s1] : 0;
-        sval.s2 = scon.s2 ? src[spos.s2] : 0;
-        sval.s3 = scon.s3 ? src[spos.s3] : 0;
-        dval = convert_uchar4(dcon) != (uchar4)(0,0,0,0) ? sval : dval;
-        *d = dval;
-    }
-}
-
-__kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, __global uchar * dst,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
-        int dstStep, int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        int sx = convert_short_sat(X >> INTER_BITS);
-        int sy = convert_short_sat(Y >> INTER_BITS);
-        int ay = (short)(Y & (INTER_TAB_SIZE-1));
-        int ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        uchar v[4];
-        int i;
-#pragma unroll 4
-        for(i=0; i<4;  i++)
-            v[i] = (sx+(i&1) >= 0 && sx+(i&1) < src_cols && sy+(i>>1) >= 0 && sy+(i>>1) < src_rows) ? src[src_offset + (sy+(i>>1)) * srcStep + (sx+(i&1))] : (uchar)0;
-
-        short itab[4];
-        float tab1y[2], tab1x[2];
-        tab1y[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay;
-        tab1y[1] = 1.f/INTER_TAB_SIZE*ay;
-        tab1x[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax;
-        tab1x[1] = 1.f/INTER_TAB_SIZE*ax;
-
-#pragma unroll 4
-        for(i=0; i<4;  i++)
-        {
-            float v = tab1y[(i>>1)] * tab1x[(i&1)];
-            itab[i] = convert_short_sat_rte( v * INTER_REMAP_COEF_SCALE );
-        }
-        if(dx >=0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            int sum = 0;
-            for ( i =0; i<4; i++ )
-            {
-                sum += v[i] * itab[i] ;
-            }
-            dst[dst_offset+dy*dstStep+dx] = convert_uchar_sat ( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-        }
-    }
-}
-
-__kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * dst, int src_cols, int src_rows,
-        int dst_cols, int dst_rows, int srcStep, int dstStep,
-        int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS) - 1;
-        short sy = convert_short_sat(Y >> INTER_BITS) - 1;
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        uchar v[16];
-        int i, j;
-
-#pragma unroll 4
-        for(i=0; i<4;  i++)
-            for(j=0; j<4;  j++)
-            {
-                v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : (uchar)0;
-            }
-
-        short itab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-
-        int isum = 0;
-#pragma unroll 16
-        for( i=0; i<16; i++ )
-        {
-            F v = tab1y[(i>>2)] * tab1x[(i&3)];
-            isum += itab[i] = convert_short_sat( rint( v * INTER_REMAP_COEF_SCALE ) );
-        }
-        if( isum != INTER_REMAP_COEF_SCALE )
-        {
-            int k1, k2;
-            int diff = isum - INTER_REMAP_COEF_SCALE;
-            int Mk1=2, Mk2=2, mk1=2, mk2=2;
-            for( k1 = 2; k1 < 4; k1++ )
-                for( k2 = 2; k2 < 4; k2++ )
-                {
-                    if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
-                        mk1 = k1, mk2 = k2;
-                    else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
-                        Mk1 = k1, Mk2 = k2;
-                }
-            diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
-        }
-
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            int sum=0;
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * itab[i] ;
-            }
-            dst[dst_offset+dy*dstStep+dx] = convert_uchar_sat( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-        }
-    }
-}
-
-/**********************************************8UC4*********************************************
-***********************************************************************************************/
-
-__kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
-                                      int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
-                                      int dstStep, int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? 1.f/W : 0.0f;
-        short sx = convert_short_sat_rte(X0*W);
-        short sy = convert_short_sat_rte(Y0*W);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>2)+sy*(srcStep>>2)+sx] : (uchar4)0;
-    }
-}
-
-__kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
-        int dstStep, int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        src_offset = (src_offset>>2);
-        srcStep = (srcStep>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS);
-        short sy = convert_short_sat(Y >> INTER_BITS);
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-
-        int4 v0, v1, v2, v3;
-
-        v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx]) : (int4)0;
-        v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx+1]) : (int4)0;
-        v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx]) : (int4)0;
-        v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx+1]) : (int4)0;
-
-        int itab0, itab1, itab2, itab3;
-        float taby, tabx;
-        taby = 1.f/INTER_TAB_SIZE*ay;
-        tabx = 1.f/INTER_TAB_SIZE*ax;
-
-        itab0 = convert_short_sat(rint( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
-        itab1 = convert_short_sat(rint( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ));
-        itab2 = convert_short_sat(rint( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
-        itab3 = convert_short_sat(rint( taby*tabx * INTER_REMAP_COEF_SCALE ));
-
-        int4 val;
-        val = v0 * itab0 +  v1 * itab1 + v2 * itab2 + v3 * itab3;
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*(dstStep>>2)+dx] =  convert_uchar4_sat ( (val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-    }
-}
-
-__kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
-        int dstStep, int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        src_offset = (src_offset>>2);
-        srcStep = (srcStep>>2);
-        dst_offset = (dst_offset>>2);
-        dstStep = (dstStep>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS) - 1;
-        short sy = convert_short_sat(Y >> INTER_BITS) - 1;
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        uchar4 v[16];
-        int i,j;
-#pragma unroll 4
-        for(i=0; i<4; i++)
-            for(j=0; j<4; j++)
-            {
-                v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)])  : (uchar4)0;
-            }
-        int itab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = INTER_SCALE * ay;
-        axx = INTER_SCALE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-        int isum = 0;
-
-#pragma unroll 16
-        for( i=0; i<16; i++ )
-        {
-            float tmp;
-            tmp = tab1y[(i>>2)] * tab1x[(i&3)] * INTER_REMAP_COEF_SCALE;
-            itab[i] = rint(tmp);
-            isum += itab[i];
-        }
-
-        if( isum != INTER_REMAP_COEF_SCALE )
-        {
-            int k1, k2;
-            int diff = isum - INTER_REMAP_COEF_SCALE;
-            int Mk1=2, Mk2=2, mk1=2, mk2=2;
-
-            for( k1 = 2; k1 < 4; k1++ )
-                for( k2 = 2; k2 < 4; k2++ )
-                {
-
-                    if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
-                        mk1 = k1, mk2 = k2;
-                    else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
-                        Mk1 = k1, Mk2 = k2;
-                }
-
-            diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            int4 sum=0;
-            for ( i =0; i<16; i++ )
-            {
-                sum += convert_int4(v[i]) * itab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = convert_uchar4_sat( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
-        }
-    }
-}
-
-
-/**********************************************32FC1********************************************
-***********************************************************************************************/
-
-__kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-                                      int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                      int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? 1.f/W : 0.0f;
-        short sx = convert_short_sat_rte(X0*W);
-        short sy = convert_short_sat_rte(Y0*W);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*dstStep+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>2)+sy*srcStep+sx] : 0;
-    }
-}
-
-__kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-        int dst_cols, int dst_rows, int srcStep, int dstStep,
-        int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        src_offset = (src_offset>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS);
-        short sy = convert_short_sat(Y >> INTER_BITS);
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        float v0, v1, v2, v3;
-
-        v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx] : (float)0;
-        v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx+1] : (float)0;
-        v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx] : (float)0;
-        v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx+1] : (float)0;
-
-        float tab[4];
-        float taby[2], tabx[2];
-        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay;
-        taby[1] = 1.f/INTER_TAB_SIZE*ay;
-        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax;
-        tabx[1] = 1.f/INTER_TAB_SIZE*ax;
-
-        tab[0] = taby[0] * tabx[0];
-        tab[1] = taby[0] * tabx[1];
-        tab[2] = taby[1] * tabx[0];
-        tab[3] = taby[1] * tabx[1];
-
-        float sum = 0;
-        sum += v0 * tab[0] +  v1 * tab[1] +  v2 * tab[2] +  v3 * tab[3];
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>2)+dy*dstStep+dx] = sum;
-    }
-}
-
-__kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
-        int dst_cols, int dst_rows, int srcStep, int dstStep,
-        int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        src_offset = (src_offset>>2);
-        dst_offset = (dst_offset>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS) - 1;
-        short sy = convert_short_sat(Y >> INTER_BITS) - 1;
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-        float v[16];
-        int i;
-
-        for(i=0; i<16;  i++)
-            v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float)0;
-
-        float tab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-
-#pragma unroll 4
-        for( i=0; i<16; i++ )
-        {
-            tab[i] = tab1y[(i>>2)] * tab1x[(i&3)];
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            float sum = 0;
-#pragma unroll 4
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * tab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = sum;
-
-        }
-    }
-}
-
-
-/**********************************************32FC4********************************************
-***********************************************************************************************/
-
-__kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
-                                      int dst_cols, int dst_rows, int srcStep, int dstStep,
-                                      int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W =(W != 0.0f)? 1.f/W : 0.0f;
-        short sx = convert_short_sat_rte(X0*W);
-        short sy = convert_short_sat_rte(Y0*W);
-
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>4)+sy*(srcStep>>2)+sx] : (float)0;
-    }
-}
-
-__kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
-        int dst_cols, int dst_rows, int srcStep, int dstStep,
-        int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows)
-    {
-        src_offset = (src_offset>>4);
-        dst_offset = (dst_offset>>4);
-        srcStep = (srcStep>>2);
-        dstStep = (dstStep>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx0 = convert_short_sat(X >> INTER_BITS);
-        short sy0 = convert_short_sat(Y >> INTER_BITS);
-        short ay0 = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax0 = (short)(X & (INTER_TAB_SIZE-1));
-
-
-        float4 v0, v1, v2, v3;
-
-        v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : (float4)0;
-        v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : (float4)0;
-        v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : (float4)0;
-        v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : (float4)0;
-
-        float tab[4];
-        float taby[2], tabx[2];
-        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
-        taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
-        tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
-
-        tab[0] = taby[0] * tabx[0];
-        tab[1] = taby[0] * tabx[1];
-        tab[2] = taby[1] * tabx[0];
-        tab[3] = taby[1] * tabx[1];
-
-        float4 sum = 0;
-        sum += v0 * tab[0] +  v1 * tab[1] +  v2 * tab[2] +  v3 * tab[3];
-        if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-            dst[dst_offset+dy*dstStep+dx] = sum;
-    }
-}
-
-__kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4 * dst,
-        int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
-        int dstStep, int src_offset, int dst_offset,  __constant F * M, int threadCols )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    if( dx < threadCols && dy < dst_rows )
-    {
-        src_offset = (src_offset>>4);
-        dst_offset = (dst_offset>>4);
-        srcStep = (srcStep>>2);
-        dstStep = (dstStep>>2);
-
-        F X0 = M[0]*dx + M[1]*dy + M[2];
-        F Y0 = M[3]*dx + M[4]*dy + M[5];
-        F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
-        int X = rint(X0*W);
-        int Y = rint(Y0*W);
-
-        short sx = convert_short_sat(X >> INTER_BITS)-1;
-        short sy = convert_short_sat(Y >> INTER_BITS)-1;
-        short ay = (short)(Y & (INTER_TAB_SIZE-1));
-        short ax = (short)(X & (INTER_TAB_SIZE-1));
-
-
-        float4 v[16];
-        int i;
-
-        for(i=0; i<16;  i++)
-            v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float4)0;
-
-        float tab[16];
-        float tab1y[4], tab1x[4];
-        float axx, ayy;
-
-        ayy = 1.f/INTER_TAB_SIZE * ay;
-        axx = 1.f/INTER_TAB_SIZE * ax;
-        interpolateCubic(ayy, tab1y);
-        interpolateCubic(axx, tab1x);
-
-#pragma unroll 4
-        for( i=0; i<16; i++ )
-        {
-            tab[i] = tab1y[(i>>2)] * tab1x[(i&3)];
-        }
-
-        if( dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            float4 sum = 0;
-#pragma unroll 4
-            for ( i =0; i<16; i++ )
-            {
-                sum += v[i] * tab[i];
-            }
-            dst[dst_offset+dy*dstStep+dx] = sum;
-
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/interpolate_frames.cl b/modules/ocl/src/opencl/interpolate_frames.cl
deleted file mode 100644
index eb0b55f..0000000
--- a/modules/ocl/src/opencl/interpolate_frames.cl
+++ /dev/null
@@ -1,252 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
-
-// Image read mode
-__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
-
-// atomic add for 32bit floating point
-inline void atomic_addf(volatile __global float *source, const float operand) {
-    union {
-        unsigned int intVal;
-        float floatVal;
-    } newVal;
-    union {
-        unsigned int intVal;
-        float floatVal;
-    } prevVal;
-    do {
-        prevVal.floatVal = *source;
-        newVal.floatVal = prevVal.floatVal + operand;
-    } while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
-}
-
-__kernel void memsetKernel(
-    float val,
-    __global float * image,
-    int width,
-    int height,
-    int step, // in element
-    int offset
-    )
-{
-    if(get_global_id(0) >= width || get_global_id(1) >= height)
-    {
-        return;
-    }
-    image += offset;
-    image[get_global_id(0) + get_global_id(1) * step] = val;
-}
-
-__kernel void normalizeKernel(
-    __global float * buffer,
-    int width,
-    int height,
-    int step,
-    int f_offset,
-    int d_offset
-    )
-{
-    __global float * factors = buffer + f_offset;
-    __global float * dst     = buffer + d_offset;
-
-    int j = get_global_id(0);
-    int i = get_global_id(1);
-
-    if(j >= width || i >= height)
-    {
-        return;
-    }
-    float scale = factors[step * i + j];
-    float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
-
-    dst[step * i + j] *= invScale;
-}
-
-__kernel void forwardWarpKernel(
-    __global const float * src,
-    __global float * buffer,
-    __global const float * u,
-    __global const float * v,
-    const int w,
-    const int h,
-    const int flow_stride,
-    const int image_stride,
-    const int factor_offset,
-    const int dst_offset,
-    const float time_scale
-    )
-{
-    int j = get_global_id(0);
-    int i = get_global_id(1);
-
-    if (i >= h || j >= w) return;
-
-    volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
-    volatile __global float * dst = (volatile __global float *)buffer + dst_offset;
-
-    int flow_row_offset  = i * flow_stride;
-    int image_row_offset = i * image_stride;
-
-    //bottom left corner of a target pixel
-    float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
-    float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
-    // pixel containing bottom left corner
-    float px;
-    float py;
-    float dx = modf(cx, &px);
-    float dy = modf(cy, &py);
-    // target pixel integer coords
-    int tx;
-    int ty;
-    tx = (int) px;
-    ty = (int) py;
-    float value = src[image_row_offset + j];
-    float weight;
-    // fill pixel containing bottom right corner
-    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
-    {
-        weight = dx * dy;
-        atomic_addf(dst + ty * image_stride + tx, value * weight);
-        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
-    }
-
-    // fill pixel containing bottom left corner
-    tx -= 1;
-    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
-    {
-        weight = (1.0f - dx) * dy;
-        atomic_addf(dst + ty * image_stride + tx, value * weight);
-        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
-    }
-
-    // fill pixel containing upper left corner
-    ty -= 1;
-    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
-    {
-        weight = (1.0f - dx) * (1.0f - dy);
-        atomic_addf(dst + ty * image_stride + tx, value * weight);
-        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
-    }
-
-    // fill pixel containing upper right corner
-    tx += 1;
-    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
-    {
-        weight = dx * (1.0f - dy);
-        atomic_addf(dst + ty * image_stride + tx, value * weight);
-        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
-    }
-}
-
-// define buffer offsets
-enum
-{
-    O0_OS = 0,
-    O1_OS,
-    U_OS,
-    V_OS,
-    UR_OS,
-    VR_OS
-};
-
-__kernel void blendFramesKernel(
-    image2d_t tex_src0,
-    image2d_t tex_src1,
-    __global float * buffer,
-    __global float * out,
-    int w,
-    int h,
-    int step,
-    float theta
-    )
-{
-    __global float * u  = buffer + h * step * U_OS;
-    __global float * v  = buffer + h * step * V_OS;
-    __global float * ur = buffer + h * step * UR_OS;
-    __global float * vr = buffer + h * step * VR_OS;
-    __global float * o0 = buffer + h * step * O0_OS;
-    __global float * o1 = buffer + h * step * O1_OS;
-
-    int ix = get_global_id(0);
-    int iy = get_global_id(1);
-
-    if(ix >= w || iy >= h) return;
-
-    int pos = ix + step * iy;
-
-    float _u  = u[pos];
-    float _v  = v[pos];
-
-    float _ur = ur[pos];
-    float _vr = vr[pos];
-
-    float x = (float)ix + 0.5f;
-    float y = (float)iy + 0.5f;
-    bool b0 = o0[pos] > 1e-4f;
-    bool b1 = o1[pos] > 1e-4f;
-
-    float2 coord0 = (float2)(x - _u * theta, y - _v * theta);
-    float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));
-
-    if (b0 && b1)
-    {
-        // pixel is visible on both frames
-        out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
-            read_imagef(tex_src1, sampler, coord1).x * theta;
-    }
-    else if (b0)
-    {
-        // visible on the first frame only
-        out[pos] = read_imagef(tex_src0, sampler, coord0).x;
-    }
-    else
-    {
-        // visible on the second frame only
-        out[pos] = read_imagef(tex_src1, sampler, coord1).x;
-    }
-}
diff --git a/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl b/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
deleted file mode 100644
index 7e09f3f..0000000
--- a/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
+++ /dev/null
@@ -1,176 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
-
-#ifndef N   // number of radices
-#define N 4
-#endif
-
-#ifndef K_T
-#define K_T float
-#endif
-
-#ifndef V_T
-#define V_T float
-#endif
-
-#ifndef IS_GT
-#define IS_GT 0
-#endif
-
-
-// from Thrust::b40c, link:
-// https://github.com/thrust/thrust/blob/master/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
-__inline uint convertKey(uint converted_key)
-{
-#ifdef K_FLT
-    unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000;
-    converted_key ^= mask;
-#elif defined(K_INT)
-    const uint SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
-    converted_key ^= SIGN_MASK;
-#else
-
-#endif
-    return converted_key;
-}
-
-//FIXME(pengx17):
-// exclusive scan, need to be optimized as this is too naive...
-kernel
-    void naiveScanAddition(
-    __global int * input,
-    __global int * output,
-    int size
-    )
-{
-    if(get_global_id(0) == 0)
-    {
-        output[0] = 0;
-        for(int i = 1; i < size; i ++)
-        {
-            output[i] = output[i - 1] + input[i - 1];
-        }
-    }
-}
-
-// following is ported from
-// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_uint_kernels.cl
-kernel
-    void histogramRadixN (
-    __global K_T* unsortedKeys,
-    __global int * buckets,
-    uint shiftCount
-    )
-{
-    const int RADIX_T     = N;
-    const int RADICES_T   = (1 << RADIX_T);
-    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
-    const int MASK_T      = (1 << RADIX_T) - 1;
-    int localBuckets[16] = {0,0,0,0,0,0,0,0,
-                            0,0,0,0,0,0,0,0};
-    int globalId    = get_global_id(0);
-    int numOfGroups = get_num_groups(0);
-
-    /* Calculate thread-histograms */
-    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
-    {
-        uint value = convertKey(as_uint(unsortedKeys[mad24(globalId, NUM_OF_ELEMENTS_PER_WORK_ITEM_T, i)]));
-        value = (value >> shiftCount) & MASK_T;
-#if IS_GT
-        localBuckets[RADICES_T - value - 1]++;
-#else
-        localBuckets[value]++;
-#endif
-    }
-
-    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
-    {
-        buckets[mad24(i, RADICES_T * numOfGroups, globalId) ] = localBuckets[i];
-    }
-}
-
-kernel
-    void permuteRadixN (
-    __global K_T*  unsortedKeys,
-    __global V_T*  unsortedVals,
-    __global int* scanedBuckets,
-    uint shiftCount,
-    __global K_T*  sortedKeys,
-    __global V_T*  sortedVals
-    )
-{
-    const int RADIX_T     = N;
-    const int RADICES_T   = (1 << RADIX_T);
-    const int MASK_T = (1<<RADIX_T)  -1;
-
-    int globalId  = get_global_id(0);
-    int numOfGroups = get_num_groups(0);
-    const int NUM_OF_ELEMENTS_PER_WORK_GROUP_T = numOfGroups << N;
-    int  localIndex[16];
-
-    /*Load the index to local memory*/
-    for(int i = 0; i < RADICES_T; ++i)
-    {
-#if IS_GT
-        localIndex[i] = scanedBuckets[mad24(RADICES_T - i - 1, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
-#else
-        localIndex[i] = scanedBuckets[mad24(i, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
-#endif
-    }
-    /* Permute elements to appropriate location */
-    for(int i = 0; i < RADICES_T; ++i)
-    {
-        int old_idx = mad24(globalId, RADICES_T, i);
-        K_T  ovalue = unsortedKeys[old_idx];
-        uint value = convertKey(as_uint(ovalue));
-        uint maskedValue = (value >> shiftCount) & MASK_T;
-        uint index = localIndex[maskedValue];
-        sortedKeys[index] = ovalue;
-        sortedVals[index] = unsortedVals[old_idx];
-        localIndex[maskedValue] = index + 1;
-    }
-}
diff --git a/modules/ocl/src/opencl/kernel_sort_by_key.cl b/modules/ocl/src/opencl/kernel_sort_by_key.cl
deleted file mode 100644
index 0e8d581..0000000
--- a/modules/ocl/src/opencl/kernel_sort_by_key.cl
+++ /dev/null
@@ -1,244 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef K_T
-#define K_T float
-#endif
-
-#ifndef V_T
-#define V_T float
-#endif
-
-#ifndef IS_GT
-#define IS_GT false
-#endif
-
-#if IS_GT
-#define my_comp(x,y) ((x) > (y))
-#else
-#define my_comp(x,y) ((x) < (y))
-#endif
-
-/////////////////////// Bitonic sort ////////////////////////////
-// ported from
-// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
-__kernel
-    void bitonicSort
-    (
-        __global K_T * keys,
-        __global V_T * vals,
-        int count,
-        int stage,
-        int passOfStage
-    )
-{
-    const int threadId = get_global_id(0);
-    if(threadId >= count / 2)
-    {
-        return;
-    }
-    const int pairDistance = 1 << (stage - passOfStage);
-    const int blockWidth   = 2 * pairDistance;
-
-    int leftId = min( (threadId % pairDistance)
-                   + (threadId / pairDistance) * blockWidth, count );
-
-    int rightId = min( leftId + pairDistance, count );
-
-    int temp;
-
-    const V_T lval = vals[leftId];
-    const V_T rval = vals[rightId];
-
-    const K_T lkey = keys[leftId];
-    const K_T rkey = keys[rightId];
-
-    int sameDirectionBlockWidth = 1 << stage;
-
-    if((threadId/sameDirectionBlockWidth) % 2 == 1)
-    {
-        temp = rightId;
-        rightId = leftId;
-        leftId = temp;
-    }
-
-    const bool compareResult = my_comp(lkey, rkey);
-
-    if(compareResult)
-    {
-        keys[rightId] = rkey;
-        keys[leftId]  = lkey;
-        vals[rightId] = rval;
-        vals[leftId]  = lval;
-    }
-    else
-    {
-        keys[rightId] = lkey;
-        keys[leftId]  = rkey;
-        vals[rightId] = lval;
-        vals[leftId]  = rval;
-    }
-}
-
-/////////////////////// Selection sort ////////////////////////////
-//kernel is ported from Bolt library:
-//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
-__kernel
-    void selectionSortLocal
-    (
-        __global K_T * keys,
-        __global V_T * vals,
-        const int count,
-        __local  K_T * scratch
-    )
-{
-    int          i  = get_local_id(0); // index in workgroup
-    int numOfGroups = get_num_groups(0); // index in workgroup
-    int groupID     = get_group_id(0);
-    int         wg  = get_local_size(0); // workgroup size = block size
-    int n; // number of elements to be processed for this work group
-
-    int offset   = groupID * wg;
-    int same     = 0;
-
-    vals      += offset;
-    keys      += offset;
-    n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
-
-    int clamped_i= min(i, n - 1);
-
-    K_T key1 = keys[clamped_i], key2;
-    V_T val1 = vals[clamped_i];
-    scratch[i] = key1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(i >= n)
-    {
-        return;
-    }
-
-    int pos = 0;
-    for (int j=0;j<n;++j)
-    {
-        key2  = scratch[j];
-        if(my_comp(key2, key1))
-            pos++;//calculate the rank of this element in this work group
-        else
-        {
-            if(my_comp(key1, key2))
-                continue;
-            else
-            {
-                // key1 and key2 are same
-                same++;
-            }
-        }
-    }
-    for (int j=0; j< same; j++)
-    {
-        vals[pos + j] = val1;
-        keys[pos + j] = key1;
-    }
-}
-__kernel
-    void selectionSortFinal
-    (
-        __global K_T * keys,
-        __global V_T * vals,
-        const int count
-    )
-{
-    const int          i  = get_local_id(0); // index in workgroup
-    const int numOfGroups = get_num_groups(0); // index in workgroup
-    const int         wg  = get_local_size(0); // workgroup size = block size
-    int pos = 0, same = 0;
-    const int offset = get_group_id(0) * wg;
-    const int remainder = count - wg*(numOfGroups-1);
-
-    if((offset + i ) >= count)
-        return;
-    V_T val1 = vals[offset + i];
-
-    K_T key1 = keys[offset + i];
-    K_T key2;
-
-    for(int j=0; j<numOfGroups-1; j++ )
-    {
-        for(int k=0; k<wg; k++)
-        {
-            key2 = keys[j*wg + k];
-            if(my_comp(key1, key2))
-                break;
-            else
-            {
-                //Increment only if the value is not the same.
-                if(my_comp(key2, key1))
-                    pos++;
-                else
-                    same++;
-            }
-        }
-    }
-
-    for(int k=0; k<remainder; k++)
-    {
-        key2 = keys[(numOfGroups-1)*wg + k];
-        if(my_comp(key1, key2))
-            break;
-        else
-        {
-            //Don't increment if the value is the same.
-            if(my_comp(key2, key1))
-                pos++;
-            else
-                same++;
-        }
-    }
-    for (int j=0; j< same; j++)
-    {
-        vals[pos + j] = val1;
-        keys[pos + j] = key1;
-    }
-}
diff --git a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
deleted file mode 100644
index c573e3e..0000000
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ /dev/null
@@ -1,264 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef K_T
-#define K_T float
-#endif
-
-#ifndef V_T
-#define V_T float
-#endif
-
-#ifndef IS_GT
-#define IS_GT false
-#endif
-
-#if IS_GT
-#define my_comp(x,y) ((x) > (y))
-#else
-#define my_comp(x,y) ((x) < (y))
-#endif
-
-//  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
-//  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
-//  passed as a functor parameter my_comp
-//  This function returns an index that is the first index whos value would be equal to the searched value
-inline uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
-{
-    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
-    uint firstIndex = left;
-    uint lastIndex = right;
-
-    //  This loops through [firstIndex, lastIndex)
-    //  Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
-    //  this while loop will be divergent within a wavefront
-    while( firstIndex < lastIndex )
-    {
-        //  midIndex is the average of first and last, rounded down
-        uint midIndex = ( firstIndex + lastIndex ) / 2;
-        K_T midValue = data[ midIndex ];
-
-        //  This branch will create divergent wavefronts
-        if( my_comp( midValue, searchVal ) )
-        {
-            firstIndex = midIndex+1;
-            // printf( "lowerBound: lastIndex[ %i ]=%i\n", get_local_id( 0 ), lastIndex );
-        }
-        else
-        {
-            lastIndex = midIndex;
-            // printf( "lowerBound: firstIndex[ %i ]=%i\n", get_local_id( 0 ), firstIndex );
-        }
-    }
-
-    return firstIndex;
-}
-
-//  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
-//  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
-//  passed as a functor parameter my_comp
-//  This function returns an index that is the first index whos value would be greater than the searched value
-//  If the search value is not found in the sequence, upperbound returns the same result as lowerbound
-inline uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
-{
-    uint upperBound = lowerBoundBinary( data, left, right, searchVal );
-
-    // printf( "upperBoundBinary: upperBound[ %i, %i ]= %i\n", left, right, upperBound );
-    //  If upperBound == right, then  searchVal was not found in the sequence.  Just return.
-    if( upperBound != right )
-    {
-        //  While the values are equal i.e. !(x < y) && !(y < x) increment the index
-        K_T upperValue = data[ upperBound ];
-        while( !my_comp( upperValue, searchVal ) && !my_comp( searchVal, upperValue) && (upperBound != right) )
-        {
-            upperBound++;
-            upperValue = data[ upperBound ];
-        }
-    }
-
-    return upperBound;
-}
-
-//  This kernel implements merging of blocks of sorted data.  The input to this kernel most likely is
-//  the output of blockInsertionSortTemplate.  It is expected that the source array contains multiple
-//  blocks, each block is independently sorted.  The goal is to write into the output buffer half as
-//  many blocks, of double the size.  The even and odd blocks are stably merged together to form
-//  a new sorted block of twice the size.  The algorithm is out-of-place.
-kernel void merge(
-    global K_T*   iKey_ptr,
-    global V_T*   iValue_ptr,
-    global K_T*   oKey_ptr,
-    global V_T*   oValue_ptr,
-    const uint    srcVecSize,
-    const uint    srcLogicalBlockSize,
-    local K_T*    key_lds,
-    local V_T*    val_lds
-)
-{
-    size_t globalID     = get_global_id( 0 );
-
-    //  Abort threads that are passed the end of the input vector
-    if( globalID >= srcVecSize )
-        return; // on SI this doesn't mess-up barriers
-
-    //  For an element in sequence A, find the lowerbound index for it in sequence B
-    uint srcBlockNum   = globalID / srcLogicalBlockSize;
-    uint srcBlockIndex = globalID % srcLogicalBlockSize;
-
-    // printf( "mergeTemplate: srcBlockNum[%i]=%i\n", srcBlockNum, srcBlockIndex );
-
-    //  Pairs of even-odd blocks will be merged together
-    //  An even block should search for an insertion point in the next odd block,
-    //  and the odd block should look for an insertion point in the corresponding previous even block
-    uint dstLogicalBlockSize = srcLogicalBlockSize<<1;
-    uint leftBlockIndex = globalID & ~((dstLogicalBlockSize) - 1 );
-    leftBlockIndex += (srcBlockNum & 0x1) ? 0 : srcLogicalBlockSize;
-    leftBlockIndex = min( leftBlockIndex, srcVecSize );
-    uint rightBlockIndex = min( leftBlockIndex + srcLogicalBlockSize, srcVecSize );
-
-    // if( localID == 0 )
-    // {
-    // printf( "mergeTemplate: wavefront[ %i ] logicalBlock[ %i ] logicalIndex[ %i ] leftBlockIndex[ %i ] <=> rightBlockIndex[ %i ]\n", groupID, srcBlockNum, srcBlockIndex, leftBlockIndex, rightBlockIndex );
-    // }
-
-    //  For a particular element in the input array, find the lowerbound index for it in the search sequence given by leftBlockIndex & rightBlockIndex
-    // uint insertionIndex = lowerBoundLinear( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ], my_comp ) - leftBlockIndex;
-    uint insertionIndex = 0;
-    if( (srcBlockNum & 0x1) == 0 )
-    {
-        insertionIndex = lowerBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
-    }
-    else
-    {
-        insertionIndex = upperBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
-    }
-
-    //  The index of an element in the result sequence is the summation of it's indixes in the two input
-    //  sequences
-    uint dstBlockIndex = srcBlockIndex + insertionIndex;
-    uint dstBlockNum = srcBlockNum/2;
-
-    // if( (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex == 395 )
-    // {
-    // printf( "mergeTemplate: (dstBlockNum[ %i ] * dstLogicalBlockSize[ %i ]) + dstBlockIndex[ %i ] = srcBlockIndex[ %i ] + insertionIndex[ %i ]\n", dstBlockNum, dstLogicalBlockSize, dstBlockIndex, srcBlockIndex, insertionIndex );
-    // printf( "mergeTemplate: dstBlockIndex[ %i ] = iKey_ptr[ %i ] ( %i )\n", (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex, globalID, iKey_ptr[ globalID ] );
-    // }
-    oKey_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iKey_ptr[ globalID ];
-    oValue_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iValue_ptr[ globalID ];
-    // printf( "mergeTemplate: leftResultIndex[ %i ]=%i + %i\n", leftResultIndex, srcBlockIndex, leftInsertionIndex );
-}
-
-kernel void blockInsertionSort(
-    global K_T*   key_ptr,
-    global V_T*   value_ptr,
-    const uint    vecSize,
-    local K_T*    key_lds,
-    local V_T*    val_lds
-)
-{
-    int gloId    = get_global_id( 0 );
-    int groId    = get_group_id( 0 );
-    int locId    = get_local_id( 0 );
-    int wgSize   = get_local_size( 0 );
-
-    bool in_range = gloId < (int)vecSize;
-    K_T key;
-    V_T val;
-    //  Abort threads that are passed the end of the input vector
-    if (in_range)
-    {
-        //  Make a copy of the entire input array into fast local memory
-        key = key_ptr[ gloId ];
-        val = value_ptr[ gloId ];
-        key_lds[ locId ] = key;
-        val_lds[ locId ] = val;
-    }
-    barrier( CLK_LOCAL_MEM_FENCE );
-    //  Sorts a workgroup using a naive insertion sort
-    //  The sort uses one thread within a workgroup to sort the entire workgroup
-    if( locId == 0 && in_range )
-    {
-        //  The last workgroup may have an irregular size, so we calculate a per-block endIndex
-        //  endIndex is essentially emulating a mod operator with subtraction and multiply
-        int endIndex = vecSize - ( groId * wgSize );
-        endIndex = min( endIndex, wgSize );
-
-        // printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
-
-        //  Indices are signed because the while loop will generate a -1 index inside of the max function
-        for( int currIndex = 1; currIndex < endIndex; ++currIndex )
-        {
-            key = key_lds[ currIndex ];
-            val = val_lds[ currIndex ];
-            int scanIndex = currIndex;
-            K_T ldsKey = key_lds[scanIndex - 1];
-            while( scanIndex > 0 && my_comp( key, ldsKey ) )
-            {
-                V_T ldsVal = val_lds[scanIndex - 1];
-
-                //  If the keys are being swapped, make sure the values are swapped identicaly
-                key_lds[ scanIndex ] = ldsKey;
-                val_lds[ scanIndex ] = ldsVal;
-
-                scanIndex = scanIndex - 1;
-                ldsKey = key_lds[ max( 0, scanIndex - 1 ) ];  // scanIndex-1 may be -1
-            }
-            key_lds[ scanIndex ] = key;
-            val_lds[ scanIndex ] = val;
-        }
-    }
-    barrier( CLK_LOCAL_MEM_FENCE );
-
-    if(in_range)
-    {
-        key = key_lds[ locId ];
-        key_ptr[ gloId ] = key;
-
-        val = val_lds[ locId ];
-        value_ptr[ gloId ] = val;
-    }
-}
-
-///////////// Radix sort from b40c library /////////////
diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl
deleted file mode 100644
index bb0e9c9..0000000
--- a/modules/ocl/src/opencl/kmeans_kernel.cl
+++ /dev/null
@@ -1,107 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Xiaopeng Fu, fuxiaopeng2222@163.com
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-inline float distance_(__global const float * center, __global const float * src, int feature_length)
-{
-    float res = 0;
-    float4 v0, v1, v2;
-    int i = 0;
-
-#ifdef L1_DIST
-    float4 sum = (float4)(0.0f);
-#endif
-
-    for ( ; i <= feature_length - 4; i += 4)
-    {
-        v0 = vload4(0, center + i);
-        v1 = vload4(0, src + i);
-        v2 = v1 - v0;
-#ifdef L1_DIST
-        v0 = fabs(v2);
-        sum += v0;
-#else
-        res += dot(v2, v2);
-#endif
-    }
-
-#ifdef L1_DIST
-    res = sum.x + sum.y + sum.z + sum.w;
-#endif
-
-    for ( ; i < feature_length; ++i)
-    {
-        float t0 = src[i];
-        float t1 = center[i];
-#ifdef L1_DIST
-        res += fabs(t0 - t1);
-#else
-        float t2 = t0 - t1;
-        res += t2 * t2;
-#endif
-    }
-
-    return res;
-}
-
-__kernel void distanceToCenters(__global const float * src, __global const float * centers,
-                                __global float * dists, int feature_length,
-                                int src_step, int centers_step,
-                                int features_count, int centers_count,
-                                int src_offset, int centers_offset)
-{
-    int gid = get_global_id(0);
-
-    if (gid < (features_count * centers_count))
-    {
-        int feature_index = gid / centers_count;
-        int center_index = gid % centers_count;
-
-        int center_idx = mad24(center_index, centers_step, centers_offset);
-        int src_idx = mad24(feature_index, src_step, src_offset);
-
-        dists[gid] = distance_(centers + center_idx, src + src_idx, feature_length);
-    }
-}
diff --git a/modules/ocl/src/opencl/knearest.cl b/modules/ocl/src/opencl/knearest.cl
deleted file mode 100644
index 85e2451..0000000
--- a/modules/ocl/src/opencl/knearest.cl
+++ /dev/null
@@ -1,186 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define TYPE double
-#else
-#define TYPE float
-#endif
-
-#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t))
-///////////////////////////////////// find_nearest //////////////////////////////////////
-__kernel void knn_find_nearest(__global float* sample, int sample_row, int sample_col, int sample_step,
-                               int k, __global float* samples_ocl, int sample_ocl_row, int sample_ocl_step,
-                               __global float* _results, int _results_step, int _regression, int K1,
-                               int sample_ocl_col, int nThreads, __local float* nr)
-{
-    int k1 = 0;
-    int k2 = 0;
-
-    bool regression = false;
-
-    if(_regression)
-        regression = true;
-
-    TYPE inv_scale;
-#ifdef DOUBLE_SUPPORT
-    inv_scale = 1.0/K1;
-#else
-    inv_scale = 1.0f/K1;
-#endif
-
-    int y = get_global_id(1);
-    int j, j1;
-    int threadY = (y % nThreads);
-    __local float* dd = nr + nThreads * k;
-    if(y >= sample_row)
-    {
-        return;
-    }
-    for(j = 0; j < sample_ocl_row; j++)
-    {
-        TYPE sum;
-#ifdef DOUBLE_SUPPORT
-        sum = 0.0;
-#else
-        sum = 0.0f;
-#endif
-        float si;
-        int t, ii, ii1;
-        for(t = 0; t < sample_col - 16; t += 16)
-        {
-            float16 t0 = vload16(0, sample + y * sample_step + t) - vload16(0, samples_ocl + j * sample_ocl_step + t);
-            t0 *= t0;
-            sum += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
-                t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
-        }
-
-        for(; t < sample_col; t++)
-        {
-#ifdef DOUBLE_SUPPORT
-            double t0 = sample[y * sample_step + t] - samples_ocl[j * sample_ocl_step + t];
-#else
-            float t0 = sample[y * sample_step + t] - samples_ocl[j * sample_ocl_step + t];
-#endif
-            sum = sum + t0 * t0;
-        }
-
-        si = (float)sum;
-        for(ii = k1 - 1; ii >= 0; ii--)
-        {
-            if(as_int(si) > as_int(dd[ii * nThreads + threadY]))
-                break;
-        }
-        if(ii < k - 1)
-        {
-            for(ii1 = k2 - 1; ii1 > ii; ii1--)
-            {
-                dd[(ii1 + 1) * nThreads + threadY] = dd[ii1 * nThreads + threadY];
-                nr[(ii1 + 1) * nThreads + threadY] = nr[ii1 * nThreads + threadY];
-            }
-
-            dd[(ii + 1) * nThreads + threadY] = si;
-            nr[(ii + 1) * nThreads + threadY] = samples_ocl[sample_col + j * sample_ocl_step];
-        }
-        k1 = (k1 + 1) < k ? (k1 + 1) : k;
-        k2 = k1 < (k - 1) ? k1 : (k - 1);
-    }
-    /*! find_nearest_neighbor done!*/
-    /*! write_results start!*/
-    if (regression)
-    {
-        TYPE s;
-#ifdef DOUBLE_SUPPORT
-        s = 0.0;
-#else
-        s = 0.0f;
-#endif
-        for(j = 0; j < K1; j++)
-            s += nr[j * nThreads + threadY];
-
-        _results[y * _results_step] = (float)(s * inv_scale);
-    }
-    else
-    {
-        int prev_start = 0, best_count = 0, cur_count;
-        float best_val;
-
-        for(j = K1 - 1; j > 0; j--)
-        {
-            bool swap_f1 = false;
-            for(j1 = 0; j1 < j; j1++)
-            {
-                if(nr[j1 * nThreads + threadY] > nr[(j1 + 1) * nThreads + threadY])
-                {
-                    int t;
-                    CV_SWAP(nr[j1 * nThreads + threadY], nr[(j1 + 1) * nThreads + threadY], t);
-                    swap_f1 = true;
-                }
-            }
-            if(!swap_f1)
-                break;
-        }
-
-        best_val = 0;
-        for(j = 1; j <= K1; j++)
-            if(j == K1 || nr[j * nThreads + threadY] != nr[(j - 1) * nThreads + threadY])
-            {
-                cur_count = j - prev_start;
-                if(best_count < cur_count)
-                {
-                    best_count = cur_count;
-                    best_val = nr[(j - 1) * nThreads + threadY];
-                }
-                prev_start = j;
-            }
-            _results[y * _results_step] = best_val;
-    }
-    ///*! write_results done!*/
-}
diff --git a/modules/ocl/src/opencl/match_template.cl b/modules/ocl/src/opencl/match_template.cl
deleted file mode 100644
index 4d46d00..0000000
--- a/modules/ocl/src/opencl/match_template.cl
+++ /dev/null
@@ -1,853 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define TYPE_IMAGE_SQSUM double
-#else
-#define TYPE_IMAGE_SQSUM float
-#endif
-
-#ifndef CN4
-#define CN4 1
-#else
-#define CN4 4
-#endif
-
-//////////////////////////////////////////////////
-// utilities
-#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, (gidx + img_sqsums_offset + ox) * CN4)
-#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
-// normAcc* are accurate normalization routines which make GPU matchTemplate
-// consistent with CPU one
-inline float normAcc(float num, float denum)
-{
-    if(fabs(num) < denum)
-    {
-        return num / denum;
-    }
-    if(fabs(num) < denum * 1.125f)
-    {
-        return num > 0 ? 1 : -1;
-    }
-    return 0;
-}
-
-inline float normAcc_SQDIFF(float num, float denum)
-{
-    if(fabs(num) < denum)
-    {
-        return num / denum;
-    }
-    if(fabs(num) < denum * 1.125f)
-    {
-        return num > 0 ? 1 : -1;
-    }
-    return 1;
-}
-//////////////////////////////////////////////////////////////////////
-// normalize
-
-__kernel
-void normalizeKernel_C1_D0
-(
-    __global const float * img_sqsums,
-    __global float * res,
-    ulong tpl_sqsum,
-    int res_rows,
-    int res_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int img_sqsums_offset,
-    int img_sqsums_step,
-    int res_offset,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-    img_sqsums_step /= sizeof(*img_sqsums);
-    img_sqsums_offset /= sizeof(*img_sqsums);
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float image_sqsum_ = (float)(
-                                 (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
-                                 (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
-        res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum));
-    }
-}
-
-__kernel
-void matchTemplate_Prepared_SQDIFF_C1_D0
-(
-    __global const TYPE_IMAGE_SQSUM * img_sqsums,
-    __global float * res,
-    ulong tpl_sqsum,
-    int res_rows,
-    int res_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int img_sqsums_offset,
-    int img_sqsums_step,
-    int res_offset,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-    img_sqsums_step /= sizeof(*img_sqsums);
-    img_sqsums_offset /= sizeof(*img_sqsums);
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float image_sqsum_ = (float)(
-                                 (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
-                                 (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
-        res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum;
-    }
-}
-
-__kernel
-void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
-(
-    __global const float * img_sqsums,
-    __global float * res,
-    ulong tpl_sqsum,
-    int res_rows,
-    int res_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int img_sqsums_offset,
-    int img_sqsums_step,
-    int res_offset,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-    img_sqsums_step /= sizeof(*img_sqsums);
-    img_sqsums_offset /= sizeof(*img_sqsums);
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float image_sqsum_ = (float)(
-                                 (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
-                                 (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
-        res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum,
-                                      sqrt(image_sqsum_ * tpl_sqsum));
-    }
-}
-
-//////////////////////////////////////////////////
-// SQDIFF
-__kernel
-void matchTemplate_Naive_SQDIFF_C1_D0
-(
-    __global const uchar * img,
-    __global const uchar * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    int delta;
-    int sum = 0;
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                delta = img_ptr[j] - tpl_ptr[j];
-                sum   = mad24(delta, delta, sum);
-            }
-        }
-        res[res_idx] = sum;
-    }
-}
-
-__kernel
-void matchTemplate_Naive_SQDIFF_C1_D5
-(
-    __global const float * img,
-    __global const float * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    float delta;
-    float sum = 0;
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                delta = img_ptr[j] - tpl_ptr[j];
-                sum   = mad(delta, delta, sum);
-            }
-        }
-        res[res_idx] = sum;
-    }
-}
-
-__kernel
-void matchTemplate_Naive_SQDIFF_C4_D0
-(
-    __global const uchar4 * img,
-    __global const uchar4 * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    int4 delta;
-    int4 sum = (int4)(0, 0, 0, 0);
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
-                delta.x = img_ptr[j].x - tpl_ptr[j].x;
-                delta.y = img_ptr[j].y - tpl_ptr[j].y;
-                delta.z = img_ptr[j].z - tpl_ptr[j].z;
-                delta.w = img_ptr[j].w - tpl_ptr[j].w;
-                sum   = mad24(delta, delta, sum);
-            }
-        }
-        res[res_idx] = sum.x + sum.y + sum.z + sum.w;
-    }
-}
-
-__kernel
-void matchTemplate_Naive_SQDIFF_C4_D5
-(
-    __global const float4 * img,
-    __global const float4 * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    float4 delta;
-    float4 sum = (float4)(0, 0, 0, 0);
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
-                delta.x = img_ptr[j].x - tpl_ptr[j].x;
-                delta.y = img_ptr[j].y - tpl_ptr[j].y;
-                delta.z = img_ptr[j].z - tpl_ptr[j].z;
-                delta.w = img_ptr[j].w - tpl_ptr[j].w;
-                sum   = mad(delta, delta, sum);
-            }
-        }
-        res[res_idx] = sum.x + sum.y + sum.z + sum.w;
-    }
-}
-
-//////////////////////////////////////////////////
-// CCORR
-__kernel
-void matchTemplate_Naive_CCORR_C1_D0
-(
-    __global const uchar * img,
-    __global const uchar * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    int sum = 0;
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                sum = mad24(convert_int(img_ptr[j]), convert_int(tpl_ptr[j]), sum);
-            }
-        }
-        res[res_idx] = (float)sum;
-    }
-}
-
-__kernel
-void matchTemplate_Naive_CCORR_C1_D5
-(
-    __global const float * img,
-    __global const float * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    float sum = 0;
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                sum = mad(img_ptr[j], tpl_ptr[j], sum);
-            }
-        }
-        res[res_idx] = sum;
-    }
-}
-
-__kernel
-void matchTemplate_Naive_CCORR_C4_D0
-(
-    __global const uchar4 * img,
-    __global const uchar4 * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    int4 sum = (int4)(0, 0, 0, 0);
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                sum   = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
-            }
-        }
-        res[res_idx] = (float)(sum.x + sum.y + sum.z + sum.w);
-    }
-}
-
-__kernel
-void matchTemplate_Naive_CCORR_C4_D5
-(
-    __global const float4 * img,
-    __global const float4 * tpl,
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int img_offset,
-    int tpl_offset,
-    int res_offset,
-    int img_step,
-    int tpl_step,
-    int res_step
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    int i,j;
-    float4 sum = (float4)(0, 0, 0, 0);
-    img_step   /= sizeof(*img);
-    img_offset /= sizeof(*img);
-    tpl_step   /= sizeof(*tpl);
-    tpl_offset /= sizeof(*tpl);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        for(i = 0; i < tpl_rows; i ++)
-        {
-            // get specific rows of img data
-            __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
-            __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
-            for(j = 0; j < tpl_cols; j ++)
-            {
-                sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum);
-            }
-        }
-        res[res_idx] = sum.x + sum.y + sum.z + sum.w;
-    }
-}
-
-//////////////////////////////////////////////////
-// CCOFF
-__kernel
-void matchTemplate_Prepared_CCOFF_C1_D0
-(
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int res_offset,
-    int res_step,
-    __global const uint * img_sums,
-    int img_sums_offset,
-    int img_sums_step,
-    float tpl_sum
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    img_sums_offset   /= sizeof(*img_sums);
-    img_sums_step     /= sizeof(*img_sums);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float sum = (float)((img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
-                            -(img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
-        res[res_idx] -= sum * tpl_sum;
-    }
-}
-__kernel
-void matchTemplate_Prepared_CCOFF_C4_D0
-(
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int res_offset,
-    int res_step,
-    __global const uint * img_sums_c0,
-    __global const uint * img_sums_c1,
-    __global const uint * img_sums_c2,
-    __global const uint * img_sums_c3,
-    int img_sums_offset,
-    int img_sums_step,
-    float tpl_sum_c0,
-    float tpl_sum_c1,
-    float tpl_sum_c2,
-    float tpl_sum_c3
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    img_sums_offset   /= sizeof(*img_sums_c0);
-    img_sums_step     /= sizeof(*img_sums_c0);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float ccorr = res[res_idx];
-        ccorr -= tpl_sum_c0*(float)(
-                     (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
-                     - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
-        ccorr -= tpl_sum_c1*(float)(
-                     (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
-                     - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
-        ccorr -= tpl_sum_c2*(float)(
-                     (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
-                     - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
-        ccorr -= tpl_sum_c3*(float)(
-                     (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
-                     - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
-        res[res_idx] = ccorr;
-    }
-}
-
-__kernel
-void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
-(
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int res_offset,
-    int res_step,
-    float weight,
-    __global const uint * img_sums,
-    int img_sums_offset,
-    int img_sums_step,
-    __global const float * img_sqsums,
-    int img_sqsums_offset,
-    int img_sqsums_step,
-    float tpl_sum,
-    float tpl_sqsum
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    img_sqsums_step   /= sizeof(*img_sqsums);
-    img_sqsums_offset /= sizeof(*img_sqsums);
-    img_sums_offset   /= sizeof(*img_sums);
-    img_sums_step     /= sizeof(*img_sums);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float image_sum_ =  (float)(
-                                (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
-                                - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
-
-        float image_sqsum_ = (float)(
-                                 (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
-                                 (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
-        res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum,
-                               sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_)));
-    }
-}
-__kernel
-void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
-(
-    __global float * res,
-    int img_rows,
-    int img_cols,
-    int tpl_rows,
-    int tpl_cols,
-    int res_rows,
-    int res_cols,
-    int res_offset,
-    int res_step,
-    float weight,
-    __global const uint * img_sums_c0,
-    __global const uint * img_sums_c1,
-    __global const uint * img_sums_c2,
-    __global const uint * img_sums_c3,
-    int img_sums_offset,
-    int img_sums_step,
-    __global const float * img_sqsums_c0,
-    __global const float * img_sqsums_c1,
-    __global const float * img_sqsums_c2,
-    __global const float * img_sqsums_c3,
-    int img_sqsums_offset,
-    int img_sqsums_step,
-    float tpl_sum_c0,
-    float tpl_sum_c1,
-    float tpl_sum_c2,
-    float tpl_sum_c3,
-    float tpl_sqsum
-)
-{
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-
-    img_sqsums_step   /= sizeof(*img_sqsums_c0);
-    img_sqsums_offset /= sizeof(*img_sqsums_c0);
-    img_sums_offset   /= sizeof(*img_sums_c0);
-    img_sums_step     /= sizeof(*img_sums_c0);
-    res_step   /= sizeof(*res);
-    res_offset /= sizeof(*res);
-
-    int res_idx = mad24(gidy, res_step, res_offset + gidx);
-
-    if(gidx < res_cols && gidy < res_rows)
-    {
-        float image_sum_c0 =  (float)(
-                                  (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
-                                  - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
-        float image_sum_c1 =  (float)(
-                                  (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
-                                  - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
-        float image_sum_c2 =  (float)(
-                                  (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
-                                  - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
-        float image_sum_c3 =  (float)(
-                                  (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
-                                  - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
-
-        float image_sqsum_c0 = (float)(
-                                   (img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) -
-                                   (img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)]));
-        float image_sqsum_c1 = (float)(
-                                   (img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) -
-                                   (img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)]));
-        float image_sqsum_c2 = (float)(
-                                   (img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) -
-                                   (img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)]));
-        float image_sqsum_c3 = (float)(
-                                   (img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) -
-                                   (img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)]));
-
-        float num = res[res_idx] -
-                    image_sum_c0 * tpl_sum_c0 -
-                    image_sum_c1 * tpl_sum_c1 -
-                    image_sum_c2 * tpl_sum_c2 -
-                    image_sum_c3 * tpl_sum_c3;
-        float denum = sqrt( tpl_sqsum * (
-                                image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 +
-                                image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 +
-                                image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 +
-                                image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3)
-                          );
-        res[res_idx] = normAcc(num, denum);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// extractFirstChannel
-__kernel
-void extractFirstChannel
-(
-    const __global float4* img,
-    __global float* res,
-    int rows,
-    int cols,
-    int img_offset,
-    int res_offset,
-    int img_step,
-    int res_step
-)
-{
-    img_step   /= sizeof(float4);
-    res_step   /= sizeof(float);
-    img_offset /= sizeof(float4);
-    res_offset /= sizeof(float);
-    img += img_offset;
-    res += res_offset;
-    int gidx = get_global_id(0);
-    int gidy = get_global_id(1);
-    if(gidx < cols && gidy < rows)
-    {
-        res[gidx + gidy * res_step] = img[gidx + gidy * img_step].x;
-    }
-}
diff --git a/modules/ocl/src/opencl/meanShift.cl b/modules/ocl/src/opencl/meanShift.cl
deleted file mode 100644
index 3fff473..0000000
--- a/modules/ocl/src/opencl/meanShift.cl
+++ /dev/null
@@ -1,241 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//    Xu Pang, pangxu010@163.com
-//    Wenju He, wenju@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-inline short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
-               __global uchar4* in, int in_step, int dst_off, int src_off,
-               int cols, int rows, int sp, int sr, int maxIter, float eps)
-{
-    int isr2 = sr*sr;
-    in_step = in_step >> 2;
-    out_step = out_step >> 2;
-    src_off = src_off >> 2;
-    dst_off = dst_off >> 2;
-    int idx = src_off + y0 * in_step + x0;
-    uchar4 c = in[idx];
-    int base = dst_off + get_global_id(1)*out_step + get_global_id(0) ;
-
-    // iterate meanshift procedure
-    for( int iter = 0; iter < maxIter; iter++ )
-    {
-        int count = 0;
-        int4 s = (int4)0;
-        int sx = 0, sy = 0;
-
-        //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
-        //deal with the image boundary
-        int minx = (x0-sp)>0 ? x0-sp : 0;
-        int miny = (y0-sp)>0 ? y0-sp : 0;
-        int maxx = (x0+sp)<cols ? x0+sp : cols-1;
-        int maxy = (y0+sp)<rows ? y0+sp : rows-1;
-
-        for( int y = miny; y <= maxy; y++)
-        {
-            int rowCount = 0;
-            int x = minx;
-            for( ; x+3 <= maxx; x+=4 )
-            {
-                int id = src_off + y*in_step + x;
-                uchar16 t = (uchar16)(in[id],in[id+1],in[id+2],in[id+3]);
-                int norm2_1 = (t.s0 - c.x) * (t.s0 - c.x) + (t.s1 - c.y) * (t.s1 - c.y) +
-                              (t.s2 - c.z) * (t.s2 - c.z);
-                int norm2_2 = (t.s4 - c.x) * (t.s4 - c.x) + (t.s5 - c.y) * (t.s5 - c.y) +
-                              (t.s6 - c.z) * (t.s6 - c.z);
-                int norm2_3 = (t.s8 - c.x) * (t.s8 - c.x) + (t.s9 - c.y) * (t.s9 - c.y) +
-                              (t.sa - c.z) * (t.sa - c.z);
-                int norm2_4 = (t.sc - c.x) * (t.sc - c.x) + (t.sd - c.y) * (t.sd - c.y) +
-                              (t.se - c.z) * (t.se - c.z);
-                if( norm2_1 <= isr2 )
-                {
-                    s.x += t.s0; s.y += t.s1; s.z += t.s2;
-                    sx += x; rowCount++;
-                }
-                if( norm2_2 <= isr2 )
-                {
-                    s.x += t.s4; s.y += t.s5; s.z += t.s6;
-                    sx += x+1; rowCount++;
-                }
-                if( norm2_3 <= isr2 )
-                {
-                    s.x += t.s8; s.y += t.s9; s.z += t.sa;
-                    sx += x+2; rowCount++;
-                }
-                if( norm2_4 <= isr2 )
-                {
-                    s.x += t.sc; s.y += t.sd; s.z += t.se;
-                    sx += x+3; rowCount++;
-                }
-            }
-            if(x == maxx)
-            {
-                int id = src_off + y*in_step + x;
-                uchar4 t = in[id];
-                int norm2 = (t.s0 - c.x) * (t.s0 - c.x) + (t.s1 - c.y) * (t.s1 - c.y) +
-                            (t.s2 - c.z) * (t.s2 - c.z);
-                if( norm2 <= isr2 )
-                {
-                    s.x += t.s0; s.y += t.s1; s.z += t.s2;
-                    sx += x; rowCount++;
-                }
-
-            }
-            if(x+1 == maxx)
-            {
-                  int id = src_off + y*in_step + x;
-                  uchar8 t = (uchar8)(in[id],in[id+1]);
-                  int norm2_1 = (t.s0 - c.x) * (t.s0 - c.x) + (t.s1 - c.y) * (t.s1 - c.y) +
-                                (t.s2 - c.z) * (t.s2 - c.z);
-                  int norm2_2 = (t.s4 - c.x) * (t.s4 - c.x) + (t.s5 - c.y) * (t.s5 - c.y) +
-                                (t.s6 - c.z) * (t.s6 - c.z);
-                  if( norm2_1 <= isr2 )
-                  {
-                      s.x += t.s0; s.y += t.s1; s.z += t.s2;
-                      sx += x; rowCount++;
-                  }
-                  if( norm2_2 <= isr2 )
-                  {
-                      s.x += t.s4; s.y += t.s5; s.z += t.s6;
-                      sx += x+1; rowCount++;
-                  }
-            }
-            if(x+2 == maxx)
-            {
-                  int id = src_off + y*in_step + x;
-                  uchar16 t = (uchar16)(in[id],in[id+1],in[id+2],in[id+3]);
-                  int norm2_1 = (t.s0 - c.x) * (t.s0 - c.x) + (t.s1 - c.y) * (t.s1 - c.y) +
-                                (t.s2 - c.z) * (t.s2 - c.z);
-                  int norm2_2 = (t.s4 - c.x) * (t.s4 - c.x) + (t.s5 - c.y) * (t.s5 - c.y) +
-                                (t.s6 - c.z) * (t.s6 - c.z);
-                  int norm2_3 = (t.s8 - c.x) * (t.s8 - c.x) + (t.s9 - c.y) * (t.s9 - c.y) +
-                                (t.sa - c.z) * (t.sa - c.z);
-                  if( norm2_1 <= isr2 )
-                  {
-                      s.x += t.s0; s.y += t.s1; s.z += t.s2;
-                      sx += x; rowCount++;
-                  }
-                  if( norm2_2 <= isr2 )
-                  {
-                      s.x += t.s4; s.y += t.s5; s.z += t.s6;
-                      sx += x+1; rowCount++;
-                  }
-                  if( norm2_3 <= isr2 )
-                  {
-                      s.x += t.s8; s.y += t.s9; s.z += t.sa;
-                      sx += x+2; rowCount++;
-                  }
-            }
-            if(rowCount == 0)
-               continue;
-            count += rowCount;
-            if(y == 0)
-               continue;
-            sy += y*rowCount;
-        }
-
-        if( count == 0 )
-            break;
-
-        int x1 = sx/count;
-        int y1 = sy/count;
-        s.x = s.x/count;
-        s.y = s.y/count;
-        s.z = s.z/count;
-
-        int4 tmp = s - convert_int4(c);
-        int norm2 = tmp.x * tmp.x + tmp.y *  tmp.y +
-                    tmp.z * tmp.z;
-
-        bool stopFlag = (x1 == x0 && y1 == y0) || (abs(x1-x0) + abs(y1-y0) + norm2 <= eps);
-
-        x0 = x1;
-        y0 = y1;
-        c.x = s.x;
-        c.y = s.y;
-        c.z = s.z;
-
-        if( stopFlag )
-            break;
-    }
-
-    out[base] = c;
-
-    return (short2)((short)x0, (short)y0);
-}
-
-
-__kernel void meanshift_kernel(__global uchar4* out, int out_step,
-                               __global uchar4* in, int in_step,
-                        int dst_off, int src_off, int cols, int rows,
-                        int sp, int sr, int maxIter, float eps)
-{
-    int x0 = get_global_id(0);
-    int y0 = get_global_id(1);
-    if( x0 < cols && y0 < rows )
-        do_mean_shift(x0, y0, out, out_step, in, in_step, dst_off, src_off,
-                          cols, rows, sp, sr, maxIter, eps);
-}
-
-__kernel void meanshiftproc_kernel( __global uchar4* in, __global uchar4* outr,
-                             __global short2* outsp, int instep, int outrstep,
-                             int outspstep, int in_off, int outr_off, int outsp_off,
-                             int cols, int rows, int sp, int sr, int maxIter, float eps )
-{
-    int x0 = get_global_id(0);
-    int y0 = get_global_id(1);
-
-    if( x0 < cols && y0 < rows )
-    {
-        //int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
-        //*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
-        // we have ensured before that ((outspstep & 0x11)==0).
-        outsp_off >>= 2;
-        outspstep >>= 2;
-        int basesp = outsp_off + y0 * outspstep + x0;
-        outsp[basesp] = do_mean_shift(x0, y0, outr, outrstep, in, instep, outr_off, in_off, cols, rows, sp, sr, maxIter, eps);
-//        outsp[basesp] =(short2)((short)x0,(short)y0);
-    }
-}
diff --git a/modules/ocl/src/opencl/merge_mat.cl b/modules/ocl/src/opencl/merge_mat.cl
deleted file mode 100644
index aea05ae..0000000
--- a/modules/ocl/src/opencl/merge_mat.cl
+++ /dev/null
@@ -1,1378 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////optimized code using vector roi//////////////////////////
-////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void merge_vector_C2_D0(__global uchar *mat_dst,  int dst_step,  int dst_offset,
-                                 __global uchar *mat_src0, int src0_step, int src0_offset,
-                                 __global uchar *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 1;
-
-        #define dst_align  ((dst_offset & 3) >> 1)
-        int src0_index = mad24(y, src0_step, src0_offset + x - dst_align);
-        int src1_index = mad24(y, src1_step, src1_offset + x - dst_align);
-
-        int dst_start  = mad24(y, dst_step, dst_offset);
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
-        __global uchar4 * dst  = (__global uchar4 *)(mat_dst + dst_index);
-        __global uchar  * src0 = mat_src0 + src0_index;
-        __global uchar  * src1 = src0     + 1;
-        __global uchar  * src2 = mat_src1 + src1_index;
-        __global uchar  * src3 = src2     + 1;
-
-        uchar4 dst_data = *dst;
-        uchar  data_0   = *(src0);
-        uchar  data_1   = *(src1);
-        uchar  data_2   = *(src2);
-        uchar  data_3   = *(src3);
-
-        uchar4 tmp_data = (uchar4)(data_0, data_2, data_1, data_3);
-
-        tmp_data.xy = dst_index + 0 >= dst_start ? tmp_data.xy : dst_data.xy;
-        tmp_data.zw = dst_index + 2 <  dst_end   ? tmp_data.zw : dst_data.zw;
-
-        *dst = tmp_data;
-    }
-}
-__kernel void merge_vector_C2_D1(__global char *mat_dst,  int dst_step,  int dst_offset,
-                                 __global char *mat_src0, int src0_step, int src0_offset,
-                                 __global char *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 1;
-
-        #define dst_align  ((dst_offset & 3) >> 1)
-        int src0_index = mad24(y, src0_step, src0_offset + x - dst_align);
-        int src1_index = mad24(y, src1_step, src1_offset + x - dst_align);
-
-        int dst_start  = mad24(y, dst_step, dst_offset);
-        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index  = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
-        __global char4 * dst  = (__global char4 *)(mat_dst + dst_index);
-        __global char  * src0 = mat_src0 + src0_index;
-        __global char  * src1 = src0     + 1;
-        __global char  * src2 = mat_src1 + src1_index;
-        __global char  * src3 = src2     + 1;
-
-        char4 dst_data = *dst;
-        char  data_0   = *(src0);
-        char  data_1   = *(src1);
-        char  data_2   = *(src2);
-        char  data_3   = *(src3);
-
-        char4 tmp_data = (char4)(data_0, data_2, data_1, data_3);
-
-        tmp_data.xy = dst_index + 0 >= dst_start ? tmp_data.xy : dst_data.xy;
-        tmp_data.zw = dst_index + 2 <  dst_end   ? tmp_data.zw : dst_data.zw;
-
-        *dst = tmp_data;
-    }
-}
-__kernel void merge_vector_C2_D2(__global ushort *mat_dst,  int dst_step,  int dst_offset,
-                                 __global ushort *mat_src0, int src0_step, int src0_offset,
-                                 __global ushort *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        __global ushort*  src0 = (__global ushort * )((__global uchar *)mat_src0 + src0_index + (x << 1));
-        __global ushort*  src1 = (__global ushort * )((__global uchar *)mat_src1 + src1_index + (x << 1));
-        __global ushort2* dist = (__global ushort2 *)((__global uchar *)mat_dst  + dst_index  + (x << 2));
-
-        ushort  src0_data = *src0;
-        ushort  src1_data = *src1;
-
-        *dist = (ushort2)(src0_data, src1_data);
-
-    }
-}
-__kernel void merge_vector_C2_D3(__global short *mat_dst,  int dst_step,  int dst_offset,
-                                 __global short *mat_src0, int src0_step, int src0_offset,
-                                 __global short *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        __global short*  src0 = (__global short * )((__global uchar *)mat_src0 + src0_index + (x << 1));
-        __global short*  src1 = (__global short * )((__global uchar *)mat_src1 + src1_index + (x << 1));
-        __global short2* dist = (__global short2 *)((__global uchar *)mat_dst  + dst_index   + (x << 2));
-
-        short  src0_data = *src0;
-        short  src1_data = *src1;
-
-        *dist = (short2)(src0_data, src1_data);
-    }
-}
-
-__kernel void merge_vector_C2_D4(__global int *mat_dst,  int dst_step,  int dst_offset,
-                                 __global int *mat_src0, int src0_step, int src0_offset,
-                                 __global int *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        int src0 = *((__global int *)((__global uchar *)mat_src0 + src0_index + (x << 2)));
-        int src1 = *((__global int *)((__global uchar *)mat_src1 + src1_index + (x << 2)));
-
-        *((__global int2 *)((__global uchar *)mat_dst  + dst_index + (x << 3))) = (int2)(src0, src1);
-    }
-}
-__kernel void merge_vector_C2_D5(__global float *mat_dst,  int dst_step,  int dst_offset,
-                                 __global float *mat_src0, int src0_step, int src0_offset,
-                                 __global float *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        float src0 = *((__global float *)((__global uchar *)mat_src0 + src0_index + (x << 2)));
-        float src1 = *((__global float *)((__global uchar *)mat_src1 + src1_index + (x << 2)));
-
-        *((__global float2 *)((__global uchar *)mat_dst  + dst_index + (x << 3))) = (float2)(src0, src1);
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C2_D6(__global double *mat_dst,  int dst_step,  int dst_offset,
-                                 __global double *mat_src0, int src0_step, int src0_offset,
-                                 __global double *mat_src1, int src1_step, int src1_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        double src0 = *((__global double *)((__global uchar *)mat_src0 + src0_index + (x << 3)));
-        double src1 = *((__global double *)((__global uchar *)mat_src1 + src1_index + (x << 3)));
-
-        *((__global double2 *)((__global uchar *)mat_dst  + dst_index + (x << 4))) = (double2)(src0, src1);
-    }
-}
-#endif
-
-__kernel void merge_vector_C3_D0(__global uchar *mat_dst,  int dst_step,  int dst_offset,
-                                 __global uchar *mat_src0, int src0_step, int src0_offset,
-                                 __global uchar *mat_src1, int src1_step, int src1_offset,
-                                 __global uchar *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 2;
-
-        int src0_index = mad24(y, src0_step, x + src0_offset - offset_cols);
-        int src1_index = mad24(y, src1_step, x + src1_offset - offset_cols);
-        int src2_index = mad24(y, src2_step, x + src2_offset - offset_cols);
-
-        int dst_start = mad24(y, dst_step, dst_offset);
-        int dst_end   = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index = mad24(y, dst_step, dst_offset + 3 * x - offset_cols * 3);
-
-        uchar data0_0 = *(mat_src0 + src0_index + 0);
-        uchar data0_1 = *(mat_src0 + src0_index + 1);
-        uchar data0_2 = *(mat_src0 + src0_index + 2);
-        uchar data0_3 = *(mat_src0 + src0_index + 3);
-
-        uchar data1_0 = *(mat_src1 + src1_index + 0);
-        uchar data1_1 = *(mat_src1 + src1_index + 1);
-        uchar data1_2 = *(mat_src1 + src1_index + 2);
-        uchar data1_3 = *(mat_src1 + src1_index + 3);
-
-        uchar data2_0 = *(mat_src2 + src2_index + 0);
-        uchar data2_1 = *(mat_src2 + src2_index + 1);
-        uchar data2_2 = *(mat_src2 + src2_index + 2);
-        uchar data2_3 = *(mat_src2 + src2_index + 3);
-
-        uchar4 tmp_data0 = (uchar4)(data0_0, data1_0, data2_0, data0_1);
-        uchar4 tmp_data1 = (uchar4)(data1_1, data2_1, data0_2, data1_2);
-        uchar4 tmp_data2 = (uchar4)(data2_2, data0_3, data1_3, data2_3);
-
-        uchar4 dst_data0 = *((__global uchar4*)(mat_dst + dst_index + 0));
-        uchar4 dst_data1 = *((__global uchar4*)(mat_dst + dst_index + 4));
-        uchar4 dst_data2 = *((__global uchar4*)(mat_dst + dst_index + 8));
-
-        tmp_data0.x = ((dst_index + 0  >= dst_start) && (dst_index + 0  < dst_end)) ? tmp_data0.x : dst_data0.x;
-        tmp_data0.y = ((dst_index + 1  >= dst_start) && (dst_index + 1  < dst_end)) ? tmp_data0.y : dst_data0.y;
-        tmp_data0.z = ((dst_index + 2  >= dst_start) && (dst_index + 2  < dst_end)) ? tmp_data0.z : dst_data0.z;
-        tmp_data0.w = ((dst_index + 3  >= dst_start) && (dst_index + 3  < dst_end)) ? tmp_data0.w : dst_data0.w;
-
-        tmp_data1.x = ((dst_index + 4  >= dst_start) && (dst_index + 4  < dst_end)) ? tmp_data1.x : dst_data1.x;
-        tmp_data1.y = ((dst_index + 5  >= dst_start) && (dst_index + 5  < dst_end)) ? tmp_data1.y : dst_data1.y;
-        tmp_data1.z = ((dst_index + 6  >= dst_start) && (dst_index + 6  < dst_end)) ? tmp_data1.z : dst_data1.z;
-        tmp_data1.w = ((dst_index + 7  >= dst_start) && (dst_index + 7  < dst_end)) ? tmp_data1.w : dst_data1.w;
-
-        tmp_data2.x = ((dst_index + 8  >= dst_start) && (dst_index + 8  < dst_end)) ? tmp_data2.x : dst_data2.x;
-        tmp_data2.y = ((dst_index + 9  >= dst_start) && (dst_index + 9  < dst_end)) ? tmp_data2.y : dst_data2.y;
-        tmp_data2.z = ((dst_index + 10 >= dst_start) && (dst_index + 10 < dst_end)) ? tmp_data2.z : dst_data2.z;
-        tmp_data2.w = ((dst_index + 11 >= dst_start) && (dst_index + 11 < dst_end)) ? tmp_data2.w : dst_data2.w;
-
-        *((__global uchar4*)(mat_dst + dst_index + 0)) = tmp_data0;
-        *((__global uchar4*)(mat_dst + dst_index + 4)) = tmp_data1;
-        *((__global uchar4*)(mat_dst + dst_index + 8)) = tmp_data2;
-    }
-}
-__kernel void merge_vector_C3_D1(__global char *mat_dst,  int dst_step,  int dst_offset,
-                                 __global char *mat_src0, int src0_step, int src0_offset,
-                                 __global char *mat_src1, int src1_step, int src1_offset,
-                                 __global char *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 2;
-
-        int src0_index = mad24(y, src0_step, x + src0_offset - offset_cols);
-        int src1_index = mad24(y, src1_step, x + src1_offset - offset_cols);
-        int src2_index = mad24(y, src2_step, x + src2_offset - offset_cols);
-
-        int dst_start = mad24(y, dst_step, dst_offset);
-        int dst_end   = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index = mad24(y, dst_step, dst_offset + 3 * x - offset_cols * 3);
-
-        char data0_0 = *(mat_src0 + src0_index + 0);
-        char data0_1 = *(mat_src0 + src0_index + 1);
-        char data0_2 = *(mat_src0 + src0_index + 2);
-        char data0_3 = *(mat_src0 + src0_index + 3);
-
-        char data1_0 = *(mat_src1 + src1_index + 0);
-        char data1_1 = *(mat_src1 + src1_index + 1);
-        char data1_2 = *(mat_src1 + src1_index + 2);
-        char data1_3 = *(mat_src1 + src1_index + 3);
-
-        char data2_0 = *(mat_src2 + src2_index + 0);
-        char data2_1 = *(mat_src2 + src2_index + 1);
-        char data2_2 = *(mat_src2 + src2_index + 2);
-        char data2_3 = *(mat_src2 + src2_index + 3);
-
-        char4 tmp_data0 = (char4)(data0_0, data1_0, data2_0, data0_1);
-        char4 tmp_data1 = (char4)(data1_1, data2_1, data0_2, data1_2);
-        char4 tmp_data2 = (char4)(data2_2, data0_3, data1_3, data2_3);
-
-        char4 dst_data0 = *((__global char4*)(mat_dst + dst_index + 0));
-        char4 dst_data1 = *((__global char4*)(mat_dst + dst_index + 4));
-        char4 dst_data2 = *((__global char4*)(mat_dst + dst_index + 8));
-
-        tmp_data0.x = ((dst_index + 0  >= dst_start) && (dst_index + 0  < dst_end)) ? tmp_data0.x : dst_data0.x;
-        tmp_data0.y = ((dst_index + 1  >= dst_start) && (dst_index + 1  < dst_end)) ? tmp_data0.y : dst_data0.y;
-        tmp_data0.z = ((dst_index + 2  >= dst_start) && (dst_index + 2  < dst_end)) ? tmp_data0.z : dst_data0.z;
-        tmp_data0.w = ((dst_index + 3  >= dst_start) && (dst_index + 3  < dst_end)) ? tmp_data0.w : dst_data0.w;
-
-        tmp_data1.x = ((dst_index + 4  >= dst_start) && (dst_index + 4  < dst_end)) ? tmp_data1.x : dst_data1.x;
-        tmp_data1.y = ((dst_index + 5  >= dst_start) && (dst_index + 5  < dst_end)) ? tmp_data1.y : dst_data1.y;
-        tmp_data1.z = ((dst_index + 6  >= dst_start) && (dst_index + 6  < dst_end)) ? tmp_data1.z : dst_data1.z;
-        tmp_data1.w = ((dst_index + 7  >= dst_start) && (dst_index + 7  < dst_end)) ? tmp_data1.w : dst_data1.w;
-
-        tmp_data2.x = ((dst_index + 8  >= dst_start) && (dst_index + 8  < dst_end)) ? tmp_data2.x : dst_data2.x;
-        tmp_data2.y = ((dst_index + 9  >= dst_start) && (dst_index + 9  < dst_end)) ? tmp_data2.y : dst_data2.y;
-        tmp_data2.z = ((dst_index + 10 >= dst_start) && (dst_index + 10 < dst_end)) ? tmp_data2.z : dst_data2.z;
-        tmp_data2.w = ((dst_index + 11 >= dst_start) && (dst_index + 11 < dst_end)) ? tmp_data2.w : dst_data2.w;
-
-        *((__global char4*)(mat_dst + dst_index + 0)) = tmp_data0;
-        *((__global char4*)(mat_dst + dst_index + 4)) = tmp_data1;
-        *((__global char4*)(mat_dst + dst_index + 8)) = tmp_data2;
-    }
-}
-__kernel void merge_vector_C3_D2(__global ushort *mat_dst,  int dst_step,  int dst_offset,
-                                 __global ushort *mat_src0, int src0_step, int src0_offset,
-                                 __global ushort *mat_src1, int src1_step, int src1_offset,
-                                 __global ushort *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 1;
-
-        int src0_index = mad24(y, src0_step, (x << 1) + src0_offset - offset_cols);
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - offset_cols);
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - offset_cols);
-
-        int dst_start = mad24(y, dst_step, dst_offset);
-        int dst_end   = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index = mad24(y, dst_step, dst_offset + 6 * x - offset_cols * 6);
-
-        ushort data0_0 = *((__global ushort *)((__global char *)mat_src0 + src0_index + 0));
-        ushort data0_1 = *((__global ushort *)((__global char *)mat_src0 + src0_index + 2));
-
-        ushort data1_0 = *((__global ushort *)((__global char *)mat_src1 + src1_index + 0));
-        ushort data1_1 = *((__global ushort *)((__global char *)mat_src1 + src1_index + 2));
-
-        ushort data2_0 = *((__global ushort *)((__global char *)mat_src2 + src2_index + 0));
-        ushort data2_1 = *((__global ushort *)((__global char *)mat_src2 + src2_index + 2));
-
-        ushort2 tmp_data0 = (ushort2)(data0_0, data1_0);
-        ushort2 tmp_data1 = (ushort2)(data2_0, data0_1);
-        ushort2 tmp_data2 = (ushort2)(data1_1, data2_1);
-
-        ushort2 dst_data0 = *((__global ushort2*)((__global char *)mat_dst + dst_index + 0));
-        ushort2 dst_data1 = *((__global ushort2*)((__global char *)mat_dst + dst_index + 4));
-        ushort2 dst_data2 = *((__global ushort2*)((__global char *)mat_dst + dst_index + 8));
-
-        tmp_data0.x = ((dst_index + 0  >= dst_start) && (dst_index + 0  < dst_end)) ? tmp_data0.x : dst_data0.x;
-        tmp_data0.y = ((dst_index + 2  >= dst_start) && (dst_index + 2  < dst_end)) ? tmp_data0.y : dst_data0.y;
-
-        tmp_data1.x = ((dst_index + 4  >= dst_start) && (dst_index + 4  < dst_end)) ? tmp_data1.x : dst_data1.x;
-        tmp_data1.y = ((dst_index + 6  >= dst_start) && (dst_index + 6  < dst_end)) ? tmp_data1.y : dst_data1.y;
-
-        tmp_data2.x = ((dst_index + 8  >= dst_start) && (dst_index + 8  < dst_end)) ? tmp_data2.x : dst_data2.x;
-        tmp_data2.y = ((dst_index + 10 >= dst_start) && (dst_index + 10 < dst_end)) ? tmp_data2.y : dst_data2.y;
-
-        *((__global ushort2*)((__global char *)mat_dst + dst_index + 0)) = tmp_data0;
-        *((__global ushort2*)((__global char *)mat_dst + dst_index + 4)) = tmp_data1;
-        *((__global ushort2*)((__global char *)mat_dst + dst_index + 8)) = tmp_data2;
-    }
-}
-__kernel void merge_vector_C3_D3(__global short *mat_dst,  int dst_step,  int dst_offset,
-                                 __global short *mat_src0, int src0_step, int src0_offset,
-                                 __global short *mat_src1, int src1_step, int src1_offset,
-                                 __global short *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        x = x << 1;
-
-        int src0_index = mad24(y, src0_step, (x << 1) + src0_offset - offset_cols);
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - offset_cols);
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - offset_cols);
-
-        int dst_start = mad24(y, dst_step, dst_offset);
-        int dst_end   = mad24(y, dst_step, dst_offset + dst_step1);
-        int dst_index = mad24(y, dst_step, dst_offset + 6 * x - offset_cols * 6);
-
-        short data0_0 = *((__global short *)((__global char *)mat_src0 + src0_index + 0));
-        short data0_1 = *((__global short *)((__global char *)mat_src0 + src0_index + 2));
-
-        short data1_0 = *((__global short *)((__global char *)mat_src1 + src1_index + 0));
-        short data1_1 = *((__global short *)((__global char *)mat_src1 + src1_index + 2));
-
-        short data2_0 = *((__global short *)((__global char *)mat_src2 + src2_index + 0));
-        short data2_1 = *((__global short *)((__global char *)mat_src2 + src2_index + 2));
-
-        short2 tmp_data0 = (short2)(data0_0, data1_0);
-        short2 tmp_data1 = (short2)(data2_0, data0_1);
-        short2 tmp_data2 = (short2)(data1_1, data2_1);
-
-        short2 dst_data0 = *((__global short2*)((__global char *)mat_dst + dst_index + 0));
-        short2 dst_data1 = *((__global short2*)((__global char *)mat_dst + dst_index + 4));
-        short2 dst_data2 = *((__global short2*)((__global char *)mat_dst + dst_index + 8));
-
-        tmp_data0.x = ((dst_index + 0  >= dst_start) && (dst_index + 0  < dst_end)) ? tmp_data0.x : dst_data0.x;
-        tmp_data0.y = ((dst_index + 2  >= dst_start) && (dst_index + 2  < dst_end)) ? tmp_data0.y : dst_data0.y;
-
-        tmp_data1.x = ((dst_index + 4  >= dst_start) && (dst_index + 4  < dst_end)) ? tmp_data1.x : dst_data1.x;
-        tmp_data1.y = ((dst_index + 6  >= dst_start) && (dst_index + 6  < dst_end)) ? tmp_data1.y : dst_data1.y;
-
-        tmp_data2.x = ((dst_index + 8  >= dst_start) && (dst_index + 8  < dst_end)) ? tmp_data2.x : dst_data2.x;
-        tmp_data2.y = ((dst_index + 10 >= dst_start) && (dst_index + 10 < dst_end)) ? tmp_data2.y : dst_data2.y;
-
-        *((__global short2*)((__global char *)mat_dst + dst_index + 0)) = tmp_data0;
-        *((__global short2*)((__global char *)mat_dst + dst_index + 4)) = tmp_data1;
-        *((__global short2*)((__global char *)mat_dst + dst_index + 8)) = tmp_data2;
-    }
-}
-__kernel void merge_vector_C3_D4(__global int *mat_dst,  int dst_step,  int dst_offset,
-                                 __global int *mat_src0, int src0_step, int src0_offset,
-                                 __global int *mat_src1, int src1_step, int src1_offset,
-                                 __global int *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        __global int* src0 = (__global int * )((__global uchar *)mat_src0 + src0_index + (x << 2));
-        __global int* src1 = (__global int * )((__global uchar *)mat_src1 + src1_index + (x << 2));
-        __global int* src2 = (__global int * )((__global uchar *)mat_src2 + src2_index + (x << 2));
-
-        __global int* dist0 = (__global int *)((__global uchar *)mat_dst  + dst_index  + 3 * (x << 2));
-        __global int* dist1 = dist0 + 1;
-        __global int* dist2 = dist0 + 2;
-
-        int  src0_data = *src0;
-        int  src1_data = *src1;
-        int  src2_data = *src2;
-
-        *dist0 = src0_data;
-        *dist1 = src1_data;
-        *dist2 = src2_data;
-    }
-}
-__kernel void merge_vector_C3_D5(__global float *mat_dst,  int dst_step,  int dst_offset,
-                                 __global float *mat_src0, int src0_step, int src0_offset,
-                                 __global float *mat_src1, int src1_step, int src1_offset,
-                                 __global float *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        __global float* src0 = (__global float * )((__global uchar *)mat_src0 + src0_index + (x << 2));
-        __global float* src1 = (__global float * )((__global uchar *)mat_src1 + src1_index + (x << 2));
-        __global float* src2 = (__global float * )((__global uchar *)mat_src2 + src2_index + (x << 2));
-
-        __global float* dist0 = (__global float *)((__global uchar *)mat_dst  + dst_index  + 3 * (x << 2));
-        __global float* dist1 = dist0 + 1;
-        __global float* dist2 = dist0 + 2;
-
-        float  src0_data = *src0;
-        float  src1_data = *src1;
-        float  src2_data = *src2;
-
-        *dist0 = src0_data;
-        *dist1 = src1_data;
-        *dist2 = src2_data;
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C3_D6(__global double *mat_dst,  int dst_step,  int dst_offset,
-                                 __global double *mat_src0, int src0_step, int src0_offset,
-                                 __global double *mat_src1, int src1_step, int src1_offset,
-                                 __global double *mat_src2, int src2_step, int src2_offset, int offset_cols,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        __global double* src0 = (__global double * )((__global uchar *)mat_src0 + src0_index + (x << 3));
-        __global double* src1 = (__global double * )((__global uchar *)mat_src1 + src1_index + (x << 3));
-        __global double* src2 = (__global double * )((__global uchar *)mat_src2 + src2_index + (x << 3));
-
-        __global double* dist0 = (__global double *)((__global uchar *)mat_dst  + dst_index  + 3 * (x << 3));
-        __global double* dist1 = dist0 + 1;
-        __global double* dist2 = dist0 + 2;
-
-        double  src0_data = *src0;
-        double  src1_data = *src1;
-        double  src2_data = *src2;
-
-        *dist0 = src0_data;
-        *dist1 = src1_data;
-        *dist2 = src2_data;
-    }
-}
-#endif
-__kernel void merge_vector_C4_D0(__global uchar *mat_dst,  int dst_step,  int dst_offset,
-                                 __global uchar *mat_src0, int src0_step, int src0_offset,
-                                 __global uchar *mat_src1, int src1_step, int src1_offset,
-                                 __global uchar *mat_src2, int src2_step, int src2_offset,
-                                 __global uchar *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        uchar src0 = *(mat_src0 + src0_index + x );
-        uchar src1 = *(mat_src1 + src1_index + x);
-        uchar src2 = *(mat_src2 + src2_index + x);
-        uchar src3 = *(mat_src3 + src3_index + x);
-
-        *((__global uchar4 *)(mat_dst  + dst_index + (x << 2))) = (uchar4)(src0, src1, src2, src3);
-    }
-}
-__kernel void merge_vector_C4_D1(__global char *mat_dst,  int dst_step,  int dst_offset,
-                                 __global char *mat_src0, int src0_step, int src0_offset,
-                                 __global char *mat_src1, int src1_step, int src1_offset,
-                                 __global char *mat_src2, int src2_step, int src2_offset,
-                                 __global char *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        char src0 = *(mat_src0 + src0_index + x );
-        char src1 = *(mat_src1 + src1_index + x);
-        char src2 = *(mat_src2 + src2_index + x);
-        char src3 = *(mat_src3 + src3_index + x);
-
-        *((__global char4 *)(mat_dst  + dst_index + (x << 2))) = (char4)(src0, src1, src2, src3);
-    }
-}
-__kernel void merge_vector_C4_D2(__global ushort *mat_dst,  int dst_step,  int dst_offset,
-                                 __global ushort *mat_src0, int src0_step, int src0_offset,
-                                 __global ushort *mat_src1, int src1_step, int src1_offset,
-                                 __global ushort *mat_src2, int src2_step, int src2_offset,
-                                 __global ushort *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        ushort src0 = *((__global ushort *)((__global uchar *)mat_src0 + src0_index + (x << 1)));
-        ushort src1 = *((__global ushort *)((__global uchar *)mat_src1 + src1_index + (x << 1)));
-        ushort src2 = *((__global ushort *)((__global uchar *)mat_src2 + src2_index + (x << 1)));
-        ushort src3 = *((__global ushort *)((__global uchar *)mat_src3 + src3_index + (x << 1)));
-
-        *((__global ushort4 *)((__global uchar *)mat_dst  + dst_index + (x << 3))) = (ushort4)(src0, src1, src2, src3);
-    }
-}
-__kernel void merge_vector_C4_D3(__global short *mat_dst,  int dst_step,  int dst_offset,
-                                 __global short *mat_src0, int src0_step, int src0_offset,
-                                 __global short *mat_src1, int src1_step, int src1_offset,
-                                 __global short *mat_src2, int src2_step, int src2_offset,
-                                 __global short *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        short src0 = *((__global short *)((__global uchar *)mat_src0 + src0_index + (x << 1)));
-        short src1 = *((__global short *)((__global uchar *)mat_src1 + src1_index + (x << 1)));
-        short src2 = *((__global short *)((__global uchar *)mat_src2 + src2_index + (x << 1)));
-        short src3 = *((__global short *)((__global uchar *)mat_src3 + src3_index + (x << 1)));
-
-        *((__global short4 *)((__global uchar *)mat_dst  + dst_index + (x << 3))) = (short4)(src0, src1, src2, src3);
-    }
-}
-__kernel void merge_vector_C4_D4(__global int *mat_dst,  int dst_step,  int dst_offset,
-                                 __global int *mat_src0, int src0_step, int src0_offset,
-                                 __global int *mat_src1, int src1_step, int src1_offset,
-                                 __global int *mat_src2, int src2_step, int src2_offset,
-                                 __global int *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        int src0 = *((__global int *)((__global uchar *)mat_src0 + src0_index + (x << 2)));
-        int src1 = *((__global int *)((__global uchar *)mat_src1 + src1_index + (x << 2)));
-        int src2 = *((__global int *)((__global uchar *)mat_src2 + src2_index + (x << 2)));
-        int src3 = *((__global int *)((__global uchar *)mat_src3 + src3_index + (x << 2)));
-
-        *((__global int4 *)((__global uchar *)mat_dst  + dst_index + (x << 4))) = (int4)(src0, src1, src2, src3);
-    }
-}
-__kernel void merge_vector_C4_D5(__global float *mat_dst,  int dst_step,  int dst_offset,
-                                 __global float *mat_src0, int src0_step, int src0_offset,
-                                 __global float *mat_src1, int src1_step, int src1_offset,
-                                 __global float *mat_src2, int src2_step, int src2_offset,
-                                 __global float *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        float src0 = *((__global float *)((__global uchar *)mat_src0 + src0_index + (x << 2)));
-        float src1 = *((__global float *)((__global uchar *)mat_src1 + src1_index + (x << 2)));
-        float src2 = *((__global float *)((__global uchar *)mat_src2 + src2_index + (x << 2)));
-        float src3 = *((__global float *)((__global uchar *)mat_src3 + src3_index + (x << 2)));
-
-        *((__global float4 *)((__global uchar *)mat_dst  + dst_index + (x << 4))) = (float4)(src0, src1, src2, src3);
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C4_D6(__global double *mat_dst,  int dst_step,  int dst_offset,
-                                 __global double *mat_src0, int src0_step, int src0_offset,
-                                 __global double *mat_src1, int src1_step, int src1_offset,
-                                 __global double *mat_src2, int src2_step, int src2_offset,
-                                 __global double *mat_src3, int src3_step, int src3_offset,
-                                 int rows, int cols, int dst_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        int src0_index = mad24(y, src0_step, src0_offset);
-        int src1_index = mad24(y, src1_step, src1_offset);
-        int src2_index = mad24(y, src2_step, src2_offset);
-        int src3_index = mad24(y, src3_step, src3_offset);
-        int dst_index  = mad24(y, dst_step , dst_offset);
-
-        double src0 = *((__global double *)((__global uchar *)mat_src0 + src0_index + (x << 3)));
-        double src1 = *((__global double *)((__global uchar *)mat_src1 + src1_index + (x << 3)));
-        double src2 = *((__global double *)((__global uchar *)mat_src2 + src2_index + (x << 3)));
-        double src3 = *((__global double *)((__global uchar *)mat_src3 + src3_index + (x << 3)));
-
-        *((__global double4 *)((__global uchar *)mat_dst  + dst_index + (x << 5))) = (double4)(src0, src1, src2, src3);
-    }
-}
-#endif
-///////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////optimized code using vector  no roi//////////////////////////
-////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
-////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void merge_vector_C2_D0_1(int rows, int cols,
-                                   __global uchar *mat_dst,  int dst_step,
-                                   __global uchar *mat_src0, int src0_step,
-                                   __global uchar *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global uchar4  *src0_y = (__global uchar4 * )(mat_src0 + y * src0_step);
-        __global uchar4  *src1_y = (__global uchar4 * )(mat_src1 + y * src1_step);
-        __global uchar8 *dst_y  = (__global uchar8 *)(mat_dst  + y * dst_step);
-
-        uchar4 value1 = src0_y[x];
-        uchar4 value2 = src1_y[x];
-
-        uchar8 value;
-        value.even = value1;
-        value.odd = value2;
-
-        dst_y[x] = value;
-    }
-}
-__kernel void merge_vector_C2_D1_1(int rows, int cols,
-                                   __global char *mat_dst,  int dst_step,
-                                   __global char *mat_src0, int src0_step,
-                                   __global char *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global char4  *src0_y = (__global char4 * )(mat_src0 + y * src0_step);
-        __global char4  *src1_y = (__global char4 * )(mat_src1 + y * src1_step);
-        __global char8 *dst_y  = (__global char8 *)(mat_dst  + y * dst_step);
-
-        char4 value1 = src0_y[x];
-        char4 value2 = src1_y[x];
-
-        char8 value;
-        value.even = value1;
-        value.odd = value2;
-
-        dst_y[x] = value;
-    }
-}
-__kernel void merge_vector_C2_D2_1(int rows, int cols,
-                                   __global ushort *mat_dst,  int dst_step,
-                                   __global ushort *mat_src0, int src0_step,
-                                   __global ushort *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global ushort2  *src0_y = (__global ushort2 *)((__global uchar *)mat_src0 + y * src0_step);
-        __global ushort2  *src1_y = (__global ushort2 *)((__global uchar *)mat_src1 + y * src1_step);
-        __global ushort4  *dst_y  = (__global ushort4 *)((__global uchar *)mat_dst  + y * dst_step);
-
-        ushort2 value1 = src0_y[x];
-        ushort2 value2 = src1_y[x];
-
-        ushort4 value;
-        value.even = value1;
-        value.odd = value2;
-
-        dst_y[x] = value;
-    }
-}
-__kernel void merge_vector_C2_D3_1(int rows, int cols,
-                                   __global short *mat_dst,  int dst_step,
-                                   __global short *mat_src0, int src0_step,
-                                   __global short *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global short2  *src0_y = (__global short2 *)((__global uchar *)mat_src0 + y * src0_step);
-        __global short2  *src1_y = (__global short2 *)((__global uchar *)mat_src1 + y * src1_step);
-        __global short4 *dst_y   = (__global short4 *)((__global uchar *)mat_dst  + y * dst_step);
-
-        short2 value1 = src0_y[x];
-        short2 value2 = src1_y[x];
-
-        short4 value;
-        value.even = value1;
-        value.odd = value2;
-
-        dst_y[x] = value;
-    }
-}
-
-__kernel void merge_vector_C2_D4_1(int rows, int cols,
-                                   __global int *mat_dst,  int dst_step,
-                                   __global int *mat_src0, int src0_step,
-                                   __global int *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global int  *src0_y = (__global int *)((__global uchar *)mat_src0 + y * src0_step);
-        __global int  *src1_y = (__global int *)((__global uchar *)mat_src1 + y * src1_step);
-        __global int2  *dst_y  = (__global int2 *)((__global uchar *)mat_dst  + y * dst_step);
-
-        int value1 = src0_y[x];
-        int value2 = src1_y[x];
-
-        int2 value;
-        value.even = value1;
-        value.odd = value2;
-
-        dst_y[x] = value;
-    }
-}
-__kernel void merge_vector_C2_D5_1(int rows, int cols,
-                                   __global float *mat_dst,  int dst_step,
-                                   __global float *mat_src0, int src0_step,
-                                   __global float *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global float  *src0_y = (__global float *)((__global uchar *)mat_src0 + y * src0_step);
-        __global float  *src1_y = (__global float *)((__global uchar *)mat_src1 + y * src1_step);
-        __global float2  *dst_y  = (__global float2 *)((__global uchar *)mat_dst  + y * dst_step);
-
-        float value1 = src0_y[x];
-        float value2 = src1_y[x];
-
-        dst_y[x] = (float2)(value1, value2);
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C2_D6_1(int rows, int cols,
-                                   __global double *mat_dst,  int dst_step,
-                                   __global double *mat_src0, int src0_step,
-                                   __global double *mat_src1, int src1_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global double  *src0_y = (__global double *)((__global uchar *)mat_src0 + y * src0_step);
-        __global double  *src1_y = (__global double *)((__global uchar *)mat_src1 + y * src1_step);
-        __global double2 *dst_y  = (__global double2 *)((__global uchar *)mat_dst  + y * dst_step);
-
-        double value1 = src0_y[x];
-        double value2 = src1_y[x];
-
-        dst_y[x] = (double2)(value1, value2);
-    }
-}
-#endif
-
-__kernel void merge_vector_C3_D0_1(int rows, int cols,
-                                   __global uchar *mat_dst,  int dst_step,
-                                   __global uchar *mat_src0, int src0_step,
-                                   __global uchar *mat_src1, int src1_step,
-                                   __global uchar *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global uchar4  *src0_y = (__global uchar4 * )(mat_src0 + y * src0_step);
-        __global uchar4  *src1_y = (__global uchar4 * )(mat_src1 + y * src1_step);
-        __global uchar4  *src2_y = (__global uchar4 * )(mat_src2 + y * src0_step);
-
-        __global uchar4 *dst_y  = (__global uchar4 *)(mat_dst  + y * dst_step);
-
-        uchar4 value0 = src0_y[x];
-        uchar4 value1 = src1_y[x];
-        uchar4 value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = (uchar4)(value0.s0, value1.s0, value2.s0,
-                                    value0.s1);
-
-        dst_y[3 * x + 1] = (uchar4)(value1.s1, value2.s1,
-                                    value0.s2, value1.s2);
-
-        dst_y[3 * x + 2] = (uchar4)(value2.s2,
-                                    value0.s3, value1.s3, value2.s3);
-
-    }
-}
-__kernel void merge_vector_C3_D1_1(int rows, int cols,
-                                   __global char *mat_dst,  int dst_step,
-                                   __global char *mat_src0, int src0_step,
-                                   __global char *mat_src1, int src1_step,
-                                   __global char *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global char4  *src0_y = (__global char4 * )(mat_src0 + y * src0_step);
-        __global char4  *src1_y = (__global char4 * )(mat_src1 + y * src1_step);
-        __global char4  *src2_y = (__global char4 * )(mat_src2 + y * src0_step);
-
-        __global char4 *dst_y  = (__global char4 *)(mat_dst  + y * dst_step);
-
-        char4 value0 = src0_y[x];
-        char4 value1 = src1_y[x];
-        char4 value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = (char4)(value0.s0, value1.s0, value2.s0,
-                                   value0.s1);
-
-        dst_y[3 * x + 1] = (char4)(value1.s1, value2.s1,
-                                     value0.s2, value1.s2);
-
-        dst_y[3 * x + 2] = (char4)(value2.s2,
-                                     value0.s3, value1.s3, value2.s3);
-
-        /* for test do not delete
-        dst_y[3 * x + 0] = (char8)(value0.s0, value1.s0, value2.s0,
-                                    value0.s1, value1.s1, value2.s1,
-                                    value0.s2, value1.s2);
-
-        dst_y[3 * x + 1] = (char8)(value2.s2,
-                                    value0.s3, value1.s3, value2.s3,
-                                    value0.s4, value1.s4, value2.s4,
-                                    value0.s5);
-
-        dst_y[3 * x + 2] = (char8)(value1.s5, value2.s5,
-                                    value0.s6, value1.s6, value2.s6,
-                                    value0.s7, value1.s7, value2.s7);
-                                    */
-    }
-}
-__kernel void merge_vector_C3_D2_1(int rows, int cols,
-                                   __global ushort *mat_dst,  int dst_step,
-                                   __global ushort *mat_src0, int src0_step,
-                                   __global ushort *mat_src1, int src1_step,
-                                   __global ushort *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global ushort2  *src0_y = (__global ushort2 * )((__global char *)mat_src0 + y * src0_step);
-        __global ushort2  *src1_y = (__global ushort2 * )((__global char *)mat_src1 + y * src1_step);
-        __global ushort2  *src2_y = (__global ushort2 * )((__global char *)mat_src2 + y * src0_step);
-
-        __global ushort2 *dst_y  = (__global ushort2 *)((__global char *)mat_dst  + y * dst_step);
-
-        ushort2 value0 = src0_y[x];
-        ushort2 value1 = src1_y[x];
-        ushort2 value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = (ushort2)(value0.x, value1.x);
-        dst_y[3 * x + 1] = (ushort2)(value2.x, value0.y);
-        dst_y[3 * x + 2] = (ushort2)(value1.y, value2.y);
-
-    }
-}
-__kernel void merge_vector_C3_D3_1(int rows, int cols,
-                                   __global short *mat_dst,  int dst_step,
-                                   __global short *mat_src0, int src0_step,
-                                   __global short *mat_src1, int src1_step,
-                                   __global short *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global short2  *src0_y = (__global short2 * )((__global char *)mat_src0 + y * src0_step);
-        __global short2  *src1_y = (__global short2 * )((__global char *)mat_src1 + y * src1_step);
-        __global short2  *src2_y = (__global short2 * )((__global char *)mat_src2 + y * src0_step);
-
-        __global short2 *dst_y  = (__global short2 *)((__global char *)mat_dst  + y * dst_step);
-
-        short2 value0 = src0_y[x];
-        short2 value1 = src1_y[x];
-        short2 value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = (short2)(value0.x, value1.x);
-        dst_y[3 * x + 1] = (short2)(value2.x, value0.y);
-        dst_y[3 * x + 2] = (short2)(value1.y, value2.y);
-
-        /*
-        dst_y[3 * x + 0] = (short4)(value0.s0, value1.s0, value2.s0,
-                                    value0.s1);
-
-        dst_y[3 * x + 1] = (short4)(value1.s1, value2.s1,
-                                    value0.s2, value1.s2);
-
-        dst_y[3 * x + 2] = (short4)(value2.s2,
-                                    value0.s3, value1.s3, value2.s3);
-                                    */
-    }
-}
-__kernel void merge_vector_C3_D4_1(int rows, int cols,
-                                   __global int *mat_dst,  int dst_step,
-                                   __global int *mat_src0, int src0_step,
-                                   __global int *mat_src1, int src1_step,
-                                   __global int *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global int  *src0_y = (__global int * )((__global char *)mat_src0 + y * src0_step);
-        __global int  *src1_y = (__global int * )((__global char *)mat_src1 + y * src1_step);
-        __global int  *src2_y = (__global int * )((__global char *)mat_src2 + y * src0_step);
-
-        __global int *dst_y  = (__global int *)((__global char *)mat_dst  + y * dst_step);
-
-        int value0 = src0_y[x];
-        int value1 = src1_y[x];
-        int value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = value0;
-        dst_y[3 * x + 1] = value1;
-        dst_y[3 * x + 2] = value2;
-
-        /*for test do not delete
-        dst_y[3 * x + 0] = (int2)(value0.x, value1.x);
-        dst_y[3 * x + 1] = (int2)(value2.x, value0.y);
-        dst_y[3 * x + 2] = (int2)(value1.y, value2.y);
-        */
-    }
-}
-__kernel void merge_vector_C3_D5_1(int rows, int cols,
-                                   __global float *mat_dst,  int dst_step,
-                                   __global float *mat_src0, int src0_step,
-                                   __global float *mat_src1, int src1_step,
-                                   __global float *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global float  *src0_y = (__global float * )((__global char *)mat_src0 + y * src0_step);
-        __global float  *src1_y = (__global float * )((__global char *)mat_src1 + y * src1_step);
-        __global float  *src2_y = (__global float * )((__global char *)mat_src2 + y * src0_step);
-
-        __global float *dst_y  = (__global float *)((__global char *)mat_dst  + y * dst_step);
-
-        float value0 = src0_y[x];
-        float value1 = src1_y[x];
-        float value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = value0;
-        dst_y[3 * x + 1] = value1;
-        dst_y[3 * x + 2] = value2;
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C3_D6_1(int rows, int cols,
-                                   __global double *mat_dst,  int dst_step,
-                                   __global double *mat_src0, int src0_step,
-                                   __global double *mat_src1, int src1_step,
-                                   __global double *mat_src2, int src2_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global double  *src0_y = (__global double * )((__global char *)mat_src0 + y * src0_step);
-        __global double  *src1_y = (__global double * )((__global char *)mat_src1 + y * src1_step);
-        __global double  *src2_y = (__global double * )((__global char *)mat_src2 + y * src0_step);
-
-        __global double *dst_y  = (__global double *)((__global char *)mat_dst  + y * dst_step);
-
-        double value0 = src0_y[x];
-        double value1 = src1_y[x];
-        double value2 = src2_y[x];
-
-        dst_y[3 * x + 0] = value0;
-        dst_y[3 * x + 1] = value1;
-        dst_y[3 * x + 2] = value2;
-    }
-}
-#endif
-__kernel void merge_vector_C4_D0_1(int rows, int cols,
-                                   __global uchar *mat_dst,  int dst_step,
-                                   __global uchar *mat_src0, int src0_step,
-                                   __global uchar *mat_src1, int src1_step,
-                                   __global uchar *mat_src2, int src2_step,
-                                   __global uchar *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global uchar4  *src0_y = (__global uchar4 * )(mat_src0 + y * src0_step);
-        __global uchar4  *src1_y = (__global uchar4 * )(mat_src1 + y * src1_step);
-        __global uchar4  *src2_y = (__global uchar4 * )(mat_src2 + y * src0_step);
-        __global uchar4  *src3_y = (__global uchar4 * )(mat_src3 + y * src1_step);
-
-        __global uchar16 *dst_y  = (__global uchar16 *)(mat_dst  + y * dst_step);
-
-        uchar4 value0 = src0_y[x];
-        uchar4 value1 = src1_y[x];
-        uchar4 value2 = src2_y[x];
-        uchar4 value3 = src3_y[x];
-
-        dst_y[x] = (uchar16)(value0.x, value1.x, value2.x, value3.x,
-                             value0.y, value1.y, value2.y, value3.y,
-                             value0.z, value1.z, value2.z, value3.z,
-                             value0.w, value1.w, value2.w, value3.w);
-    }
-}
-
-__kernel void merge_vector_C4_D1_1(int rows, int cols,
-                                   __global char *mat_dst,  int dst_step,
-                                   __global char *mat_src0, int src0_step,
-                                   __global char *mat_src1, int src1_step,
-                                   __global char *mat_src2, int src2_step,
-                                   __global char *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global char4  *src0_y = (__global char4 * )(mat_src0 + y * src0_step);
-        __global char4  *src1_y = (__global char4 * )(mat_src1 + y * src1_step);
-        __global char4  *src2_y = (__global char4 * )(mat_src2 + y * src0_step);
-        __global char4  *src3_y = (__global char4 * )(mat_src3 + y * src1_step);
-
-        __global char16 *dst_y  = (__global char16 *)(mat_dst  + y * dst_step);
-
-        char4 value0 = src0_y[x];
-        char4 value1 = src1_y[x];
-        char4 value2 = src2_y[x];
-        char4 value3 = src3_y[x];
-
-        dst_y[x] = (char16)(value0.x, value1.x, value2.x, value3.x,
-                            value0.y, value1.y, value2.y, value3.y,
-                            value0.z, value1.z, value2.z, value3.z,
-                            value0.w, value1.w, value2.w, value3.w);
-    }
-}
-__kernel void merge_vector_C4_D2_1(int rows, int cols,
-                                   __global ushort *mat_dst,  int dst_step,
-                                   __global ushort *mat_src0, int src0_step,
-                                   __global ushort *mat_src1, int src1_step,
-                                   __global ushort *mat_src2, int src2_step,
-                                   __global ushort *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global ushort2  *src0_y = (__global ushort2 * )((__global uchar*)mat_src0 + y * src0_step);
-        __global ushort2  *src1_y = (__global ushort2 * )((__global uchar*)mat_src1 + y * src1_step);
-        __global ushort2  *src2_y = (__global ushort2 * )((__global uchar*)mat_src2 + y * src0_step);
-        __global ushort2  *src3_y = (__global ushort2 * )((__global uchar*)mat_src3 + y * src1_step);
-
-        __global ushort8 *dst_y  = (__global ushort8 *)((__global uchar*)mat_dst  + y * dst_step);
-
-        ushort2 value0 = src0_y[x];
-        ushort2 value1 = src1_y[x];
-        ushort2 value2 = src2_y[x];
-        ushort2 value3 = src3_y[x];
-
-        dst_y[x] = (ushort8)(value0.x, value1.x, value2.x, value3.x,
-                             value0.y, value1.y, value2.y, value3.y);
-    }
-}
-__kernel void merge_vector_C4_D3_1(int rows, int cols,
-                                   __global short *mat_dst,  int dst_step,
-                                   __global short *mat_src0, int src0_step,
-                                   __global short *mat_src1, int src1_step,
-                                   __global short *mat_src2, int src2_step,
-                                   __global short *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global short2  *src0_y = (__global short2 * )((__global uchar*)mat_src0 + y * src0_step);
-        __global short2  *src1_y = (__global short2 * )((__global uchar*)mat_src1 + y * src1_step);
-        __global short2  *src2_y = (__global short2 * )((__global uchar*)mat_src2 + y * src0_step);
-        __global short2  *src3_y = (__global short2 * )((__global uchar*)mat_src3 + y * src1_step);
-
-        __global short8 *dst_y  = (__global short8 *)((__global uchar*)mat_dst  + y * dst_step);
-
-        short2 value0 = src0_y[x];
-        short2 value1 = src1_y[x];
-        short2 value2 = src2_y[x];
-        short2 value3 = src3_y[x];
-
-        dst_y[x] = (short8)(value0.x, value1.x, value2.x, value3.x,
-                            value0.y, value1.y, value2.y, value3.y);
-    }
-}
-__kernel void merge_vector_C4_D4_1(int rows, int cols,
-                                   __global int *mat_dst,  int dst_step,
-                                   __global int *mat_src0, int src0_step,
-                                   __global int *mat_src1, int src1_step,
-                                   __global int *mat_src2, int src2_step,
-                                   __global int *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global int *src0_y = (__global int * )((__global uchar*)mat_src0 + y * src0_step);
-        __global int *src1_y = (__global int * )((__global uchar*)mat_src1 + y * src1_step);
-        __global int *src2_y = (__global int * )((__global uchar*)mat_src2 + y * src0_step);
-        __global int *src3_y = (__global int * )((__global uchar*)mat_src3 + y * src1_step);
-
-        __global int4 *dst_y  = (__global int4 *)((__global uchar*)mat_dst  + y * dst_step);
-
-        int value0 = src0_y[x];
-        int value1 = src1_y[x];
-        int value2 = src2_y[x];
-        int value3 = src3_y[x];
-
-        dst_y[x] = (int4)(value0, value1, value2, value3);
-    }
-}
-__kernel void merge_vector_C4_D5_1(int rows, int cols,
-                                   __global float *mat_dst,  int dst_step,
-                                   __global float *mat_src0, int src0_step,
-                                   __global float *mat_src1, int src1_step,
-                                   __global float *mat_src2, int src2_step,
-                                   __global float *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global float *src0_y = (__global float * )((__global uchar*)mat_src0 + y * src0_step);
-        __global float *src1_y = (__global float * )((__global uchar*)mat_src1 + y * src1_step);
-        __global float *src2_y = (__global float * )((__global uchar*)mat_src2 + y * src0_step);
-        __global float *src3_y = (__global float * )((__global uchar*)mat_src3 + y * src1_step);
-
-        __global float4 *dst_y  = (__global float4 *)((__global uchar*)mat_dst  + y * dst_step);
-
-        float value0 = src0_y[x];
-        float value1 = src1_y[x];
-        float value2 = src2_y[x];
-        float value3 = src3_y[x];
-
-        dst_y[x] = (float4)(value0, value1, value2, value3);
-    }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void merge_vector_C4_D6_1(int rows, int cols,
-                                   __global double *mat_dst,  int dst_step,
-                                   __global double *mat_src0, int src0_step,
-                                   __global double *mat_src1, int src1_step,
-                                   __global double *mat_src2, int src2_step,
-                                   __global double *mat_src3, int src3_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if ((x < cols) && (y < rows))
-    {
-        __global double *src0_y = (__global double * )((__global uchar*)mat_src0 + y * src0_step);
-        __global double *src1_y = (__global double * )((__global uchar*)mat_src1 + y * src1_step);
-        __global double *src2_y = (__global double * )((__global uchar*)mat_src2 + y * src0_step);
-        __global double *src3_y = (__global double * )((__global uchar*)mat_src3 + y * src1_step);
-
-        __global double4 *dst_y  = (__global double4 *)((__global uchar*)mat_dst  + y * dst_step);
-
-        double value0 = src0_y[x];
-        double value1 = src1_y[x];
-        double value2 = src2_y[x];
-        double value3 = src3_y[x];
-
-        dst_y[x] = (double4)(value0, value1, value2, value3);
-    }
-}
-#endif
diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl
deleted file mode 100644
index 09c79c4..0000000
--- a/modules/ocl/src/opencl/moments.cl
+++ /dev/null
@@ -1,432 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma,  jin@multicorewareinc.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-typedef double T;
-#else
-typedef long T;
-#endif
-
-#define DST_ROW_00     0
-#define DST_ROW_10     1
-#define DST_ROW_01     2
-#define DST_ROW_20     3
-#define DST_ROW_11     4
-#define DST_ROW_02     5
-#define DST_ROW_30     6
-#define DST_ROW_21     7
-#define DST_ROW_12     8
-#define DST_ROW_03     9
-
-__kernel void icvContourMoments(int contour_total,
-                                __global float* reader_oclmat_data,
-                                __global T* dst_a,
-                                int dst_step)
-{
-    T xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1;
-    int idx = get_global_id(0);
-
-    if (idx < 0 || idx >= contour_total)
-        return;
-
-    xi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1)));
-    yi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1) + 1));
-    xi_12 = xi_1 * xi_1;
-    yi_12 = yi_1 * yi_1;
-
-    if(idx == contour_total - 1)
-    {
-        xi = (T)(*(reader_oclmat_data));
-        yi = (T)(*(reader_oclmat_data + 1));
-    }
-    else
-    {
-        xi = (T)(*(reader_oclmat_data + (idx + 1) * 2));
-        yi = (T)(*(reader_oclmat_data + (idx + 1) * 2 + 1));
-    }
-    xi2 = xi * xi;
-    yi2 = yi * yi;
-    dxy = xi_1 * yi - xi * yi_1;
-    xii_1 = xi_1 + xi;
-    yii_1 = yi_1 + yi;
-
-    dst_step /= sizeof(T);
-    *( dst_a + DST_ROW_00 * dst_step + idx) = dxy;
-    *( dst_a + DST_ROW_10 * dst_step + idx) = dxy * xii_1;
-    *( dst_a + DST_ROW_01 * dst_step + idx) = dxy * yii_1;
-    *( dst_a + DST_ROW_20 * dst_step + idx) = dxy * (xi_1 * xii_1 + xi2);
-    *( dst_a + DST_ROW_11 * dst_step + idx) = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi));
-    *( dst_a + DST_ROW_02 * dst_step + idx) = dxy * (yi_1 * yii_1 + yi2);
-    *( dst_a + DST_ROW_30 * dst_step + idx) = dxy * xii_1 * (xi_12 + xi2);
-    *( dst_a + DST_ROW_03 * dst_step + idx) = dxy * yii_1 * (yi_12 + yi2);
-    *( dst_a + DST_ROW_21 * dst_step + idx) =
-        dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 +
-        xi2 * (yi_1 + 3 * yi));
-    *( dst_a + DST_ROW_12 * dst_step + idx) =
-        dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 +
-        yi2 * (xi_1 + 3 * xi));
-}
-
-#if defined (DOUBLE_SUPPORT)
-#define WT double
-#define WT4 double4
-#define convert_T4 convert_double4
-#define convert_T convert_double
-#else
-#define WT float
-#define WT4 float4
-#define convert_T4 convert_float4
-#define convert_T convert_float
-#endif
-
-#ifdef CV_8UC1
-#define TT uchar
-#elif defined CV_16UC1
-#define TT ushort
-#elif defined CV_16SC1
-#define TT short
-#elif defined CV_32FC1
-#define TT float
-#elif defined CV_64FC1
-#ifdef DOUBLE_SUPPORT
-#define TT double
-#else
-#define TT float
-#endif
-#endif
-__kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int src_step,
-                        __global WT* dst_m,
-                        int dst_cols, int dst_step, int binary)
-{
-    int dy = get_global_id(1);
-    int ly = get_local_id(1);
-    int gidx = get_group_id(0);
-    int gidy = get_group_id(1);
-    int x_rest = src_cols % 256;
-    int y_rest = src_rows % 256;
-    __local int codxy[256];
-    codxy[ly] = ly;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    WT4 x0 = (WT4)(0.f);
-    WT4 x1 = (WT4)(0.f);
-    WT4 x2 = (WT4)(0.f);
-    WT4 x3 = (WT4)(0.f);
-
-    __global TT* row = src_data + gidy * src_step + ly * src_step + gidx * 256;
-
-    WT4 p;
-    WT4 x;
-    WT4 xp;
-    WT4 xxp;
-
-    WT py = 0.f, sy = 0.f;
-
-    if(dy < src_rows)
-    {
-        if((x_rest > 0) && (gidx == ((int)get_num_groups(0) - 1)))
-        {
-            int i;
-            for(i = 0; i < x_rest - 4; i += 4)
-            {
-                p = convert_T4(vload4(0, row + i));
-                x = convert_T4(vload4(0, codxy + i));
-                xp = x * p;
-                xxp = xp * x;
-
-                x0 += p;
-                x1 += xp;
-                x2 += xxp;
-                x3 += convert_T4(xxp * x);
-            }
-
-            x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
-            x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
-            x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
-            x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
-
-            WT x0_ = 0;
-            WT x1_ = 0;
-            WT x2_ = 0;
-            WT x3_ = 0;
-
-            for(; i < x_rest; i++)
-            {
-                WT p_ = 0;
-                p_ = row[i];
-                WT x_ = convert_T(codxy[i]);
-
-
-                WT xp_ = x_ * p_;
-                WT xxp_ = xp_ * x_;
-
-                x0_ += p_;
-                x1_ += xp_;
-                x2_ += xxp_;
-                x3_ += xxp_ * x_;
-            }
-
-            x0.s0 += x0_;
-            x1.s0 += x1_;
-            x2.s0 += x2_;
-            x3.s0 += x3_;
-        }else
-        {
-            for(int i = 0; i < 256; i += 4)
-            {
-                p = convert_T4(vload4(0, row + i));
-                x = convert_T4(vload4(0, codxy + i));
-                xp = x * p;
-                xxp = xp * x;
-
-                x0 += p;
-                x1 += xp;
-                x2 += xxp;
-                x3 += convert_T4(xxp * x);
-            }
-
-            x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
-            x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
-            x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
-            x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
-        }
-
-        py = ly * x0.s0;
-        sy = ly * ly;
-    }
-    __local WT mom[10][256];
-
-    if((y_rest > 0) && (gidy == ((int)get_num_groups(1) - 1)))
-    {
-        if(ly < y_rest)
-        {
-            mom[9][ly] = py * sy;
-            mom[8][ly] = x1.s0 * sy;
-            mom[7][ly] = x2.s0 * ly;
-            mom[6][ly] = x3.s0;
-            mom[5][ly] = x0.s0 * sy;
-            mom[4][ly] = x1.s0 * ly;
-            mom[3][ly] = x2.s0;
-            mom[2][ly] = py;
-            mom[1][ly] = x1.s0;
-            mom[0][ly] = x0.s0;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(ly < 10)
-            for(int i = 1; i < y_rest; i++)
-                mom[ly][0] = mom[ly][i] + mom[ly][0];
-    }
-    else
-    {
-        mom[9][ly] = py * sy;
-        mom[8][ly] = x1.s0 * sy;
-        mom[7][ly] = x2.s0 * ly;
-        mom[6][ly] = x3.s0;
-        mom[5][ly] = x0.s0 * sy;
-        mom[4][ly] = x1.s0 * ly;
-        mom[3][ly] = x2.s0;
-        mom[2][ly] = py;
-        mom[1][ly] = x1.s0;
-        mom[0][ly] = x0.s0;
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 128)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 128];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 128];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 128];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 128];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 128];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 128];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 128];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 128];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 128];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 128];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 64)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 64];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 64];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 64];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 64];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 64];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 64];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 64];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 64];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 64];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 64];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 32)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 32];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 32];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 32];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 32];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 32];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 32];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 32];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 32];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 32];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 32];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 16)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 16];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 16];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 16];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 16];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 16];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 16];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 16];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 16];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 16];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 16];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 8)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 8];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 8];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 8];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 8];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 8];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 8];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 8];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 8];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 8];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 8];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 4)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 4];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 4];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 4];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 4];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 4];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 4];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 4];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 4];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 4];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 4];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 2)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 2];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 2];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 2];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 2];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 2];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 2];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 2];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 2];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 2];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 2];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if(ly < 1)
-        {
-            mom[0][ly] = mom[0][ly] + mom[0][ly + 1];
-            mom[1][ly] = mom[1][ly] + mom[1][ly + 1];
-            mom[2][ly] = mom[2][ly] + mom[2][ly + 1];
-            mom[3][ly] = mom[3][ly] + mom[3][ly + 1];
-            mom[4][ly] = mom[4][ly] + mom[4][ly + 1];
-            mom[5][ly] = mom[5][ly] + mom[5][ly + 1];
-            mom[6][ly] = mom[6][ly] + mom[6][ly + 1];
-            mom[7][ly] = mom[7][ly] + mom[7][ly + 1];
-            mom[8][ly] = mom[8][ly] + mom[8][ly + 1];
-            mom[9][ly] = mom[9][ly] + mom[9][ly + 1];
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(binary)
-    {
-        WT s = 1.0f/255;
-        if(ly < 10)
-            mom[ly][0] *= s;
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    WT xm = (gidx * 256) * mom[0][0];
-    WT ym = (gidy * 256) * mom[0][0];
-
-    if(ly == 0)
-    {
-        mom[0][1] = mom[0][0];
-        mom[1][1] = mom[1][0] + xm;
-        mom[2][1] = mom[2][0] + ym;
-        mom[3][1] = mom[3][0] + gidx * 256 * (mom[1][0] * 2 + xm);
-        mom[4][1] = mom[4][0] + gidx * 256 * (mom[2][0] + ym) + gidy * 256 * mom[1][0];
-        mom[5][1] = mom[5][0] + gidy * 256 * (mom[2][0] * 2 + ym);
-        mom[6][1] = mom[6][0] + gidx * 256 * (3 * mom[3][0] + 256 * gidx * (3 * mom[1][0] + xm));
-        mom[7][1] = mom[7][0] + gidx * 256 * (2 * (mom[4][0] + 256 * gidy * mom[1][0]) + 256 * gidx * (mom[2][0] + ym)) + 256 * gidy * mom[3][0];
-        mom[8][1] = mom[8][0] + gidy * 256 * (2 * (mom[4][0] + 256 * gidx * mom[2][0]) + 256 * gidy * (mom[1][0] + xm)) + 256 * gidx * mom[5][0];
-        mom[9][1] = mom[9][0] + gidy * 256 * (3 * mom[5][0] + 256 * gidy * (3 * mom[2][0] + ym));
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(ly < 10)
-        dst_m[10 * gidy * dst_step + ly * dst_step + gidx] = mom[ly][1];
-}
diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl
deleted file mode 100644
index e931e82..0000000
--- a/modules/ocl/src/opencl/objdetect_hog.cl
+++ /dev/null
@@ -1,726 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Wenju He, wenju@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#define CELL_WIDTH 8
-#define CELL_HEIGHT 8
-#define CELLS_PER_BLOCK_X 2
-#define CELLS_PER_BLOCK_Y 2
-#define NTHREADS 256
-#define CV_PI_F 3.1415926535897932384626433832795f
-
-#ifdef INTEL_DEVICE
-#define QANGLE_TYPE		int
-#define QANGLE_TYPE2	int2
-#else
-#define QANGLE_TYPE		uchar
-#define QANGLE_TYPE2	uchar2
-#endif
-
-//----------------------------------------------------------------------------
-// Histogram computation
-// 12 threads for a cell, 12x4 threads per block
-// Use pre-computed gaussian and interp_weight lookup tables
-__kernel void compute_hists_lut_kernel(
-    const int cblock_stride_x, const int cblock_stride_y,
-    const int cnbins, const int cblock_hist_size, const int img_block_width,
-    const int blocks_in_group, const int blocks_total,
-    const int grad_quadstep, const int qangle_step,
-    __global const float* grad, __global const QANGLE_TYPE* qangle,
-    __global const float* gauss_w_lut,
-    __global float* block_hists, __local float* smem)
-{
-    const int lx = get_local_id(0);
-    const int lp = lx / 24; /* local group id */
-    const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
-    const int gidY = gid / img_block_width;
-    const int gidX = gid - gidY * img_block_width;
-
-    const int lidX = lx - lp * 24;
-    const int lidY = get_local_id(1);
-
-    const int cell_x = lidX / 12;
-    const int cell_y = lidY;
-    const int cell_thread_x = lidX - cell_x * 12;
-
-    __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
-        CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
-    __local float* final_hist = hists + cnbins *
-        (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
-
-    const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
-    const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
-
-    __global const float* grad_ptr = (gid < blocks_total) ?
-        grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
-    __global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
-        qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
-
-    __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
-        cell_thread_x;
-    for (int bin_id = 0; bin_id < cnbins; ++bin_id)
-        hist[bin_id * 48] = 0.f;
-
-    const int dist_x = -4 + cell_thread_x - 4 * cell_x;
-    const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
-
-    const int dist_y_begin = -4 - 4 * lidY;
-    for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
-    {
-        float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
-        QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);
-
-        grad_ptr += grad_quadstep;
-        qangle_ptr += qangle_step;
-
-        int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
-
-        int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
-        float gaussian = gauss_w_lut[idx];
-        idx = (dist_y + 8) * 16 + (dist_x + 8);
-        float interp_weight = gauss_w_lut[256+idx];
-
-        hist[bin.x * 48] += gaussian * interp_weight * vote.x;
-        hist[bin.y * 48] += gaussian * interp_weight * vote.y;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    volatile __local float* hist_ = hist;
-    for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
-    {
-        if (cell_thread_x < 6)
-            hist_[0] += hist_[6];
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if (cell_thread_x < 3)
-            hist_[0] += hist_[3];
-#ifdef CPU
-        barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-        if (cell_thread_x == 0)
-            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
-                hist_[0] + hist_[1] + hist_[2];
-    }
-#ifdef CPU
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-    int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
-    if ((tid < cblock_hist_size) && (gid < blocks_total))
-    {
-        __global float* block_hist = block_hists +
-            (gidY * img_block_width + gidX) * cblock_hist_size;
-        block_hist[tid] = final_hist[tid];
-    }
-}
-
-//-------------------------------------------------------------
-//  Normalization of histograms via L2Hys_norm
-//  optimized for the case of 9 bins
-__kernel void normalize_hists_36_kernel(__global float* block_hists,
-                                        const float threshold, __local float *squares)
-{
-    const int tid = get_local_id(0);
-    const int gid = get_global_id(0);
-    const int bid = tid / 36;      /* block-hist id, (0 - 6) */
-    const int boffset = bid * 36;  /* block-hist offset in the work-group */
-    const int hid = tid - boffset; /* histogram bin id, (0 - 35) */
-
-    float elem = block_hists[gid];
-    squares[tid] = elem * elem;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float* smem = squares + boffset;
-    float sum = smem[hid];
-    if (hid < 18)
-        smem[hid] = sum = sum + smem[hid + 18];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (hid < 9)
-        smem[hid] = sum = sum + smem[hid + 9];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (hid < 4)
-        smem[hid] = sum + smem[hid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
-
-    elem = elem / (sqrt(sum) + 3.6f);
-    elem = min(elem, threshold);
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    squares[tid] = elem * elem;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    sum = smem[hid];
-    if (hid < 18)
-      smem[hid] = sum = sum + smem[hid + 18];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (hid < 9)
-        smem[hid] = sum = sum + smem[hid + 9];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (hid < 4)
-        smem[hid] = sum + smem[hid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
-
-    block_hists[gid] = elem / (sqrt(sum) + 1e-3f);
-}
-
-//-------------------------------------------------------------
-//  Normalization of histograms via L2Hys_norm
-//
-inline float reduce_smem(volatile __local float* smem, int size)
-{
-    unsigned int tid = get_local_id(0);
-    float sum = smem[tid];
-
-    if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-#ifdef CPU
-    if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-    if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
-        barrier(CLK_LOCAL_MEM_FENCE); }
-#else
-    if (tid < 32)
-    {
-        if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
-#if WAVE_SIZE < 32
-    } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16) {
-#endif
-        if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
-        if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
-        if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
-        if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
-        if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
-    }
-#endif
-
-    return sum;
-}
-
-__kernel void normalize_hists_kernel(
-    const int nthreads, const int block_hist_size, const int img_block_width,
-    __global float* block_hists, const float threshold, __local float *squares)
-{
-    const int tid = get_local_id(0);
-    const int gidX = get_group_id(0);
-    const int gidY = get_group_id(1);
-
-    __global float* hist = block_hists + (gidY * img_block_width + gidX) *
-        block_hist_size + tid;
-
-    float elem = 0.f;
-    if (tid < block_hist_size)
-        elem = hist[0];
-
-    squares[tid] = elem * elem;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    float sum = reduce_smem(squares, nthreads);
-
-    float scale = 1.0f / (sqrt(sum) + 0.1f * block_hist_size);
-    elem = min(elem * scale, threshold);
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    squares[tid] = elem * elem;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    sum = reduce_smem(squares, nthreads);
-    scale = 1.0f / (sqrt(sum) + 1e-3f);
-
-    if (tid < block_hist_size)
-        hist[0] = elem * scale;
-}
-
-//---------------------------------------------------------------------
-//  Linear SVM based classification
-//  48x96 window, 9 bins and default parameters
-//  180 threads, each thread corresponds to a bin in a row
-__kernel void classify_hists_180_kernel(
-    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
-    const int img_win_width, const int img_block_width,
-    const int win_block_stride_x, const int win_block_stride_y,
-    __global const float * block_hists, __global const float* coefs,
-    float free_coef, float threshold, __global uchar* labels)
-{
-    const int tid = get_local_id(0);
-    const int gidX = get_group_id(0);
-    const int gidY = get_group_id(1);
-
-    __global const float* hist = block_hists + (gidY * win_block_stride_y *
-        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
-
-    float product = 0.f;
-
-    for (int i = 0; i < cdescr_height; i++)
-    {
-        product += coefs[i * cdescr_width + tid] *
-            hist[i * img_block_width * cblock_hist_size + tid];
-    }
-
-    __local float products[180];
-
-    products[tid] = product;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 90) products[tid] = product = product + products[tid + 90];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 45) products[tid] = product = product + products[tid + 45];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    volatile __local float* smem = products;
-#ifdef CPU
-    if (tid < 13) smem[tid] = product = product + smem[tid + 32];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16) smem[tid] = product = product + smem[tid + 16];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<8) smem[tid] = product = product + smem[tid + 8];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<4) smem[tid] = product = product + smem[tid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<2) smem[tid] = product = product + smem[tid + 2];
-    barrier(CLK_LOCAL_MEM_FENCE);
-#else
-    if (tid < 13)
-    {
-        smem[tid] = product = product + smem[tid + 32];
-    }
-#if WAVE_SIZE < 32
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-    if (tid < 16)
-    {
-        smem[tid] = product = product + smem[tid + 16];
-        smem[tid] = product = product + smem[tid + 8];
-        smem[tid] = product = product + smem[tid + 4];
-        smem[tid] = product = product + smem[tid + 2];
-    }
-#endif
-
-    if (tid == 0){
-        product = product + smem[tid + 1];
-        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-    }
-}
-
-//---------------------------------------------------------------------
-//  Linear SVM based classification
-//  64x128 window, 9 bins and default parameters
-//  256 threads, 252 of them are used
-__kernel void classify_hists_252_kernel(
-    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
-    const int img_win_width, const int img_block_width,
-    const int win_block_stride_x, const int win_block_stride_y,
-    __global const float * block_hists, __global const float* coefs,
-    float free_coef, float threshold, __global uchar* labels)
-{
-    const int tid = get_local_id(0);
-    const int gidX = get_group_id(0);
-    const int gidY = get_group_id(1);
-
-    __global const float* hist = block_hists + (gidY * win_block_stride_y *
-        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
-
-    float product = 0.f;
-    if (tid < cdescr_width)
-    {
-        for (int i = 0; i < cdescr_height; i++)
-            product += coefs[i * cdescr_width + tid] *
-                hist[i * img_block_width * cblock_hist_size + tid];
-    }
-
-    __local float products[NTHREADS];
-
-    products[tid] = product;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 128) products[tid] = product = product + products[tid + 128];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 64) products[tid] = product = product + products[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    volatile __local float* smem = products;
-#ifdef CPU
-    if(tid<32) smem[tid] = product = product + smem[tid + 32];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<16) smem[tid] = product = product + smem[tid + 16];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<8) smem[tid] = product = product + smem[tid + 8];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<4) smem[tid] = product = product + smem[tid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<2) smem[tid] = product = product + smem[tid + 2];
-    barrier(CLK_LOCAL_MEM_FENCE);
-#else
-    if (tid < 32)
-    {
-        smem[tid] = product = product + smem[tid + 32];
-#if WAVE_SIZE < 32
-    } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16) {
-#endif
-        smem[tid] = product = product + smem[tid + 16];
-        smem[tid] = product = product + smem[tid + 8];
-        smem[tid] = product = product + smem[tid + 4];
-        smem[tid] = product = product + smem[tid + 2];
-    }
-#endif
-    if (tid == 0){
-        product = product + smem[tid + 1];
-        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-    }
-}
-
-//---------------------------------------------------------------------
-//  Linear SVM based classification
-//  256 threads
-__kernel void classify_hists_kernel(
-    const int cdescr_size, const int cdescr_width, const int cblock_hist_size,
-    const int img_win_width, const int img_block_width,
-    const int win_block_stride_x, const int win_block_stride_y,
-    __global const float * block_hists, __global const float* coefs,
-    float free_coef, float threshold, __global uchar* labels)
-{
-    const int tid = get_local_id(0);
-    const int gidX = get_group_id(0);
-    const int gidY = get_group_id(1);
-
-    __global const float* hist = block_hists + (gidY * win_block_stride_y *
-        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
-
-    float product = 0.f;
-    for (int i = tid; i < cdescr_size; i += NTHREADS)
-    {
-        int offset_y = i / cdescr_width;
-        int offset_x = i - offset_y * cdescr_width;
-        product += coefs[i] *
-            hist[offset_y * img_block_width * cblock_hist_size + offset_x];
-    }
-
-    __local float products[NTHREADS];
-
-    products[tid] = product;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 128) products[tid] = product = product + products[tid + 128];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 64) products[tid] = product = product + products[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    volatile __local float* smem = products;
-#ifdef CPU
-    if(tid<32) smem[tid] = product = product + smem[tid + 32];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<16) smem[tid] = product = product + smem[tid + 16];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<8) smem[tid] = product = product + smem[tid + 8];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<4) smem[tid] = product = product + smem[tid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(tid<2) smem[tid] = product = product + smem[tid + 2];
-    barrier(CLK_LOCAL_MEM_FENCE);
-#else
-    if (tid < 32)
-    {
-        smem[tid] = product = product + smem[tid + 32];
-#if WAVE_SIZE < 32
-    } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16) {
-#endif
-        smem[tid] = product = product + smem[tid + 16];
-        smem[tid] = product = product + smem[tid + 8];
-        smem[tid] = product = product + smem[tid + 4];
-        smem[tid] = product = product + smem[tid + 2];
-    }
-#endif
-    if (tid == 0){
-        smem[tid] = product = product + smem[tid + 1];
-        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-    }
-}
-
-//----------------------------------------------------------------------------
-// Extract descriptors
-
-__kernel void extract_descrs_by_rows_kernel(
-    const int cblock_hist_size, const int descriptors_quadstep,
-    const int cdescr_size, const int cdescr_width, const int img_block_width,
-    const int win_block_stride_x, const int win_block_stride_y,
-    __global const float* block_hists, __global float* descriptors)
-{
-    int tid = get_local_id(0);
-    int gidX = get_group_id(0);
-    int gidY = get_group_id(1);
-
-    // Get left top corner of the window in src
-    __global const float* hist = block_hists + (gidY * win_block_stride_y *
-        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
-
-    // Get left top corner of the window in dst
-    __global float* descriptor = descriptors +
-        (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
-
-    // Copy elements from src to dst
-    for (int i = tid; i < cdescr_size; i += NTHREADS)
-    {
-        int offset_y = i / cdescr_width;
-        int offset_x = i - offset_y * cdescr_width;
-        descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x];
-    }
-}
-
-__kernel void extract_descrs_by_cols_kernel(
-    const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
-    const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width,
-    const int win_block_stride_x, const int win_block_stride_y,
-    __global const float* block_hists, __global float* descriptors)
-{
-    int tid = get_local_id(0);
-    int gidX = get_group_id(0);
-    int gidY = get_group_id(1);
-
-    // Get left top corner of the window in src
-    __global const float* hist = block_hists +  (gidY * win_block_stride_y *
-        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
-
-    // Get left top corner of the window in dst
-    __global float* descriptor = descriptors +
-        (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
-
-    // Copy elements from src to dst
-    for (int i = tid; i < cdescr_size; i += NTHREADS)
-    {
-        int block_idx = i / cblock_hist_size;
-        int idx_in_block = i - block_idx * cblock_hist_size;
-
-        int y = block_idx / cnblocks_win_x;
-        int x = block_idx - y * cnblocks_win_x;
-
-        descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] =
-            hist[(y * img_block_width  + x) * cblock_hist_size + idx_in_block];
-    }
-}
-
-//----------------------------------------------------------------------------
-// Gradients computation
-
-__kernel void compute_gradients_8UC4_kernel(
-    const int height, const int width,
-    const int img_step, const int grad_quadstep, const int qangle_step,
-    const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
-    const float angle_scale, const char correct_gamma, const int cnbins)
-{
-    const int x = get_global_id(0);
-    const int tid = get_local_id(0);
-    const int gSizeX = get_local_size(0);
-    const int gidY = get_group_id(1);
-
-    __global const uchar4* row = img + gidY * img_step;
-
-    __local float sh_row[(NTHREADS + 2) * 3];
-
-    uchar4 val;
-    if (x < width)
-        val = row[x];
-    else
-        val = row[width - 2];
-
-    sh_row[tid + 1] = val.x;
-    sh_row[tid + 1 + (NTHREADS + 2)] = val.y;
-    sh_row[tid + 1 + 2 * (NTHREADS + 2)] = val.z;
-
-    if (tid == 0)
-    {
-        val = row[max(x - 1, 1)];
-        sh_row[0] = val.x;
-        sh_row[(NTHREADS + 2)] = val.y;
-        sh_row[2 * (NTHREADS + 2)] = val.z;
-    }
-
-    if (tid == gSizeX - 1)
-    {
-        val = row[min(x + 1, width - 2)];
-        sh_row[gSizeX + 1] = val.x;
-        sh_row[gSizeX + 1 + (NTHREADS + 2)] = val.y;
-        sh_row[gSizeX + 1 + 2 * (NTHREADS + 2)] = val.z;
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (x < width)
-    {
-        float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)],
-            sh_row[tid + 2 * (NTHREADS + 2)]);
-        float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)],
-            sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
-
-        float3 dx;
-        if (correct_gamma == 1)
-            dx = sqrt(b) - sqrt(a);
-        else
-            dx = b - a;
-
-        float3 dy = (float3) 0.f;
-
-        if (gidY > 0 && gidY < height - 1)
-        {
-            a = convert_float3(img[(gidY - 1) * img_step + x].xyz);
-            b = convert_float3(img[(gidY + 1) * img_step + x].xyz);
-
-            if (correct_gamma == 1)
-                dy = sqrt(b) - sqrt(a);
-            else
-                dy = b - a;
-        }
-
-        float best_dx = dx.x;
-        float best_dy = dy.x;
-
-        float mag0 = dx.x * dx.x + dy.x * dy.x;
-        float mag1 = dx.y * dx.y + dy.y * dy.y;
-        if (mag0 < mag1)
-        {
-            best_dx = dx.y;
-            best_dy = dy.y;
-            mag0 = mag1;
-        }
-
-        mag1 = dx.z * dx.z + dy.z * dy.z;
-        if (mag0 < mag1)
-        {
-            best_dx = dx.z;
-            best_dy = dy.z;
-            mag0 = mag1;
-        }
-
-        mag0 = sqrt(mag0);
-
-        float ang = (atan2(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f;
-        int hidx = (int)floor(ang);
-        ang -= hidx;
-        hidx = (hidx + cnbins) % cnbins;
-
-        qangle[(gidY * qangle_step + x) << 1] = hidx;
-        qangle[((gidY * qangle_step + x) << 1) + 1] = (hidx + 1) % cnbins;
-        grad[(gidY * grad_quadstep + x) << 1] = mag0 * (1.f - ang);
-        grad[((gidY * grad_quadstep + x) << 1) + 1] = mag0 * ang;
-    }
-}
-
-__kernel void compute_gradients_8UC1_kernel(
-    const int height, const int width,
-    const int img_step, const int grad_quadstep, const int qangle_step,
-    __global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
-    const float angle_scale, const char correct_gamma, const int cnbins)
-{
-    const int x = get_global_id(0);
-    const int tid = get_local_id(0);
-    const int gSizeX = get_local_size(0);
-    const int gidY = get_group_id(1);
-
-    __global const uchar* row = img + gidY * img_step;
-
-    __local float sh_row[NTHREADS + 2];
-
-    if (x < width)
-        sh_row[tid + 1] = row[x];
-    else
-        sh_row[tid + 1] = row[width - 2];
-
-    if (tid == 0)
-        sh_row[0] = row[max(x - 1, 1)];
-
-    if (tid == gSizeX - 1)
-        sh_row[gSizeX + 1] = row[min(x + 1, width - 2)];
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (x < width)
-    {
-        float dx;
-
-        if (correct_gamma == 1)
-            dx = sqrt(sh_row[tid + 2]) - sqrt(sh_row[tid]);
-        else
-            dx = sh_row[tid + 2] - sh_row[tid];
-
-        float dy = 0.f;
-        if (gidY > 0 && gidY < height - 1)
-        {
-            float a = (float) img[ (gidY + 1) * img_step + x ];
-            float b = (float) img[ (gidY - 1) * img_step + x ];
-            if (correct_gamma == 1)
-                dy = sqrt(a) - sqrt(b);
-            else
-                dy = a - b;
-        }
-        float mag = sqrt(dx * dx + dy * dy);
-
-        float ang = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
-        int hidx = (int)floor(ang);
-        ang -= hidx;
-        hidx = (hidx + cnbins) % cnbins;
-
-        qangle[ (gidY * qangle_step + x) << 1 ]     = hidx;
-        qangle[ ((gidY * qangle_step + x) << 1) + 1 ] = (hidx + 1) % cnbins;
-        grad[ (gidY * grad_quadstep + x) << 1 ]       = mag * (1.f - ang);
-        grad[ ((gidY * grad_quadstep + x) << 1) + 1 ]   = mag * ang;
-    }
-}
diff --git a/modules/ocl/src/opencl/operator_convertTo.cl b/modules/ocl/src/opencl/operator_convertTo.cl
deleted file mode 100644
index ca38bd5..0000000
--- a/modules/ocl/src/opencl/operator_convertTo.cl
+++ /dev/null
@@ -1,64 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void convert_to(
-        __global const srcT* restrict srcMat,
-        __global dstT* dstMat,
-        int cols1, int rows,
-        int sstep1, int soffset1,
-        int dstep1, int doffset1,
-        float alpha, float beta)
-{
-        int x = get_global_id(0);
-        int y = get_global_id(1);
-
-        int srcidx = mad24(y, sstep1, x + soffset1);
-        int dstidx = mad24(y, dstep1, x + doffset1);
-
-        if ( (x < cols1) && (y < rows) )
-        {
-            float temp_src = convert_float(srcMat[srcidx]);
-            dstMat[dstidx] = convertToDstType(temp_src*alpha+beta);
-        }
-}
diff --git a/modules/ocl/src/opencl/operator_copyToM.cl b/modules/ocl/src/opencl/operator_copyToM.cl
deleted file mode 100644
index 69e1798..0000000
--- a/modules/ocl/src/opencl/operator_copyToM.cl
+++ /dev/null
@@ -1,71 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void copy_to_with_mask(
-        __global const GENTYPE* restrict srcMat,
-        __global GENTYPE* dstMat,
-        __global const uchar* restrict maskMat,
-        int cols,
-        int rows,
-        int srcStep_in_pixel,
-        int srcoffset_in_pixel,
-        int dstStep_in_pixel,
-        int dstoffset_in_pixel,
-        int maskStep,
-        int maskoffset)
-{
-    int x=get_global_id(0);
-    int y=get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int maskidx = mad24(y,maskStep,x+ maskoffset);
-        if ( maskMat[maskidx])
-        {
-            int srcidx = mad24(y,srcStep_in_pixel,x+ srcoffset_in_pixel);
-            int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
-            dstMat[dstidx] = srcMat[srcidx];
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/operator_setTo.cl b/modules/ocl/src/opencl/operator_setTo.cl
deleted file mode 100644
index 20c5cf2..0000000
--- a/modules/ocl/src/opencl/operator_setTo.cl
+++ /dev/null
@@ -1,95 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void set_to_without_mask_C1_D0(__global uchar * scalar,__global uchar * dstMat,
-        int cols,int rows,int dstStep_in_pixel,int offset_in_pixel)
-{
-        int x=get_global_id(0)<<2;
-        int y=get_global_id(1);
-        int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel);
-        uchar4 out;
-        out.x = out.y = out.z = out.w = scalar[0];
-
-        if ( (x+3 < cols) && (y < rows)&& ((offset_in_pixel&3) == 0))
-        {
-            *(__global uchar4*)(dstMat+idx) = out;
-        }
-        else
-        {
-             if((x+3 < cols) && (y < rows))
-             {
-                dstMat[idx] = out.x;
-                dstMat[idx+1] = out.y;
-                dstMat[idx+2] = out.z;
-                dstMat[idx+3] = out.w;
-             }
-             if((x+2 < cols) && (y < rows))
-             {
-                dstMat[idx] = out.x;
-                dstMat[idx+1] = out.y;
-                dstMat[idx+2] = out.z;
-             }
-             else if((x+1 < cols) && (y < rows))
-             {
-                dstMat[idx] = out.x;
-                dstMat[idx+1] = out.y;
-             }
-             else if((x < cols) && (y < rows))
-             {
-                dstMat[idx] = out.x;
-             }
-        }
-}
-
-__kernel void set_to_without_mask(__global GENTYPE * scalar,__global GENTYPE * dstMat,
-        int cols, int rows, int dstStep_in_pixel, int offset_in_pixel)
-{
-        int x = get_global_id(0);
-        int y = get_global_id(1);
-        if ( (x < cols) & (y < rows))
-        {
-            int idx = mad24(y, dstStep_in_pixel, x + offset_in_pixel);
-            dstMat[idx] = scalar[0];
-        }
-}
diff --git a/modules/ocl/src/opencl/operator_setToM.cl b/modules/ocl/src/opencl/operator_setToM.cl
deleted file mode 100644
index afaa2e6..0000000
--- a/modules/ocl/src/opencl/operator_setToM.cl
+++ /dev/null
@@ -1,68 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-__kernel void set_to_with_mask(
-        __global GENTYPE * scalar,
-        __global GENTYPE * dstMat,
-        int cols,
-        int rows,
-        int dstStep_in_pixel,
-        int dstoffset_in_pixel,
-        __global const uchar * restrict maskMat,
-        int maskStep,
-        int maskoffset)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int maskidx = mad24(y,maskStep,x+ maskoffset);
-        if (maskMat[maskidx])
-        {
-            int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
-            dstMat[dstidx] = scalar[0];
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/optical_flow_farneback.cl b/modules/ocl/src/opencl/optical_flow_farneback.cl
deleted file mode 100644
index 4725662..0000000
--- a/modules/ocl/src/opencl/optical_flow_farneback.cl
+++ /dev/null
@@ -1,450 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#define tx  (int)get_local_id(0)
-#define ty  get_local_id(1)
-#define bx  get_group_id(0)
-#define bdx (int)get_local_size(0)
-
-#define BORDER_SIZE 5
-#define MAX_KSIZE_HALF 100
-
-#ifndef polyN
-#define polyN 5
-#endif
-
-__kernel void polynomialExpansion(__global float * dst,
-                                  __global __const float * src,
-                                  __global __const float * c_g,
-                                  __global __const float * c_xg,
-                                  __global __const float * c_xxg,
-                                  __local float * smem,
-                                  const float4 ig,
-                                  const int height, const int width,
-                                  int dstStep, int srcStep)
-{
-    const int y = get_global_id(1);
-    const int x = bx * (bdx - 2*polyN) + tx - polyN;
-
-    dstStep /= sizeof(*dst);
-    srcStep /= sizeof(*src);
-
-    int xWarped;
-    __local float *row = smem + tx;
-
-    if (y < height && y >= 0)
-    {
-        xWarped = min(max(x, 0), width - 1);
-
-        row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];
-        row[bdx] = 0.f;
-        row[2*bdx] = 0.f;
-
-#pragma unroll
-        for (int k = 1; k <= polyN; ++k)
-        {
-            float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];
-            float t1 = src[mad24(min(y + k, height - 1), srcStep, xWarped)];
-
-            row[0] += c_g[k] * (t0 + t1);
-            row[bdx] += c_xg[k] * (t1 - t0);
-            row[2*bdx] += c_xxg[k] * (t0 + t1);
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (y < height && y >= 0 && tx >= polyN && tx + polyN < bdx && x < width)
-    {
-        float b1 = c_g[0] * row[0];
-        float b3 = c_g[0] * row[bdx];
-        float b5 = c_g[0] * row[2*bdx];
-        float b2 = 0, b4 = 0, b6 = 0;
-
-#pragma unroll
-        for (int k = 1; k <= polyN; ++k)
-        {
-            b1 += (row[k] + row[-k]) * c_g[k];
-            b4 += (row[k] + row[-k]) * c_xxg[k];
-            b2 += (row[k] - row[-k]) * c_xg[k];
-            b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
-            b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
-            b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
-        }
-
-        dst[mad24(y, dstStep, xWarped)] = b3*ig.s0;
-        dst[mad24(height + y, dstStep, xWarped)] = b2*ig.s0;
-        dst[mad24(2*height + y, dstStep, xWarped)] = b1*ig.s1 + b5*ig.s2;
-        dst[mad24(3*height + y, dstStep, xWarped)] = b1*ig.s1 + b4*ig.s2;
-        dst[mad24(4*height + y, dstStep, xWarped)] = b6*ig.s3;
-    }
-}
-
-inline int idx_row_low(const int y, const int last_row)
-{
-    return abs(y) % (last_row + 1);
-}
-
-inline int idx_row_high(const int y, const int last_row)
-{
-    return abs(last_row - abs(last_row - y)) % (last_row + 1);
-}
-
-inline int idx_row(const int y, const int last_row)
-{
-    return idx_row_low(idx_row_high(y, last_row), last_row);
-}
-
-inline int idx_col_low(const int x, const int last_col)
-{
-    return abs(x) % (last_col + 1);
-}
-
-inline int idx_col_high(const int x, const int last_col)
-{
-    return abs(last_col - abs(last_col - x)) % (last_col + 1);
-}
-
-inline int idx_col(const int x, const int last_col)
-{
-    return idx_col_low(idx_col_high(x, last_col), last_col);
-}
-
-__kernel void gaussianBlur(__global float * dst,
-                           __global const float * src,
-                           __global const float * c_gKer,
-                           __local float * smem,
-                           const int height,  const int width,
-                           int dstStep, int srcStep,
-                           const int ksizeHalf)
-{
-    const int y = get_global_id(1);
-    const int x = get_global_id(0);
-
-    dstStep /= sizeof(*dst);
-    srcStep /= sizeof(*src);
-
-    __local float *row = smem + ty * (bdx + 2*ksizeHalf);
-
-    if (y < height)
-    {
-        // Vertical pass
-        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
-        {
-            int xExt = (int)(bx * bdx) + i - ksizeHalf;
-            xExt = idx_col(xExt, width - 1);
-            row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];
-            for (int j = 1; j <= ksizeHalf; ++j)
-                row[i] += (src[mad24(idx_row_low(y - j, height - 1), srcStep, xExt)]
-                           + src[mad24(idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (y < height && y >= 0 && x < width && x >= 0)
-    {
-        // Horizontal pass
-        row += tx + ksizeHalf;
-        float res = row[0] * c_gKer[0];
-        for (int i = 1; i <= ksizeHalf; ++i)
-            res += (row[-i] + row[i]) * c_gKer[i];
-
-        dst[mad24(y, dstStep, x)] = res;
-    }
-}
-
-__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };
-
-__kernel void updateMatrices(__global float * M,
-                             __global const float * flowx, __global const float * flowy,
-                             __global const float * R0, __global const float * R1,
-                             const int height, const int width,
-                             int mStep, int xStep,  int yStep, int R0Step, int R1Step)
-{
-    const int y = get_global_id(1);
-    const int x = get_global_id(0);
-
-    mStep /= sizeof(*M);
-    xStep /= sizeof(*flowx);
-    yStep /= sizeof(*flowy);
-    R0Step /= sizeof(*R0);
-    R1Step /= sizeof(*R1);
-
-    if (y < height && y >= 0 && x < width && x >= 0)
-    {
-        float dx = flowx[mad24(y, xStep, x)];
-        float dy = flowy[mad24(y, yStep, x)];
-        float fx = x + dx;
-        float fy = y + dy;
-
-        int x1 = convert_int(floor(fx));
-        int y1 = convert_int(floor(fy));
-        fx -= x1;
-        fy -= y1;
-
-        float r2, r3, r4, r5, r6;
-
-        if (x1 >= 0 && y1 >= 0 && x1 < width - 1 && y1 < height - 1)
-        {
-            float a00 = (1.f - fx) * (1.f - fy);
-            float a01 = fx * (1.f - fy);
-            float a10 = (1.f - fx) * fy;
-            float a11 = fx * fy;
-
-            r2 = a00 * R1[mad24(y1, R1Step, x1)] +
-                 a01 * R1[mad24(y1, R1Step, x1 + 1)] +
-                 a10 * R1[mad24(y1 + 1, R1Step, x1)] +
-                 a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];
-
-            r3 = a00 * R1[mad24(height + y1, R1Step, x1)] +
-                 a01 * R1[mad24(height + y1, R1Step, x1 + 1)] +
-                 a10 * R1[mad24(height + y1 + 1, R1Step, x1)] +
-                 a11 * R1[mad24(height + y1 + 1, R1Step, x1 + 1)];
-
-            r4 = a00 * R1[mad24(2*height + y1, R1Step, x1)] +
-                 a01 * R1[mad24(2*height + y1, R1Step, x1 + 1)] +
-                 a10 * R1[mad24(2*height + y1 + 1, R1Step, x1)] +
-                 a11 * R1[mad24(2*height + y1 + 1, R1Step, x1 + 1)];
-
-            r5 = a00 * R1[mad24(3*height + y1, R1Step, x1)] +
-                 a01 * R1[mad24(3*height + y1, R1Step, x1 + 1)] +
-                 a10 * R1[mad24(3*height + y1 + 1, R1Step, x1)] +
-                 a11 * R1[mad24(3*height + y1 + 1, R1Step, x1 + 1)];
-
-            r6 = a00 * R1[mad24(4*height + y1, R1Step, x1)] +
-                 a01 * R1[mad24(4*height + y1, R1Step, x1 + 1)] +
-                 a10 * R1[mad24(4*height + y1 + 1, R1Step, x1)] +
-                 a11 * R1[mad24(4*height + y1 + 1, R1Step, x1 + 1)];
-
-            r4 = (R0[mad24(2*height + y, R0Step, x)] + r4) * 0.5f;
-            r5 = (R0[mad24(3*height + y, R0Step, x)] + r5) * 0.5f;
-            r6 = (R0[mad24(4*height + y, R0Step, x)] + r6) * 0.25f;
-        }
-        else
-        {
-            r2 = r3 = 0.f;
-            r4 = R0[mad24(2*height + y, R0Step, x)];
-            r5 = R0[mad24(3*height + y, R0Step, x)];
-            r6 = R0[mad24(4*height + y, R0Step, x)] * 0.5f;
-        }
-
-        r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;
-        r3 = (R0[mad24(height + y, R0Step, x)] - r3) * 0.5f;
-
-        r2 += r4*dy + r6*dx;
-        r3 += r6*dy + r5*dx;
-
-        float scale =
-            c_border[min(x, BORDER_SIZE)] *
-            c_border[min(y, BORDER_SIZE)] *
-            c_border[min(width - x - 1, BORDER_SIZE)] *
-            c_border[min(height - y - 1, BORDER_SIZE)];
-
-        r2 *= scale;
-        r3 *= scale;
-        r4 *= scale;
-        r5 *= scale;
-        r6 *= scale;
-
-        M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
-        M[mad24(height + y, mStep, x)] = (r4 + r5)*r6;
-        M[mad24(2*height + y, mStep, x)] = r5*r5 + r6*r6;
-        M[mad24(3*height + y, mStep, x)] = r4*r2 + r6*r3;
-        M[mad24(4*height + y, mStep, x)] = r6*r2 + r5*r3;
-    }
-}
-
-__kernel void boxFilter5(__global float * dst,
-                         __global const float * src,
-                         __local float * smem,
-                         const int height,  const int width,
-                         int dstStep, int srcStep,
-                         const int ksizeHalf)
-{
-    const int y = get_global_id(1);
-    const int x = get_global_id(0);
-
-    const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
-    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
-    __local float *row = smem + 5 * ty * smw;
-
-    dstStep /= sizeof(*dst);
-    srcStep /= sizeof(*src);
-
-    if (y < height)
-    {
-        // Vertical pass
-        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
-        {
-            int xExt = (int)(bx * bdx) + i - ksizeHalf;
-            xExt = min(max(xExt, 0), width - 1);
-
-#pragma unroll
-            for (int k = 0; k < 5; ++k)
-                row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)];
-
-            for (int j = 1; j <= ksizeHalf; ++j)
-#pragma unroll
-                for (int k = 0; k < 5; ++k)
-                    row[k*smw + i] +=
-                        src[mad24(k*height + max(y - j, 0), srcStep, xExt)] +
-                        src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)];
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (y < height && y >= 0 && x < width && x >= 0)
-    {
-        // Horizontal pass
-
-        row += tx + ksizeHalf;
-        float res[5];
-
-#pragma unroll
-        for (int k = 0; k < 5; ++k)
-            res[k] = row[k*smw];
-
-        for (int i = 1; i <= ksizeHalf; ++i)
-#pragma unroll
-            for (int k = 0; k < 5; ++k)
-                res[k] += row[k*smw - i] + row[k*smw + i];
-
-#pragma unroll
-        for (int k = 0; k < 5; ++k)
-            dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv;
-    }
-}
-
-__kernel void updateFlow(__global float4 * flowx, __global float4 * flowy,
-                         __global const float4 * M,
-                         const int height, const int width,
-                         int xStep, int yStep, int mStep)
-{
-    const int y = get_global_id(1);
-    const int x = get_global_id(0);
-
-    xStep /= sizeof(*flowx);
-    yStep /= sizeof(*flowy);
-    mStep /= sizeof(*M);
-
-    if (y < height && y >= 0 && x < width && x >= 0)
-    {
-        float4 g11 = M[mad24(y, mStep, x)];
-        float4 g12 = M[mad24(height + y, mStep, x)];
-        float4 g22 = M[mad24(2*height + y, mStep, x)];
-        float4 h1 =  M[mad24(3*height + y, mStep, x)];
-        float4 h2 =  M[mad24(4*height + y, mStep, x)];
-
-        float4 detInv = (float4)(1.f) / (g11*g22 - g12*g12 + (float4)(1e-3f));
-
-        flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;
-        flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;
-    }
-}
-
-__kernel void gaussianBlur5(__global float * dst,
-                            __global const float * src,
-                            __global const float * c_gKer,
-                            __local float * smem,
-                            const int height,  const int width,
-                            int dstStep, int srcStep,
-                            const int ksizeHalf)
-{
-    const int y = get_global_id(1);
-    const int x = get_global_id(0);
-
-    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
-    __local volatile float *row = smem + 5 * ty * smw;
-
-    dstStep /= sizeof(*dst);
-    srcStep /= sizeof(*src);
-
-    if (y < height)
-    {
-        // Vertical pass
-        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
-        {
-            int xExt = (int)(bx * bdx) + i - ksizeHalf;
-            xExt = idx_col(xExt, width - 1);
-
-#pragma unroll
-            for (int k = 0; k < 5; ++k)
-                row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0];
-
-            for (int j = 1; j <= ksizeHalf; ++j)
-#pragma unroll
-                for (int k = 0; k < 5; ++k)
-                    row[k*smw + i] +=
-                        (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] +
-                         src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (y < height && y >= 0 && x < width && x >= 0)
-    {
-        // Horizontal pass
-
-        row += tx + ksizeHalf;
-        float res[5];
-
-#pragma unroll
-        for (int k = 0; k < 5; ++k)
-            res[k] = row[k*smw] * c_gKer[0];
-
-        for (int i = 1; i <= ksizeHalf; ++i)
-#pragma unroll
-            for (int k = 0; k < 5; ++k)
-                res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
-
-#pragma unroll
-        for (int k = 0; k < 5; ++k)
-            dst[mad24(k*height + y, dstStep, x)] = res[k];
-    }
-}
diff --git a/modules/ocl/src/opencl/orb.cl b/modules/ocl/src/opencl/orb.cl
deleted file mode 100644
index 3617602..0000000
--- a/modules/ocl/src/opencl/orb.cl
+++ /dev/null
@@ -1,503 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define CV_PI M_PI
-#else
-#define CV_PI M_PI_F
-#endif
-
-#define X_ROW 0
-#define Y_ROW 1
-#define RESPONSE_ROW 2
-#define ANGLE_ROW 3
-#define OCTAVE_ROW 4
-#define SIZE_ROW 5
-#define ROWS_COUNT 6
-
-
-#ifdef CPU
-void reduce_32(volatile __local int* smem, volatile int* val, int tid)
-{
-#define op(A, B) (*A)+(B)
-
-    smem[tid] = *val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int i = 16; i > 0; i >>= 1)
-    {
-        if(tid < i)
-        {
-            smem[tid] = *val = op(val, smem[tid + i]);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-#undef op
-}
-#else
-void reduce_32(volatile __local int* smem, volatile int* val, int tid)
-{
-#define op(A, B) (*A)+(B)
-
-    smem[tid] = *val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-#ifndef WAVE_SIZE
-#define WAVE_SIZE 1
-#endif
-    if (tid < 16)
-    {
-        smem[tid] = *val = op(val, smem[tid + 16]);
-#if WAVE_SIZE < 16
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
-        smem[tid] = *val = op(val, smem[tid + 8]);
-#if WAVE_SIZE < 8
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 4)
-    {
-#endif
-        smem[tid] = *val = op(val, smem[tid + 4]);
-#if WAVE_SIZE < 4
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 2)
-    {
-#endif
-        smem[tid] = *val = op(val, smem[tid + 2]);
-#if WAVE_SIZE < 2
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 1)
-    {
-#endif
-        smem[tid] = *val = op(val, smem[tid + 1]);
-    }
-#undef WAVE_SIZE
-#undef op
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-// HarrisResponses
-
-__kernel
-void HarrisResponses(__global const uchar* img,
-                     __global float* keypoints,
-                     const int npoints,
-                     const int blockSize,
-                     const float harris_k,
-                     const int img_step,
-                     const int keypoints_step)
-{
-    __local int smem0[8 * 32];
-    __local int smem1[8 * 32];
-    __local int smem2[8 * 32];
-
-    const int ptidx = mad24(get_group_id(0), get_local_size(1), get_local_id(1));
-
-    if (ptidx < npoints)
-    {
-        const int pt_x = keypoints[mad24(keypoints_step, X_ROW, ptidx)];
-        const int pt_y = keypoints[mad24(keypoints_step, Y_ROW, ptidx)];
-
-        const int r = blockSize / 2;
-        const int x0 = pt_x - r;
-        const int y0 = pt_y - r;
-
-        int a = 0, b = 0, c = 0;
-
-        for (int ind = get_local_id(0); ind < blockSize * blockSize; ind += get_local_size(0))
-        {
-            const int i = ind / blockSize;
-            const int j = ind % blockSize;
-
-            int center = mad24(y0+i, img_step, x0+j);
-
-            int Ix = (img[center+1] - img[center-1]) * 2 +
-                     (img[center-img_step+1] - img[center-img_step-1]) +
-                     (img[center+img_step+1] - img[center+img_step-1]);
-
-            int Iy = (img[center+img_step] - img[center-img_step]) * 2 +
-                     (img[center+img_step-1] - img[center-img_step-1]) +
-                     (img[center+img_step+1] - img[center-img_step+1]);
-
-            a += Ix * Ix;
-            b += Iy * Iy;
-            c += Ix * Iy;
-        }
-
-        __local int* srow0 = smem0 + get_local_id(1) * get_local_size(0);
-        __local int* srow1 = smem1 + get_local_id(1) * get_local_size(0);
-        __local int* srow2 = smem2 + get_local_id(1) * get_local_size(0);
-
-        reduce_32(srow0, &a, get_local_id(0));
-        reduce_32(srow1, &b, get_local_id(0));
-        reduce_32(srow2, &c, get_local_id(0));
-
-        if (get_local_id(0) == 0)
-        {
-            float scale = (1 << 2) * blockSize * 255.0f;
-            scale = 1.0f / scale;
-            const float scale_sq_sq = scale * scale * scale * scale;
-
-            float response = ((float)a * b - (float)c * c - harris_k * ((float)a + b) * ((float)a + b)) * scale_sq_sq;
-            keypoints[mad24(keypoints_step, RESPONSE_ROW, ptidx)] = response;
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-// IC_Angle
-
-__kernel
-void IC_Angle(__global const uchar* img,
-              __global float* keypoints_,
-              __global const int* u_max,
-              const int npoints,
-              const int half_k,
-              const int img_step,
-              const int keypoints_step)
-{
-    __local int smem0[8 * 32];
-    __local int smem1[8 * 32];
-
-    __local int* srow0 = smem0 + get_local_id(1) * get_local_size(0);
-    __local int* srow1 = smem1 + get_local_id(1) * get_local_size(0);
-
-    const int ptidx = mad24(get_group_id(0), get_local_size(1), get_local_id(1));
-
-    if (ptidx < npoints)
-    {
-        int m_01 = 0, m_10 = 0;
-
-        const int pt_x = keypoints_[mad24(keypoints_step, X_ROW, ptidx)];
-        const int pt_y = keypoints_[mad24(keypoints_step, Y_ROW, ptidx)];
-
-        // Treat the center line differently, v=0
-        for (int u = get_local_id(0) - half_k; u <= half_k; u += get_local_size(0))
-            m_10 += u * img[mad24(pt_y, img_step, pt_x+u)];
-
-        reduce_32(srow0, &m_10, get_local_id(0));
-
-        for (int v = 1; v <= half_k; ++v)
-        {
-            // Proceed over the two lines
-            int v_sum = 0;
-            int m_sum = 0;
-            const int d = u_max[v];
-
-            for (int u = get_local_id(0) - d; u <= d; u += get_local_size(0))
-            {
-                int val_plus = img[mad24(pt_y+v, img_step, pt_x+u)];
-                int val_minus = img[mad24(pt_y-v, img_step, pt_x+u)];
-
-                v_sum += (val_plus - val_minus);
-                m_sum += u * (val_plus + val_minus);
-            }
-
-            reduce_32(srow0, &v_sum, get_local_id(0));
-            reduce_32(srow1, &m_sum, get_local_id(0));
-
-            m_10 += m_sum;
-            m_01 += v * v_sum;
-        }
-
-        if (get_local_id(0) == 0)
-        {
-            float kp_dir = atan2((float)m_01, (float)m_10);
-            kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
-            kp_dir *= 180.0f / CV_PI;
-
-            keypoints_[mad24(keypoints_step, ANGLE_ROW, ptidx)] = kp_dir;
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-// computeOrbDescriptor
-
-#define GET_VALUE(idx) \
-    img[mad24(loc.y + (int)round(pattern[idx] * sina + pattern[pattern_step+idx] * cosa), img_step, \
-         loc.x + (int)round(pattern[idx] * cosa - pattern[pattern_step+idx] * sina))]
-
-int calcOrbDescriptor_2(__global const uchar* img,
-                        __global const int* pattern,
-                        const int2 loc,
-                        const float sina,
-                        const float cosa,
-                        const int i,
-                        const int img_step,
-                        const int pattern_step)
-{
-    pattern += 16 * i;
-
-    int t0, t1, val;
-
-    t0 = GET_VALUE(0); t1 = GET_VALUE(1);
-    val = t0 < t1;
-
-    t0 = GET_VALUE(2); t1 = GET_VALUE(3);
-    val |= (t0 < t1) << 1;
-
-    t0 = GET_VALUE(4); t1 = GET_VALUE(5);
-    val |= (t0 < t1) << 2;
-
-    t0 = GET_VALUE(6); t1 = GET_VALUE(7);
-    val |= (t0 < t1) << 3;
-
-    t0 = GET_VALUE(8); t1 = GET_VALUE(9);
-    val |= (t0 < t1) << 4;
-
-    t0 = GET_VALUE(10); t1 = GET_VALUE(11);
-    val |= (t0 < t1) << 5;
-
-    t0 = GET_VALUE(12); t1 = GET_VALUE(13);
-    val |= (t0 < t1) << 6;
-
-    t0 = GET_VALUE(14); t1 = GET_VALUE(15);
-    val |= (t0 < t1) << 7;
-
-    return val;
-}
-
-int calcOrbDescriptor_3(__global const uchar* img,
-                        __global const int* pattern,
-                        const int2 loc,
-                        const float sina,
-                        const float cosa,
-                        const int i,
-                        const int img_step,
-                        const int pattern_step)
-{
-    pattern += 12 * i;
-
-    int t0, t1, t2, val;
-
-    t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
-    val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
-
-    t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
-    val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
-
-    t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
-    val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
-
-    t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
-    val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
-
-    return val;
-}
-
-int calcOrbDescriptor_4(__global const uchar* img,
-                        __global const int* pattern,
-                        const int2 loc,
-                        const float sina,
-                        const float cosa,
-                        const int i,
-                        const int img_step,
-                        const int pattern_step)
-{
-    pattern += 16 * i;
-
-    int t0, t1, t2, t3, k, val;
-    int a, b;
-
-    t0 = GET_VALUE(0); t1 = GET_VALUE(1);
-    t2 = GET_VALUE(2); t3 = GET_VALUE(3);
-    a = 0, b = 2;
-    if( t1 > t0 ) t0 = t1, a = 1;
-    if( t3 > t2 ) t2 = t3, b = 3;
-    k = t0 > t2 ? a : b;
-    val = k;
-
-    t0 = GET_VALUE(4); t1 = GET_VALUE(5);
-    t2 = GET_VALUE(6); t3 = GET_VALUE(7);
-    a = 0, b = 2;
-    if( t1 > t0 ) t0 = t1, a = 1;
-    if( t3 > t2 ) t2 = t3, b = 3;
-    k = t0 > t2 ? a : b;
-    val |= k << 2;
-
-    t0 = GET_VALUE(8); t1 = GET_VALUE(9);
-    t2 = GET_VALUE(10); t3 = GET_VALUE(11);
-    a = 0, b = 2;
-    if( t1 > t0 ) t0 = t1, a = 1;
-    if( t3 > t2 ) t2 = t3, b = 3;
-    k = t0 > t2 ? a : b;
-    val |= k << 4;
-
-    t0 = GET_VALUE(12); t1 = GET_VALUE(13);
-    t2 = GET_VALUE(14); t3 = GET_VALUE(15);
-    a = 0, b = 2;
-    if( t1 > t0 ) t0 = t1, a = 1;
-    if( t3 > t2 ) t2 = t3, b = 3;
-    k = t0 > t2 ? a : b;
-    val |= k << 6;
-
-    return val;
-}
-
-#undef GET_VALUE
-
-__kernel
-void computeOrbDescriptor(__global const uchar* img,
-                          __global const float* keypoints,
-                          __global const int* pattern,
-                          __global uchar* desc,
-                          const int npoints,
-                          const int dsize,
-                          const int WTA_K,
-                          const int offset,
-                          const int img_step,
-                          const int keypoints_step,
-                          const int pattern_step,
-                          const int desc_step)
-{
-    const int descidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0));
-    const int ptidx = mad24(get_group_id(1), get_local_size(1), get_local_id(1));
-
-    if (ptidx < npoints && descidx < dsize)
-    {
-        int2 loc = {(int)keypoints[mad24(keypoints_step, X_ROW, ptidx)],
-                    (int)keypoints[mad24(keypoints_step, Y_ROW, ptidx)]};
-
-        float angle = keypoints[mad24(keypoints_step, ANGLE_ROW, ptidx)];
-        angle *= (float)(CV_PI / 180.f);
-
-        float sina = sin(angle);
-        float cosa = cos(angle);
-
-        if (WTA_K == 2)
-            desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_2(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step);
-        else if (WTA_K == 3)
-            desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_3(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step);
-        else if (WTA_K == 4)
-            desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_4(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-// mergeLocation
-
-__kernel
-void mergeLocation(__global const float* keypoints_in,
-                   __global float* keypoints_out,
-                   const int npoints,
-                   const int offset,
-                   const float scale,
-                   const int octave,
-                   const float size,
-                   const int keypoints_in_step,
-                   const int keypoints_out_step)
-{
-    //const int ptidx = blockIdx.x * blockDim.x + threadIdx.x;
-    const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0));
-
-    if (ptidx < npoints)
-    {
-        float pt_x = keypoints_in[mad24(keypoints_in_step, X_ROW, ptidx)] * scale;
-        float pt_y = keypoints_in[mad24(keypoints_in_step, Y_ROW, ptidx)] * scale;
-        float response = keypoints_in[mad24(keypoints_in_step, RESPONSE_ROW, ptidx)];
-        float angle = keypoints_in[mad24(keypoints_in_step, ANGLE_ROW, ptidx)];
-
-        keypoints_out[mad24(keypoints_out_step, X_ROW, ptidx+offset)] = pt_x;
-        keypoints_out[mad24(keypoints_out_step, Y_ROW, ptidx+offset)] = pt_y;
-        keypoints_out[mad24(keypoints_out_step, RESPONSE_ROW, ptidx+offset)] = response;
-        keypoints_out[mad24(keypoints_out_step, ANGLE_ROW, ptidx+offset)] = angle;
-        keypoints_out[mad24(keypoints_out_step, OCTAVE_ROW, ptidx+offset)] = (float)octave;
-        keypoints_out[mad24(keypoints_out_step, SIZE_ROW, ptidx+offset)] = size;
-    }
-}
-
-__kernel
-void convertRowsToChannels(__global const float* keypoints_in,
-                           __global float* keypoints_out,
-                           const int npoints,
-                           const int keypoints_in_step,
-                           const int keypoints_out_step)
-{
-    const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0));
-
-    if (ptidx < npoints)
-    {
-        const int pt_x = keypoints_in[mad24(keypoints_in_step, X_ROW, ptidx)];
-        const int pt_y = keypoints_in[mad24(keypoints_in_step, Y_ROW, ptidx)];
-
-        keypoints_out[ptidx*2] = pt_x;
-        keypoints_out[ptidx*2+1] = pt_y;
-    }
-}
-
-__kernel
-void convertChannelsToRows(__global const float* keypoints_pos,
-                           __global const float* keypoints_resp,
-                           __global float* keypoints_out,
-                           const int npoints,
-                           const int keypoints_pos_step,
-                           const int keypoints_resp_step,
-                           const int keypoints_out_step)
-{
-    const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0));
-
-    if (ptidx < npoints)
-    {
-        const float pt_x = keypoints_pos[ptidx*2];
-        const float pt_y = keypoints_pos[ptidx*2+1];
-        const float resp = keypoints_resp[ptidx];
-
-        keypoints_out[mad24(keypoints_out_step, X_ROW, ptidx)] = pt_x;
-        keypoints_out[mad24(keypoints_out_step, Y_ROW, ptidx)] = pt_y;
-        keypoints_out[mad24(keypoints_out_step, RESPONSE_ROW, ptidx)] = resp;
-    }
-}
diff --git a/modules/ocl/src/opencl/pyr_down.cl b/modules/ocl/src/opencl/pyr_down.cl
deleted file mode 100644
index 6f10067..0000000
--- a/modules/ocl/src/opencl/pyr_down.cl
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Dachuan Zhao, dachuan@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-inline int idx_row_low(int y, int last_row)
-{
-    return abs(y) % (last_row + 1);
-}
-
-inline int idx_row_high(int y, int last_row)
-{
-    return abs(last_row - (int)abs(last_row - y)) % (last_row + 1);
-}
-
-inline int idx_row(int y, int last_row)
-{
-    return idx_row_low(idx_row_high(y, last_row), last_row);
-}
-
-inline int idx_col_low(int x, int last_col)
-{
-    return abs(x) % (last_col + 1);
-}
-
-inline int idx_col_high(int x, int last_col)
-{
-    return abs(last_col - (int)abs(last_col - x)) % (last_col + 1);
-}
-
-inline int idx_col(int x, int last_col)
-{
-    return idx_col_low(idx_col_high(x, last_col), last_col);
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_8UC1 ///////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float smem[256 + 4];
-
-    float sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       0.0625f * (((srcData + (src_y - 2) * srcStep))[x]);
-        sum = sum + 0.25f   * (((srcData + (src_y - 1) * srcStep))[x]);
-        sum = sum + 0.375f  * (((srcData + (src_y    ) * srcStep))[x]);
-        sum = sum + 0.25f   * (((srcData + (src_y + 1) * srcStep))[x]);
-        sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[x]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       0.0625f * (((srcData + (src_y - 2) * srcStep))[left_x]);
-            sum = sum + 0.25f   * (((srcData + (src_y - 1) * srcStep))[left_x]);
-            sum = sum + 0.375f  * (((srcData + (src_y    ) * srcStep))[left_x]);
-            sum = sum + 0.25f   * (((srcData + (src_y + 1) * srcStep))[left_x]);
-            sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[left_x]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       0.0625f * (((srcData + (src_y - 2) * srcStep))[right_x]);
-            sum = sum + 0.25f   * (((srcData + (src_y - 1) * srcStep))[right_x]);
-            sum = sum + 0.375f  * (((srcData + (src_y    ) * srcStep))[right_x]);
-            sum = sum + 0.25f   * (((srcData + (src_y + 1) * srcStep))[right_x]);
-            sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[right_x]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]);
-        sum = sum + 0.25f   * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]);
-        sum = sum + 0.375f  * (((srcData + idx_row(src_y    , last_row) * srcStep))[col]);
-        sum = sum + 0.25f   * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]);
-        sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]);
-            sum = sum + 0.25f   * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]);
-            sum = sum + 0.375f  * (((srcData + idx_row(src_y    , last_row) * srcStep))[col]);
-            sum = sum + 0.25f   * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]);
-            sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]);
-            sum = sum + 0.25f   * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]);
-            sum = sum + 0.375f  * (((srcData + idx_row(src_y    , last_row) * srcStep))[col]);
-            sum = sum + 0.25f   * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]);
-            sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       0.0625f * smem[2 + tid2 - 2];
-        sum = sum + 0.25f   * smem[2 + tid2 - 1];
-        sum = sum + 0.375f  * smem[2 + tid2    ];
-        sum = sum + 0.25f   * smem[2 + tid2 + 1];
-        sum = sum + 0.0625f * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep + dst_x] = convert_uchar_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_8UC4 ///////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float4 smem[256 + 4];
-
-    float4 sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    float4 co1 = 0.375f;
-    float4 co2 = 0.25f;
-    float4 co3 = 0.0625f;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[x]));
-        sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[x]));
-        sum = sum + co1 * convert_float4((((srcData + (src_y    ) * srcStep / 4))[x]));
-        sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[x]));
-        sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[x]));
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[left_x]));
-            sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[left_x]));
-            sum = sum + co1 * convert_float4((((srcData + (src_y    ) * srcStep / 4))[left_x]));
-            sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[left_x]));
-            sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[left_x]));
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[right_x]));
-            sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[right_x]));
-            sum = sum + co1 * convert_float4((((srcData + (src_y    ) * srcStep / 4))[right_x]));
-            sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[right_x]));
-            sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[right_x]));
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
-        sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
-        sum = sum + co1 * convert_float4((((srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]));
-        sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
-        sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
-            sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
-            sum = sum + co1 * convert_float4((((srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]));
-            sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
-            sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
-            sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
-            sum = sum + co1 * convert_float4((((srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]));
-            sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
-            sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       co3 * smem[2 + tid2 - 2];
-        sum = sum + co2 * smem[2 + tid2 - 1];
-        sum = sum + co1 * smem[2 + tid2    ];
-        sum = sum + co2 * smem[2 + tid2 + 1];
-        sum = sum + co3 * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 4 + dst_x] = convert_uchar4_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_16UC1 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C1_D2(__global ushort * srcData, int srcStep, int srcRows, int srcCols, __global ushort *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float smem[256 + 4];
-
-    float sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
-        sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + (src_y    ) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
-        sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
-            sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + (src_y    ) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
-            sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
-            sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + (src_y    ) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
-            sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-        sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-        sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global ushort*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       0.0625f * smem[2 + tid2 - 2];
-        sum = sum + 0.25f   * smem[2 + tid2 - 1];
-        sum = sum + 0.375f  * smem[2 + tid2    ];
-        sum = sum + 0.25f   * smem[2 + tid2 + 1];
-        sum = sum + 0.0625f * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 2 + dst_x] = convert_ushort_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_16UC4 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C4_D2(__global ushort4 * srcData, int srcStep, int srcRows, int srcCols, __global ushort4 *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float4 smem[256 + 4];
-
-    float4 sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    float4 co1 = 0.375f;
-    float4 co2 = 0.25f;
-    float4 co3 = 0.0625f;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
-        sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
-        sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[x]);
-        sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
-        sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
-            sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[left_x]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
-            sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
-            sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[right_x]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
-            sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-        sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-        sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-        sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-        sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       co3 * smem[2 + tid2 - 2];
-        sum = sum + co2 * smem[2 + tid2 - 1];
-        sum = sum + co1 * smem[2 + tid2    ];
-        sum = sum + co2 * smem[2 + tid2 + 1];
-        sum = sum + co3 * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 8 + dst_x] = convert_ushort4_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_16SC1 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C1_D3(__global short * srcData, int srcStep, int srcRows, int srcCols, __global short *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float smem[256 + 4];
-
-    float sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
-        sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + (src_y    ) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
-        sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
-            sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + (src_y    ) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
-            sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
-            sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + (src_y    ) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
-            sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-        sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-        sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global short*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       0.0625f * smem[2 + tid2 - 2];
-        sum = sum + 0.25f   * smem[2 + tid2 - 1];
-        sum = sum + 0.375f  * smem[2 + tid2    ];
-        sum = sum + 0.25f   * smem[2 + tid2 + 1];
-        sum = sum + 0.0625f * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 2 + dst_x] = convert_short_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_16SC4 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C4_D3(__global short4 * srcData, int srcStep, int srcRows, int srcCols, __global short4 *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float4 smem[256 + 4];
-
-    float4 sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    float4 co1 = 0.375f;
-    float4 co2 = 0.25f;
-    float4 co3 = 0.0625f;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
-        sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
-        sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[x]);
-        sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
-        sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
-            sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[left_x]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
-            sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
-            sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[right_x]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
-            sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-        sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-        sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-        sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-        sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col]);
-            sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
-            sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       co3 * smem[2 + tid2 - 2];
-        sum = sum + co2 * smem[2 + tid2 - 1];
-        sum = sum + co1 * smem[2 + tid2    ];
-        sum = sum + co2 * smem[2 + tid2 + 1];
-        sum = sum + co3 * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 8 + dst_x] = convert_short4_sat_rte(sum);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_32FC1 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows, int srcCols, __global float *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float smem[256 + 4];
-
-    float sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
-        sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + (src_y    ) * srcStep))[x];
-        sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
-        sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
-            sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + (src_y    ) * srcStep))[left_x];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
-            sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
-            sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + (src_y    ) * srcStep))[right_x];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
-            sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-        sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-        sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-        sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
-            sum = sum + 0.375f  * ((__global float*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[col];
-            sum = sum + 0.25f   * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
-            sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       0.0625f * smem[2 + tid2 - 2];
-        sum = sum + 0.25f   * smem[2 + tid2 - 1];
-        sum = sum + 0.375f  * smem[2 + tid2    ];
-        sum = sum + 0.25f   * smem[2 + tid2 + 1];
-        sum = sum + 0.0625f * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 4 + dst_x] = sum;
-    }
-}
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////  CV_32FC4 //////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstCols)
-{
-    const int x = get_global_id(0);
-    const int y = get_group_id(1);
-
-    __local float4 smem[256 + 4];
-
-    float4 sum;
-
-    const int src_y = 2*y;
-    const int last_row = srcRows - 1;
-    const int last_col = srcCols - 1;
-
-    float4 co1 = 0.375f;
-    float4 co2 = 0.25f;
-    float4 co3 = 0.0625f;
-
-    if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
-    {
-        sum =       co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x];
-        sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x];
-        sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[x];
-        sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x];
-        sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            sum =       co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x];
-            sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[left_x];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x];
-            sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            sum =       co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x];
-            sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y    ) * srcStep / 4))[right_x];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x];
-            sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-    else
-    {
-        int col = idx_col(x, last_col);
-
-        sum =       co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
-        sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
-        sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col];
-        sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
-        sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
-
-        smem[2 + get_local_id(0)] = sum;
-
-        if (get_local_id(0) < 2)
-        {
-            const int left_x = x - 2;
-
-            col = idx_col(left_x, last_col);
-
-            sum =       co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
-            sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
-            sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
-
-            smem[get_local_id(0)] = sum;
-        }
-
-        if (get_local_id(0) > 253)
-        {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
-
-            sum =       co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
-            sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[col];
-            sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
-            sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
-
-            smem[4 + get_local_id(0)] = sum;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (get_local_id(0) < 128)
-    {
-        const int tid2 = get_local_id(0) * 2;
-
-        sum =       co3 * smem[2 + tid2 - 2];
-        sum = sum + co2 * smem[2 + tid2 - 1];
-        sum = sum + co1 * smem[2 + tid2    ];
-        sum = sum + co2 * smem[2 + tid2 + 1];
-        sum = sum + co3 * smem[2 + tid2 + 2];
-
-        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
-
-        if (dst_x < dstCols)
-            dst[y * dstStep / 16 + dst_x] = sum;
-    }
-}
diff --git a/modules/ocl/src/opencl/pyr_up.cl b/modules/ocl/src/opencl/pyr_up.cl
deleted file mode 100644
index 157fee8..0000000
--- a/modules/ocl/src/opencl/pyr_up.cl
+++ /dev/null
@@ -1,146 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Chunpeng	chunpeng@multicorewareinc.com
-//    Dachuan Zhao, dachuan@multicorewareinc.com
-//    Yao Wang, yao@multicorewareinc.com
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-///////////////////////////////////////////////////////////////////////
-////////////////////////  Generic PyrUp  //////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-__kernel void pyrUp(__global Type* src, __global Type* dst,
-                          int srcRows, int dstRows, int srcCols, int dstCols,
-                          int srcOffset, int dstOffset, int srcStep, int dstStep)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    const int lsizex = get_local_size(0);
-    const int lsizey = get_local_size(1);
-
-    const int tidx = get_local_id(0);
-    const int tidy = get_local_id(1);
-
-    __local floatType s_srcPatch[10][10];
-    __local floatType s_dstPatch[20][16];
-
-    if( tidx < 10 && tidy < 10 )
-    {
-        int srcx = mad24((int)get_group_id(0), lsizex>>1, tidx) - 1;
-        int srcy = mad24((int)get_group_id(1), lsizey>>1, tidy) - 1;
-
-        srcx = abs(srcx);
-        srcx = min(srcCols - 1,srcx);
-
-        srcy = abs(srcy);
-        srcy = min(srcRows -1 ,srcy);
-
-        s_srcPatch[tidy][tidx] = convertToFloat(src[srcx + srcy * srcStep]);
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    floatType sum = (floatType)0;
-    const floatType evenFlag = (floatType)((tidx & 1) == 0);
-    const floatType oddFlag = (floatType)((tidx & 1) != 0);
-    const bool  eveny = ((tidy & 1) == 0);
-
-    const floatType co1 = (floatType)0.375f;
-    const floatType co2 = (floatType)0.25f;
-    const floatType co3 = (floatType)0.0625f;
-
-    if(eveny)
-    {
-        sum =       ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 2) >> 1)];
-        sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 1) >> 1)];
-        sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx    ) >> 1)];
-        sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 1) >> 1)];
-        sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 2) >> 1)];
-    }
-
-    s_dstPatch[2 + tidy][tidx] = sum;
-
-    if (tidy < 2)
-    {
-        sum = 0;
-
-        if (eveny)
-        {
-            sum =       (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
-            sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
-            sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx    ) >> 1)];
-            sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
-            sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
-        }
-
-        s_dstPatch[tidy][tidx] = sum;
-    }
-
-    if (tidy > 13)
-    {
-        sum = 0;
-
-        if (eveny)
-        {
-            sum =       (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx - 2) >> 1)];
-            sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx - 1) >> 1)];
-            sum = sum + (evenFlag * co1) * s_srcPatch[lsizey-7][1 + ((tidx    ) >> 1)];
-            sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx + 1) >> 1)];
-            sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx + 2) >> 1)];
-        }
-        s_dstPatch[4 + tidy][tidx] = sum;
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    sum =       co3 * s_dstPatch[2 + tidy - 2][tidx];
-    sum = sum + co2 * s_dstPatch[2 + tidy - 1][tidx];
-    sum = sum + co1 * s_dstPatch[2 + tidy    ][tidx];
-    sum = sum + co2 * s_dstPatch[2 + tidy + 1][tidx];
-    sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
-
-    if ((x < dstCols) && (y < dstRows))
-        dst[x + y * dstStep] = convertToType(4.0f * sum);
-}
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl
deleted file mode 100644
index f34aee9..0000000
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ /dev/null
@@ -1,1019 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Dachuan Zhao, dachuan@multicorewareinc.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//    Xiaopeng Fu, fuxiaopeng2222@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#define	BUFFER	64
-#define	BUFFER2	BUFFER>>1
-#ifndef WAVE_SIZE
-#define WAVE_SIZE 1
-#endif
-#ifdef CPU
-
-inline void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
-{
-    smem1[tid] = val1;
-    smem2[tid] = val2;
-    smem3[tid] = val3;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int i = BUFFER2; i > 0; i >>= 1)
-    {
-        if(tid < i)
-        {
-            smem1[tid] += smem1[tid + i];
-            smem2[tid] += smem2[tid + i];
-            smem3[tid] += smem3[tid + i];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
-{
-    smem1[tid] = val1;
-    smem2[tid] = val2;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int i = BUFFER2; i > 0; i >>= 1)
-    {
-        if(tid < i)
-        {
-            smem1[tid] += smem1[tid + i];
-            smem2[tid] += smem2[tid + i];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-
-inline void reduce1(float val1, volatile __local float* smem1, int tid)
-{
-    smem1[tid] = val1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int i = BUFFER2; i > 0; i >>= 1)
-    {
-        if(tid < i)
-        {
-            smem1[tid] += smem1[tid + i];
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-}
-#else
-inline void reduce3(float val1, float val2, float val3,
-             __local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
-{
-    smem1[tid] = val1;
-    smem2[tid] = val2;
-    smem3[tid] = val3;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 32)
-    {
-        smem1[tid] += smem1[tid + 32];
-        smem2[tid] += smem2[tid + 32];
-        smem3[tid] += smem3[tid + 32];
-#if WAVE_SIZE < 32
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
-#endif
-        smem1[tid] += smem1[tid + 16];
-        smem2[tid] += smem2[tid + 16];
-        smem3[tid] += smem3[tid + 16];
-#if WAVE_SIZE <16
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
-        smem1[tid] += smem1[tid + 8];
-        smem2[tid] += smem2[tid + 8];
-        smem3[tid] += smem3[tid + 8];
-
-        smem1[tid] += smem1[tid + 4];
-        smem2[tid] += smem2[tid + 4];
-        smem3[tid] += smem3[tid + 4];
-
-        smem1[tid] += smem1[tid + 2];
-        smem2[tid] += smem2[tid + 2];
-        smem3[tid] += smem3[tid + 2];
-
-        smem1[tid] += smem1[tid + 1];
-        smem2[tid] += smem2[tid + 1];
-        smem3[tid] += smem3[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-}
-
-inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
-{
-    smem1[tid] = val1;
-    smem2[tid] = val2;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 32)
-    {
-        smem1[tid] += smem1[tid + 32];
-        smem2[tid] += smem2[tid + 32];
-#if WAVE_SIZE < 32
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
-#endif
-        smem1[tid] += smem1[tid + 16];
-        smem2[tid] += smem2[tid + 16];
-#if WAVE_SIZE <16
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
-        smem1[tid] += smem1[tid + 8];
-        smem2[tid] += smem2[tid + 8];
-
-        smem1[tid] += smem1[tid + 4];
-        smem2[tid] += smem2[tid + 4];
-
-        smem1[tid] += smem1[tid + 2];
-        smem2[tid] += smem2[tid + 2];
-
-        smem1[tid] += smem1[tid + 1];
-        smem2[tid] += smem2[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-}
-
-inline void reduce1(float val1, __local volatile float* smem1, int tid)
-{
-    smem1[tid] = val1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 32)
-    {
-        smem1[tid] += smem1[tid + 32];
-#if WAVE_SIZE < 32
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
-#endif
-        smem1[tid] += smem1[tid + 16];
-#if WAVE_SIZE <16
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
-        smem1[tid] += smem1[tid + 8];
-        smem1[tid] += smem1[tid + 4];
-        smem1[tid] += smem1[tid + 2];
-        smem1[tid] += smem1[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-}
-#endif
-
-#define SCALE (1.0f / (1 << 20))
-#define	THRESHOLD	0.01f
-
-// Image read mode
-__constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
-
-inline void SetPatch(image2d_t I, float x, float y,
-              float* Pch, float* Dx, float* Dy,
-              float* A11, float* A12, float* A22)
-{
-    *Pch = read_imagef(I, sampler, (float2)(x, y)).x;
-
-    float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
-                 (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
-
-    float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
-                 (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
-
-
-    *Dx = dIdx;
-    *Dy = dIdy;
-
-    *A11 += dIdx * dIdx;
-    *A12 += dIdx * dIdy;
-    *A22 += dIdy * dIdy;
-}
-
-inline void GetPatch(image2d_t J, float x, float y,
-              float* Pch, float* Dx, float* Dy,
-              float* b1, float* b2)
-{
-    float J_val = read_imagef(J, sampler, (float2)(x, y)).x;
-    float diff = (J_val - *Pch) * 32.0f;
-    *b1 += diff**Dx;
-    *b2 += diff**Dy;
-}
-
-inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
-{
-    float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
-    *errval += fabs(diff);
-}
-
-inline void SetPatch4(image2d_t I, const float x, const float y,
-               float4* Pch, float4* Dx, float4* Dy,
-               float* A11, float* A12, float* A22)
-{
-    *Pch = read_imagef(I, sampler, (float2)(x, y));
-
-    float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
-                  (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
-
-    float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
-                  (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
-
-
-    *Dx = dIdx;
-    *Dy = dIdy;
-    float4 sqIdx = dIdx * dIdx;
-    *A11 += sqIdx.x + sqIdx.y + sqIdx.z;
-    sqIdx = dIdx * dIdy;
-    *A12 += sqIdx.x + sqIdx.y + sqIdx.z;
-    sqIdx = dIdy * dIdy;
-    *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
-}
-
-inline void GetPatch4(image2d_t J, const float x, const float y,
-               const float4* Pch, const float4* Dx, const float4* Dy,
-               float* b1, float* b2)
-{
-    float4 J_val = read_imagef(J, sampler, (float2)(x, y));
-    float4 diff = (J_val - *Pch) * 32.0f;
-    float4 xdiff = diff* *Dx;
-    *b1 += xdiff.x + xdiff.y + xdiff.z;
-    xdiff = diff* *Dy;
-    *b2 += xdiff.x + xdiff.y + xdiff.z;
-}
-
-inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
-{
-    float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
-    *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
-}
-
-#define	GRIDSIZE	3
-__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
-                             __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
-                             const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
-{
-    __local float smem1[BUFFER];
-    __local float smem2[BUFFER];
-    __local float smem3[BUFFER];
-
-    unsigned int xid=get_local_id(0);
-    unsigned int yid=get_local_id(1);
-    unsigned int gid=get_group_id(0);
-    unsigned int xsize=get_local_size(0);
-    unsigned int ysize=get_local_size(1);
-    int xBase, yBase, k;
-
-    float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
-
-    const int tid = mad24(yid, xsize, xid);
-
-    float2 prevPt = prevPts[gid] / (float2)(1 << level);
-
-    if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
-    {
-        if (tid == 0 && level == 0)
-        {
-            status[gid] = 0;
-        }
-
-        return;
-    }
-    prevPt -= c_halfWin;
-
-    // extract the patch from the first image, compute covariation matrix of derivatives
-
-    float A11 = 0;
-    float A12 = 0;
-    float A22 = 0;
-
-    float I_patch[GRIDSIZE][GRIDSIZE];
-    float dIdx_patch[GRIDSIZE][GRIDSIZE];
-    float dIdy_patch[GRIDSIZE][GRIDSIZE];
-
-    yBase=yid;
-    {
-        xBase=xid;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
-                 &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
-                 &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
-                     &A11, &A12, &A22);
-    }
-    yBase+=ysize;
-    {
-        xBase=xid;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
-                 &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
-                 &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
-                     &A11, &A12, &A22);
-    }
-    yBase+=ysize;
-    if(yBase<c_winSize_y)
-    {
-        xBase=xid;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
-                 &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                 &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
-                 &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
-                     &A11, &A12, &A22);
-    }
-
-    reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
-
-    A11 = smem1[0];
-    A12 = smem2[0];
-    A22 = smem3[0];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    float D = A11 * A22 - A12 * A12;
-
-    if (D < 1.192092896e-07f)
-    {
-        if (tid == 0 && level == 0)
-            status[gid] = 0;
-
-        return;
-    }
-
-    A11 /= D;
-    A12 /= D;
-    A22 /= D;
-
-    prevPt = nextPts[gid] * 2.0f - c_halfWin;
-
-    for (k = 0; k < c_iters; ++k)
-    {
-        if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)
-        {
-            if (tid == 0 && level == 0)
-                status[gid] = 0;
-            return;
-        }
-
-        float b1 = 0;
-        float b2 = 0;
-
-        yBase=yid;
-        {
-            xBase=xid;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
-                     &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
-                     &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
-                         &b1, &b2);
-        }
-        yBase+=ysize;
-        {
-            xBase=xid;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
-                     &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
-                     &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
-                         &b1, &b2);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-            xBase=xid;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
-                     &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
-                     &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
-                         &b1, &b2);
-        }
-
-        reduce2(b1, b2, smem1, smem2, tid);
-
-        b1 = smem1[0];
-        b2 = smem2[0];
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        float2 delta;
-        delta.x = A12 * b2 - A22 * b1;
-        delta.y = A12 * b1 - A11 * b2;
-
-        prevPt += delta;
-
-        if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
-            break;
-    }
-
-    D = 0.0f;
-    if (calcErr)
-    {
-        yBase=yid;
-        {
-            xBase=xid;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[0][0], &D);
-
-
-            xBase+=xsize;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[0][1], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[0][2], &D);
-        }
-        yBase+=ysize;
-        {
-            xBase=xid;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[1][0], &D);
-
-
-            xBase+=xsize;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[1][1], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[1][2], &D);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-            xBase=xid;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[2][0], &D);
-
-
-            xBase+=xsize;
-            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                     &I_patch[2][1], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                         &I_patch[2][2], &D);
-        }
-
-        reduce1(D, smem1, tid);
-    }
-
-    if (tid == 0)
-    {
-        prevPt += c_halfWin;
-
-        nextPts[gid] = prevPt;
-
-        if (calcErr)
-            err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
-    }
-}
-
-
-__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
-                             __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
-                             const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
-{
-    __local float smem1[BUFFER];
-    __local float smem2[BUFFER];
-    __local float smem3[BUFFER];
-
-    unsigned int xid=get_local_id(0);
-    unsigned int yid=get_local_id(1);
-    unsigned int gid=get_group_id(0);
-    unsigned int xsize=get_local_size(0);
-    unsigned int ysize=get_local_size(1);
-    int xBase, yBase, k;
-
-    float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
-
-    const int tid = mad24(yid, xsize, xid);
-
-    float2 nextPt = prevPts[gid]/(float2)(1<<level);
-
-    if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
-    {
-        if (tid == 0 && level == 0)
-        {
-            status[gid] = 0;
-        }
-
-        return;
-    }
-
-    nextPt -= c_halfWin;
-
-    // extract the patch from the first image, compute covariation matrix of derivatives
-
-    float A11 = 0.0f;
-    float A12 = 0.0f;
-    float A22 = 0.0f;
-
-    float4 I_patch[8];
-    float4 dIdx_patch[8];
-    float4 dIdy_patch[8];
-    float4 I_add,Dx_add,Dy_add;
-
-    yBase=yid;
-    {
-        xBase=xid;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
-                  &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
-                  &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
-                      &A11, &A12, &A22);
-
-    }
-    yBase+=ysize;
-    {
-        xBase=xid;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
-                  &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
-                  &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
-                      &A11, &A12, &A22);
-    }
-    yBase+=ysize;
-    if(yBase<c_winSize_y)
-    {
-        xBase=xid;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
-                  &A11, &A12, &A22);
-
-
-        xBase+=xsize;
-        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                  &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
-                  &A11, &A12, &A22);
-
-        xBase+=xsize;
-        if(xBase<c_winSize_x)
-            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_add, &Dx_add, &Dy_add,
-                      &A11, &A12, &A22);
-    }
-
-    reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
-
-    A11 = smem1[0];
-    A12 = smem2[0];
-    A22 = smem3[0];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    float D = A11 * A22 - A12 * A12;
-
-    if (D < 1.192092896e-07f)
-    {
-        if (tid == 0 && level == 0)
-            status[gid] = 0;
-
-        return;
-    }
-
-    A11 /= D;
-    A12 /= D;
-    A22 /= D;
-
-    nextPt = nextPts[gid] * 2.0f - c_halfWin;
-
-    for (k = 0; k < c_iters; ++k)
-    {
-        if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows)
-        {
-            if (tid == 0 && level == 0)
-                status[gid] = 0;
-            return;
-        }
-
-        float b1 = 0;
-        float b2 = 0;
-
-        yBase=yid;
-        {
-            xBase=xid;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
-                      &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
-                      &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
-                          &b1, &b2);
-        }
-        yBase+=ysize;
-        {
-            xBase=xid;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
-                      &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
-                      &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
-                          &b1, &b2);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-            xBase=xid;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
-                      &b1, &b2);
-
-
-            xBase+=xsize;
-            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
-                      &b1, &b2);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_add, &Dx_add, &Dy_add,
-                          &b1, &b2);
-        }
-
-        reduce2(b1, b2, smem1, smem2, tid);
-
-        b1 = smem1[0];
-        b2 = smem2[0];
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        float2 delta;
-        delta.x = A12 * b2 - A22 * b1;
-        delta.y = A12 * b1 - A11 * b2;
-
-        nextPt +=delta;
-
-        if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
-            break;
-    }
-
-    D = 0.0f;
-    if (calcErr)
-    {
-        yBase=yid;
-        {
-            xBase=xid;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[0], &D);
-
-
-            xBase+=xsize;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[1], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_patch[2], &D);
-        }
-        yBase+=ysize;
-        {
-            xBase=xid;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[3], &D);
-
-
-            xBase+=xsize;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[4], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_patch[5], &D);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-            xBase=xid;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[6], &D);
-
-
-            xBase+=xsize;
-            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                      &I_patch[7], &D);
-
-            xBase+=xsize;
-            if(xBase<c_winSize_x)
-                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                          &I_add, &D);
-        }
-
-        reduce1(D, smem1, tid);
-    }
-
-    if (tid == 0)
-    {
-        nextPt += c_halfWin;
-        nextPts[gid] = nextPt;
-
-        if (calcErr)
-            err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
-    }
-}
-
-__kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uStep, __global float* v, int vStep, __global const float* prevU, int prevUStep, __global const float* prevV, int prevVStep,
-                            const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
-{
-    int c_halfWin_x = (c_winSize_x - 1) / 2;
-    int c_halfWin_y = (c_winSize_y - 1) / 2;
-
-    const int patchWidth  = get_local_size(0) + 2 * c_halfWin_x;
-    const int patchHeight = get_local_size(1) + 2 * c_halfWin_y;
-
-    __local int smem[8192];
-
-    __local int* I_patch = smem;
-    __local int* dIdx_patch = I_patch + patchWidth * patchHeight;
-    __local int* dIdy_patch = dIdx_patch + patchWidth * patchHeight;
-
-    const int xBase = get_group_id(0) * get_local_size(0);
-    const int yBase = get_group_id(1) * get_local_size(1);
-
-    sampler_t sampleri    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
-
-    for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1))
-    {
-        for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0))
-        {
-            float x = xBase - c_halfWin_x + j + 0.5f;
-            float y = yBase - c_halfWin_y + i + 0.5f;
-
-            I_patch[i * patchWidth + j] = read_imagei(I, sampleri, (float2)(x, y)).x;
-
-            // Sharr Deriv
-
-            dIdx_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x+1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
-                                             (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x-1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x);
-
-            dIdy_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y+1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
-                                             (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y-1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x);
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // extract the patch from the first image, compute covariation matrix of derivatives
-
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (x >= cols || y >= rows)
-        return;
-
-    int A11i = 0;
-    int A12i = 0;
-    int A22i = 0;
-
-    for (int i = 0; i < c_winSize_y; ++i)
-    {
-        for (int j = 0; j < c_winSize_x; ++j)
-        {
-            int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
-            int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
-
-            A11i += dIdx * dIdx;
-            A12i += dIdx * dIdy;
-            A22i += dIdy * dIdy;
-        }
-    }
-
-    float A11 = A11i;
-    float A12 = A12i;
-    float A22 = A22i;
-
-    float D = A11 * A22 - A12 * A12;
-
-    //if (calcErr && GET_MIN_EIGENVALS)
-    //    (err + y * errStep)[x] = minEig;
-
-    if (D < 1.192092896e-07f)
-    {
-        //if (calcErr)
-        //    err(y, x) = 3.402823466e+38f;
-
-        return;
-    }
-
-    D = 1.f / D;
-
-    A11 *= D;
-    A12 *= D;
-    A22 *= D;
-
-    float2 nextPt;
-    nextPt.x = x + prevU[y/2 * prevUStep / 4 + x/2] * 2.0f;
-    nextPt.y = y + prevV[y/2 * prevVStep / 4 + x/2] * 2.0f;
-
-    for (int k = 0; k < c_iters; ++k)
-    {
-        if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
-        {
-            //if (calcErr)
-            //    err(y, x) = 3.402823466e+38f;
-
-            return;
-        }
-
-        int b1 = 0;
-        int b2 = 0;
-
-        for (int i = 0; i < c_winSize_y; ++i)
-        {
-            for (int j = 0; j < c_winSize_x; ++j)
-            {
-                int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j];
-                int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x;
-
-                int diff = (iJ - iI) * 32;
-
-                int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
-                int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
-
-                b1 += diff * dIdx;
-                b2 += diff * dIdy;
-            }
-        }
-
-        float2 delta;
-        delta.x = A12 * b2 - A22 * b1;
-        delta.y = A12 * b1 - A11 * b2;
-
-        nextPt.x += delta.x;
-        nextPt.y += delta.y;
-
-        if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f)
-            break;
-    }
-
-    u[y * uStep / 4 + x] = nextPt.x - x;
-    v[y * vStep / 4 + x] = nextPt.y - y;
-
-    if (calcErr)
-    {
-        int errval = 0;
-
-        for (int i = 0; i < c_winSize_y; ++i)
-        {
-            for (int j = 0; j < c_winSize_x; ++j)
-            {
-                int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j];
-                int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x;
-
-                errval += abs(iJ - iI);
-            }
-        }
-
-        //err[y * errStep / 4 + x] = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
-    }
-}
diff --git a/modules/ocl/src/opencl/split_mat.cl b/modules/ocl/src/opencl/split_mat.cl
deleted file mode 100644
index b52b3c2..0000000
--- a/modules/ocl/src/opencl/split_mat.cl
+++ /dev/null
@@ -1,217 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#if DATA_DEPTH == 0
-#define BASE_TYPE uchar
-#elif DATA_DEPTH == 1
-#error data_depth char, use uchar datatype instead
-#elif DATA_DEPTH == 2
-#define BASE_TYPE ushort
-#elif DATA_DEPTH == 3
-#error data_depth short, use ushort datatype instead
-#elif DATA_DEPTH == 4
-#define BASE_TYPE int
-#elif DATA_DEPTH == 5
-#define BASE_TYPE float
-#elif DATA_DEPTH == 6
-#define BASE_TYPE double
-#else
-#error data_depth
-#endif
-
-#if DATA_CHAN == 2
-#define SRC_VEC_SIZE 2
-#elif DATA_CHAN == 3
-#define SRC_VEC_SIZE 4 // C3 is stored as C4
-#elif DATA_CHAN == 4
-#define SRC_VEC_SIZE 4
-#else
-#error data_chan
-#endif
-
-#define __CAT(x, y) x##y
-#define CAT(x, y) __CAT(x, y)
-
-#define uchar1 uchar
-#define char1 char
-#define ushort1 ushort
-#define short1 short
-#define int1 int
-#define float1 float
-#define double1 double
-
-#define TYPE BASE_TYPE
-
-#define SRC_TYPE CAT(BASE_TYPE, SRC_VEC_SIZE)
-
-#define DST_VEC_TYPE CAT(BASE_TYPE, VEC_SIZE)
-
-#define vstore1 vstore
-#define VSTORE CAT(vstore, VEC_SIZE)
-#define VSTORE_ALIGNED(ptr, v) *((__global DST_VEC_TYPE*)(ptr)) = (v)
-#define VSTORE_UNALIGNED(ptr, v) VSTORE((v), 0, (__global TYPE*)(ptr))
-
-#ifdef DST0_ALIGNED
-#define VSTORE_dst0 VSTORE_ALIGNED
-#else
-#define VSTORE_dst0 VSTORE_UNALIGNED
-#endif
-#ifdef DST1_ALIGNED
-#define VSTORE_dst1 VSTORE_ALIGNED
-#else
-#define VSTORE_dst1 VSTORE_UNALIGNED
-#endif
-#ifdef DST2_ALIGNED
-#define VSTORE_dst2 VSTORE_ALIGNED
-#else
-#define VSTORE_dst2 VSTORE_UNALIGNED
-#endif
-#ifdef DST3_ALIGNED
-#define VSTORE_dst3 VSTORE_ALIGNED
-#else
-#define VSTORE_dst3 VSTORE_UNALIGNED
-#endif
-
-__kernel void split_vector(
-        __global SRC_TYPE* src, int srcStepBytes, int2 srcOffset, // offset.x in bytes
-        __global TYPE* dst0, int dst0StepBytes, int2 dst0Offset,
-        __global TYPE* dst1, int dst1StepBytes, int2 dst1Offset,
-#if DATA_CHAN > 2
-        __global TYPE* dst2, int dst2StepBytes, int2 dst2Offset,
-#endif
-#if DATA_CHAN > 3
-        __global TYPE* dst3, int dst3StepBytes, int2 dst3Offset,
-#endif
-        int2 size)
-
-{
-    int x = get_global_id(0) * VEC_SIZE;
-    int y = get_global_id(1);
-
-    if (x < size.x && y < size.y)
-    {
-        SRC_TYPE srcData[VEC_SIZE];
-        int xOffsetLimitBytes = srcOffset.x + size.x * sizeof(SRC_TYPE);
-        int xOffsetBytes = srcOffset.x + x * sizeof(SRC_TYPE);
-        int yOffsetBytes = (srcOffset.y + y) * srcStepBytes;
-#pragma unroll
-        for (int i = 0; i < VEC_SIZE; i++, xOffsetBytes += sizeof(SRC_TYPE))
-        {
-            srcData[i] = (xOffsetBytes >= xOffsetLimitBytes) ? (SRC_TYPE)0 :
-                    *(__global SRC_TYPE*)((__global char*)src + yOffsetBytes + xOffsetBytes);
-        }
-
-#if VEC_SIZE == 1
-        TYPE dstC0 = srcData[0].s0;
-        TYPE dstC1 = srcData[0].s1;
-#if DATA_CHAN > 2
-        TYPE dstC2 = srcData[0].s2;
-#endif
-#if DATA_CHAN > 3
-        TYPE dstC3 = srcData[0].s3;
-#endif
-# define VEC_TO_ARRAY(v, a) TYPE a[1] = {v};
-#elif VEC_SIZE == 2
-        DST_VEC_TYPE dstC0 = (DST_VEC_TYPE)(srcData[0].s0, srcData[1].s0);
-        DST_VEC_TYPE dstC1 = (DST_VEC_TYPE)(srcData[0].s1, srcData[1].s1);
-#if DATA_CHAN > 2
-        DST_VEC_TYPE dstC2 = (DST_VEC_TYPE)(srcData[0].s2, srcData[1].s2);
-#endif
-#if DATA_CHAN > 3
-        DST_VEC_TYPE dstC3 = (DST_VEC_TYPE)(srcData[0].s3, srcData[1].s3);
-#endif
-# define VEC_TO_ARRAY(v, a) TYPE a[2] = {v.s0, v.s1};
-#elif VEC_SIZE == 4
-        DST_VEC_TYPE dstC0 = (DST_VEC_TYPE)(srcData[0].s0, srcData[1].s0, srcData[2].s0, srcData[3].s0);
-        DST_VEC_TYPE dstC1 = (DST_VEC_TYPE)(srcData[0].s1, srcData[1].s1, srcData[2].s1, srcData[3].s1);
-#if DATA_CHAN > 2
-        DST_VEC_TYPE dstC2 = (DST_VEC_TYPE)(srcData[0].s2, srcData[1].s2, srcData[2].s2, srcData[3].s2);
-#endif
-#if DATA_CHAN > 3
-        DST_VEC_TYPE dstC3 = (DST_VEC_TYPE)(srcData[0].s3, srcData[1].s3, srcData[2].s3, srcData[3].s3);
-#endif
-# define VEC_TO_ARRAY(v, a) TYPE a[4] = {v.s0, v.s1, v.s2, v.s3};
-#endif
-
-#ifndef BYPASS_VSTORE
-#define BYPASS_VSTORE false
-#endif
-
-#define WRITE_VEC_DST(dst, vecValue) \
-{ \
-        int dst ## xOffsetLimitBytes = dst ## Offset.x + size.x * sizeof(TYPE); \
-        int dst ## xOffsetBytes = dst ## Offset.x + x * sizeof(TYPE); \
-        int dst ## yOffsetBytes = (dst ## Offset.y + y) * dst ## StepBytes; \
-        if (!BYPASS_VSTORE && dst ## xOffsetBytes + (int)sizeof(DST_VEC_TYPE) <= dst ## xOffsetLimitBytes) \
-        { \
-            VSTORE_ ## dst(((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes), vecValue); \
-        } \
-        else \
-        { \
-            VEC_TO_ARRAY(vecValue, vecValue##Array); \
-            for (int i = 0; i < VEC_SIZE; i++, dst ## xOffsetBytes += sizeof(TYPE)) \
-            { \
-                if (dst ## xOffsetBytes + (int)sizeof(TYPE) <= dst ## xOffsetLimitBytes) \
-                    *(__global TYPE*)((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes) = vecValue##Array[i]; \
-                else \
-                    break; \
-            } \
-        } \
-}
-
-        WRITE_VEC_DST(dst0, dstC0);
-        WRITE_VEC_DST(dst1, dstC1);
-#if DATA_CHAN > 2
-        WRITE_VEC_DST(dst2, dstC2);
-#endif
-#if DATA_CHAN > 3
-        WRITE_VEC_DST(dst3, dstC3);
-#endif
-    }
-}
diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl
deleted file mode 100644
index d3efb5e..0000000
--- a/modules/ocl/src/opencl/stereobm.cl
+++ /dev/null
@@ -1,338 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Sen Liu, swjtuls1987@126.com
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#define ROWSperTHREAD 21     // the number of rows a thread will process
-#define BLOCK_W       128    // the thread block width (464)
-#define N_DISPARITIES 8
-
-#define STEREO_MIND 0                    // The minimum d range to check
-#define STEREO_DISP_STEP N_DISPARITIES   // the d step, must be <= 1 to avoid aliasing
-
-#ifndef radius
-#define radius 64
-#endif
-
-inline unsigned int CalcSSD(__local unsigned int *col_ssd)
-{
-    unsigned int cache = col_ssd[0];
-
-#pragma unroll
-    for(int i = 1; i <= (radius << 1); i++)
-        cache += col_ssd[i];
-
-    return cache;
-}
-
-inline uint2 MinSSD(__local unsigned int *col_ssd)
-{
-    unsigned int ssd[N_DISPARITIES];
-    const int win_size = (radius << 1);
-
-    //See above:  #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
-    ssd[0] = CalcSSD(col_ssd + 0 * (BLOCK_W + win_size));
-    ssd[1] = CalcSSD(col_ssd + 1 * (BLOCK_W + win_size));
-    ssd[2] = CalcSSD(col_ssd + 2 * (BLOCK_W + win_size));
-    ssd[3] = CalcSSD(col_ssd + 3 * (BLOCK_W + win_size));
-    ssd[4] = CalcSSD(col_ssd + 4 * (BLOCK_W + win_size));
-    ssd[5] = CalcSSD(col_ssd + 5 * (BLOCK_W + win_size));
-    ssd[6] = CalcSSD(col_ssd + 6 * (BLOCK_W + win_size));
-    ssd[7] = CalcSSD(col_ssd + 7 * (BLOCK_W + win_size));
-
-    unsigned int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
-
-    int bestIdx = 0;
-
-    for (int i = 0; i < N_DISPARITIES; i++)
-    {
-        if (mssd == ssd[i])
-            bestIdx = i;
-    }
-
-    return (uint2)(mssd, bestIdx);
-}
-
-inline void StepDown(int idx1, int idx2, __global unsigned char* imageL,
-              __global unsigned char* imageR, int d,   __local unsigned int *col_ssd)
-{
-    uint8 imgR1 = convert_uint8(vload8(0, imageR + (idx1 - d - 7)));
-    uint8 imgR2 = convert_uint8(vload8(0, imageR + (idx2 - d - 7)));
-    uint8 diff1 = (uint8)(imageL[idx1]) - imgR1;
-    uint8 diff2 = (uint8)(imageL[idx2]) - imgR2;
-    uint8 res = diff2 * diff2 - diff1 * diff1;
-    const int win_size = (radius << 1);
-    col_ssd[0 * (BLOCK_W + win_size)] += res.s7;
-    col_ssd[1 * (BLOCK_W + win_size)] += res.s6;
-    col_ssd[2 * (BLOCK_W + win_size)] += res.s5;
-    col_ssd[3 * (BLOCK_W + win_size)] += res.s4;
-    col_ssd[4 * (BLOCK_W + win_size)] += res.s3;
-    col_ssd[5 * (BLOCK_W + win_size)] += res.s2;
-    col_ssd[6 * (BLOCK_W + win_size)] += res.s1;
-    col_ssd[7 * (BLOCK_W + win_size)] += res.s0;
-}
-
-inline void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
-                __global unsigned char* imageR, int d,
-                 __local unsigned int *col_ssd)
-{
-    uint8 leftPixel1;
-    uint8 diffa = 0;
-    int idx = y_tex * im_pitch + x_tex;
-    const int win_size = (radius << 1);
-    for(int i = 0; i < (win_size + 1); i++)
-    {
-        leftPixel1 = (uint8)(imageL[idx]);
-        uint8 imgR = convert_uint8(vload8(0, imageR + (idx - d - 7)));
-        uint8 res = leftPixel1 - imgR;
-        diffa += res * res;
-
-        idx += im_pitch;
-    }
-    //See above:  #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
-    col_ssd[0 * (BLOCK_W + win_size)] = diffa.s7;
-    col_ssd[1 * (BLOCK_W + win_size)] = diffa.s6;
-    col_ssd[2 * (BLOCK_W + win_size)] = diffa.s5;
-    col_ssd[3 * (BLOCK_W + win_size)] = diffa.s4;
-    col_ssd[4 * (BLOCK_W + win_size)] = diffa.s3;
-    col_ssd[5 * (BLOCK_W + win_size)] = diffa.s2;
-    col_ssd[6 * (BLOCK_W + win_size)] = diffa.s1;
-    col_ssd[7 * (BLOCK_W + win_size)] = diffa.s0;
-}
-
-__kernel void stereoKernel(__global unsigned char *left, __global unsigned char *right,
-                           __global unsigned int *cminSSDImage, int cminSSD_step,
-                           __global unsigned char *disp, int disp_step,int cwidth, int cheight,
-                           int img_step, int maxdisp,
-                           __local unsigned int *col_ssd_cache)
-{
-    __local unsigned int *col_ssd = col_ssd_cache + get_local_id(0);
-    __local unsigned int *col_ssd_extra = get_local_id(0) < (radius << 1) ? col_ssd + BLOCK_W : 0;
-
-    int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
-
-#define Y (int)(get_group_id(1) * ROWSperTHREAD + radius)
-
-    __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
-    __global unsigned char* disparImage = disp + X + Y * disp_step;
-
-    int end_row = ROWSperTHREAD < (cheight - Y) ? ROWSperTHREAD:(cheight - Y);
-    int y_tex;
-    int x_tex = X - radius;
-
-    //if (x_tex >= cwidth)
-    //    return;
-
-    for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
-    {
-        y_tex = Y - radius;
-
-        InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd);
-        if (col_ssd_extra > 0)
-            if (x_tex + BLOCK_W < cwidth)
-                InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
-
-        barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
-
-        uint2 minSSD = MinSSD(col_ssd);
-        if (X < cwidth - radius && Y < cheight - radius)
-        {
-            if (minSSD.x < minSSDImage[0])
-            {
-                disparImage[0] = (unsigned char)(d + minSSD.y);
-                minSSDImage[0] = minSSD.x;
-            }
-        }
-
-        for(int row = 1; row < end_row; row++)
-        {
-            int idx1 = y_tex * img_step + x_tex;
-            int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            StepDown(idx1, idx2, left, right, d, col_ssd);
-            if (col_ssd_extra > 0)
-                if (x_tex + BLOCK_W < cwidth)
-                    StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            uint2 minSSD = MinSSD(col_ssd);
-            if (X < cwidth - radius && row < cheight - radius - Y)
-            {
-                int idx = row * cminSSD_step;
-                if (minSSD.x < minSSDImage[idx])
-                {
-                    disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
-                    minSSDImage[idx] = minSSD.x;
-                }
-            }
-
-            y_tex++;
-        } // for row loop
-    } // for d loop
-}
-//////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////// Sobel Prefiler (signal channel)//////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned char *output,
-                               int rows, int cols, int prefilterCap)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < cols && y < rows)
-    {
-        int cov = input[(y-1) * cols + (x-1)] * (-1) + input[(y-1) * cols + (x+1)] * (1) +
-                  input[(y)   * cols + (x-1)] * (-2) + input[(y)   * cols + (x+1)] * (2) +
-                  input[(y+1) * cols + (x-1)] * (-1) + input[(y+1) * cols + (x+1)] * (1);
-
-        cov = min(min(max(-prefilterCap, cov), prefilterCap) + prefilterCap, 255);
-        output[y * cols + x] = cov & 0xFF;
-    }
-}
-
-
-//////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////// Textureness filtering ////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////
-
-inline float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
-{
-    float conv = 0;
-    int y1 = y==0? 0 : y-1;
-    int x1 = x==0? 0 : x-1;
-    if(x < cols && y < rows && x > 0 && y > 0)
-    {
-        conv = (float)input[(y1)  * cols + (x1)] * (-1) + (float)input[(y1)  * cols + (x+1)] * (1) +
-               (float)input[(y)   * cols + (x1)] * (-2) + (float)input[(y)   * cols + (x+1)] * (2) +
-               (float)input[(y+1) * cols + (x1)] * (-1) + (float)input[(y+1) * cols + (x+1)] * (1);
-
-    }
-    return fabs(conv);
-}
-
-inline float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
-{
-    unsigned int cache = cols[0];
-
-    for(int i = 1; i <= winsz; i++)
-        cache += cols[i];
-
-    return cache;
-}
-
-#define RpT (2 * ROWSperTHREAD)  // got experimentally
-__kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, int disp_cols,
-                                 int disp_step, __global unsigned char *input, int input_rows,
-                                 int input_cols,int winsz, float threshold,
-                                 __local float *cols_cache)
-{
-    int winsz2 = winsz/2;
-    int n_dirty_pixels = (winsz2) * 2;
-
-    int local_id_x = get_local_id(0);
-    int group_size_x = get_local_size(0);
-    int group_id_y = get_group_id(1);
-
-    __local float *cols = cols_cache + group_size_x + local_id_x;
-    __local float *cols_extra = local_id_x < n_dirty_pixels ? cols + group_size_x : 0;
-
-    int x = get_global_id(0);
-    int beg_row = group_id_y * RpT;
-    int end_row = min(beg_row + RpT, disp_rows);
-
-
-    int y = beg_row;
-
-    float sum = 0;
-    float sum_extra = 0;
-
-    for(int i = y - winsz2; i <= y + winsz2; ++i)
-    {
-        sum += sobel(input, x - winsz2, i, input_rows, input_cols);
-        if (cols_extra)
-            sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
-    }
-    *cols = sum;
-    if (cols_extra)
-        *cols_extra = sum_extra;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
-    if (sum_win < threshold)
-        disp[y * disp_step + x] = 0;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    for(int y = beg_row + 1; y < end_row; ++y)
-    {
-        sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
-              sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
-        *cols = sum;
-
-        if (cols_extra)
-        {
-            sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
-                        + sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
-            *cols_extra = sum_extra;
-        }
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if (x < disp_cols)
-        {
-            float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
-            if (sum_win < threshold)
-                disp[y * disp_step + x] = 0;
-        }
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-}
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl
deleted file mode 100644
index 5a1bf08..0000000
--- a/modules/ocl/src/opencl/stereobp.cl
+++ /dev/null
@@ -1,393 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Peng Xiao,   pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-
-#ifdef T_FLOAT
-#define T float
-#define T4 float4
-#else
-#define T short
-#define T4 short4
-#endif
-
-///////////////////////////////////////////////////////////////
-/////////////////common///////////////////////////////////////
-/////////////////////////////////////////////////////////////
-inline T saturate_cast(float v){
-#ifdef T_SHORT
-    return convert_short_sat_rte(v);
-#else
-    return v;
-#endif
-}
-
-inline T4 saturate_cast4(float4 v){
-#ifdef T_SHORT
-    return convert_short4_sat_rte(v);
-#else
-    return v;
-#endif
-}
-
-#define FLOAT_MAX 3.402823466e+38f
-typedef struct
-{
-    int   cndisp;
-    float cmax_data_term;
-    float cdata_weight;
-    float cmax_disc_term;
-    float cdisc_single_jump;
-}con_srtuct_t;
-///////////////////////////////////////////////////////////////
-////////////////////////// comp data //////////////////////////
-///////////////////////////////////////////////////////////////
-
-inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
-{
-    return abs((int)(l.x) - *rs);
-}
-
-inline float pix_diff_4(const uchar4 l, __global const uchar *rs)
-{
-    uchar4 r;
-    r = *((__global uchar4 *)rs);
-
-    const float tr = 0.299f;
-    const float tg = 0.587f;
-    const float tb = 0.114f;
-
-    float val;
-
-    val  = tb * abs((int)l.x - r.x);
-    val += tg * abs((int)l.y - r.y);
-    val += tr * abs((int)l.z - r.z);
-
-    return val;
-}
-
-inline float pix_diff_3(const uchar4 l, __global const uchar *rs)
-{
-    return pix_diff_4(l, rs);
-}
-
-#ifndef CN
-#define CN 4
-#endif
-
-#ifndef CNDISP
-#define CNDISP 64
-#endif
-
-#define CAT(X,Y) X##Y
-#define CAT2(X,Y) CAT(X,Y)
-
-#define PIX_DIFF CAT2(pix_diff_, CN)
-
-__kernel void comp_data(__global uchar *left,  int left_rows,  int left_cols,  int left_step,
-                        __global uchar *right, int right_step,
-                        __global T *data, int data_step,
-                        __constant con_srtuct_t *con_st)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y > 0 && y < (left_rows - 1) && x > 0 && x < (left_cols - 1))
-    {
-        data_step /= sizeof(T);
-        const __global uchar* ls = left  + y * left_step  + x * CN;
-        const __global uchar* rs = right + y * right_step + x * CN;
-
-        __global T *ds = data + y * data_step + x;
-
-        const unsigned int disp_step = data_step * left_rows;
-        const float weightXterm = con_st -> cdata_weight * con_st -> cmax_data_term;
-        const uchar4 ls_data = vload4(0, ls);
-
-        for (int disp = 0; disp < con_st -> cndisp; disp++)
-        {
-            if (x - disp >= 1)
-            {
-                float val = 0;
-                val = PIX_DIFF(ls_data, rs - disp * CN);
-                ds[disp * disp_step] =  saturate_cast(fmin(con_st -> cdata_weight * val, weightXterm));
-            }
-            else
-            {
-                ds[disp * disp_step] =  saturate_cast(weightXterm);
-            }
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////
-//////////////////////// data step down ///////////////////////
-///////////////////////////////////////////////////////////////
-__kernel void data_step_down(__global T *src, int src_rows,
-                             __global T *dst, int dst_rows, int dst_cols,
-                             int src_step, int dst_step,
-                             int cndisp)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        src_step /= sizeof(T);
-        dst_step /= sizeof(T);
-        int4 coor_step = (int4)(src_rows * src_step);
-        int4 coor = (int4)(min(2*y+0, src_rows-1) * src_step + 2*x+0,
-                           min(2*y+1, src_rows-1) * src_step + 2*x+0,
-                           min(2*y+0, src_rows-1) * src_step + 2*x+1,
-                           min(2*y+1, src_rows-1) * src_step + 2*x+1);
-
-        for (int d = 0; d < cndisp; ++d)
-        {
-            float dst_reg;
-            dst_reg  = src[coor.x];
-            dst_reg += src[coor.y];
-            dst_reg += src[coor.z];
-            dst_reg += src[coor.w];
-            coor += coor_step;
-
-            dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg);
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////
-/////////////////// level up messages  ////////////////////////
-///////////////////////////////////////////////////////////////
-__kernel void level_up_message(__global T *src, int src_rows, int src_step,
-                               __global T *dst, int dst_rows, int dst_cols, int dst_step,
-                               int cndisp)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (x < dst_cols && y < dst_rows)
-    {
-        src_step /= sizeof(T);
-        dst_step /= sizeof(T);
-
-        const int dst_disp_step = dst_step * dst_rows;
-        const int src_disp_step = src_step * src_rows;
-
-        __global T       *dstr = dst + y * dst_step + x;
-        __global const T *srcr = src + (y / 2 * src_step) + (x / 2);
-
-        for (int d = 0; d < cndisp; ++d)
-            dstr[d * dst_disp_step] = srcr[d * src_disp_step];
-    }
-}
-
-///////////////////////////////////////////////////////////////
-////////////////////  calc all iterations /////////////////////
-///////////////////////////////////////////////////////////////
-inline void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
-              const __global T *dt,
-              int u_step, int msg_disp_step, int data_disp_step,
-              float4 cmax_disc_term, float4 cdisc_single_jump)
-{
-    __global T *us = us_ + u_step;
-    __global T *ds = ds_ - u_step;
-    __global T *ls = ls_ + 1;
-    __global T *rs = rs_ - 1;
-
-    float4 minimum = (float4)(FLOAT_MAX);
-
-    T4 t_dst[CNDISP];
-    float4 dst_reg;
-    float4 prev;
-    float4 cur;
-
-    T t_us = us[0];
-    T t_ds = ds[0];
-    T t_ls = ls[0];
-    T t_rs = rs[0];
-    T t_dt = dt[0];
-
-    prev = (float4)(t_us + t_ls + t_rs + t_dt,
-                    t_ds + t_ls + t_rs + t_dt,
-                    t_us + t_ds + t_rs + t_dt,
-                    t_us + t_ds + t_ls + t_dt);
-
-    minimum = min(prev, minimum);
-
-    t_dst[0] = saturate_cast4(prev);
-
-    for(int i = 1, idx = msg_disp_step; i < CNDISP; ++i, idx+=msg_disp_step)
-    {
-        t_us = us[idx];
-        t_ds = ds[idx];
-        t_ls = ls[idx];
-        t_rs = rs[idx];
-        t_dt = dt[data_disp_step * i];
-
-        dst_reg = (float4)(t_us + t_ls + t_rs + t_dt,
-                           t_ds + t_ls + t_rs + t_dt,
-                           t_us + t_ds + t_rs + t_dt,
-                           t_us + t_ds + t_ls + t_dt);
-
-        minimum = min(dst_reg, minimum);
-
-        prev += cdisc_single_jump;
-        prev = min(prev, dst_reg);
-
-        t_dst[i] = saturate_cast4(prev);
-    }
-
-    minimum += cmax_disc_term;
-
-    float4 sum = (float4)(0);
-    prev = convert_float4(t_dst[CNDISP - 1]);
-    for (int disp = CNDISP - 2; disp >= 0; disp--)
-    {
-        prev += cdisc_single_jump;
-        cur = convert_float4(t_dst[disp]);
-        prev = min(prev, cur);
-        cur = min(prev, minimum);
-        sum += cur;
-
-        t_dst[disp] = saturate_cast4(cur);
-    }
-
-    dst_reg = convert_float4(t_dst[CNDISP - 1]);
-    dst_reg = min(dst_reg, minimum);
-    t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
-    sum += dst_reg;
-
-    sum /= (float4)(CNDISP);
-#pragma unroll
-    for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
-    {
-        T4 dst = t_dst[i];
-        us_[idx] = dst.x - sum.x;
-        ds_[idx] = dst.y - sum.y;
-        rs_[idx] = dst.z - sum.z;
-        ls_[idx] = dst.w - sum.w;
-    }
-}
-__kernel void one_iteration(__global T *u,    int u_step,
-                            __global T *data, int data_step,
-                            __global T *d,    __global T *l, __global T *r,
-                            int t, int cols, int rows,
-                            float cmax_disc_term, float cdisc_single_jump)
-{
-    const int y = get_global_id(1);
-    const int x = ((get_global_id(0)) << 1) + ((y + t) & 1);
-
-    if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
-    {
-        u_step    /= sizeof(T);
-        data_step /= sizeof(T);
-
-        __global T *us = u + y * u_step + x;
-        __global T *ds = d + y * u_step + x;
-        __global T *ls = l + y * u_step + x;
-        __global T *rs = r + y * u_step + x;
-        const __global  T *dt = data + y * data_step + x;
-
-        int msg_disp_step = u_step * rows;
-        int data_disp_step = data_step * rows;
-
-        message(us, ds, ls, rs, dt,
-                u_step, msg_disp_step, data_disp_step,
-                (float4)(cmax_disc_term), (float4)(cdisc_single_jump));
-    }
-}
-
-///////////////////////////////////////////////////////////////
-/////////////////////////// output ////////////////////////////
-///////////////////////////////////////////////////////////////
-__kernel void output(const __global T *u, int u_step,
-                     const __global T *d, const __global T *l,
-                     const __global T *r, const __global T *data,
-                     __global T *disp, int disp_rows, int disp_cols, int disp_step,
-                     int cndisp)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (y > 0 && y < disp_rows - 1 && x > 0 && x < disp_cols - 1)
-    {
-        u_step    /= sizeof(T);
-        disp_step /= sizeof(T);
-        const __global T *us = u + (y + 1) * u_step + x;
-        const __global T *ds = d + (y - 1) * u_step + x;
-        const __global T *ls = l + y * u_step + (x + 1);
-        const __global T *rs = r + y * u_step + (x - 1);
-        const __global T *dt = data + y * u_step + x;
-
-        int disp_steps = disp_rows * u_step;
-
-        int best = 0;
-        float best_val = FLOAT_MAX;
-        for (int d = 0; d < cndisp; ++d)
-        {
-            float val;
-            val  = us[d * disp_steps];
-            val += ds[d * disp_steps];
-            val += ls[d * disp_steps];
-            val += rs[d * disp_steps];
-            val += dt[d * disp_steps];
-
-            if (val < best_val)
-            {
-                best_val = val;
-                best = d;
-            }
-        }
-
-        (disp + y * disp_step)[x] = convert_short_sat(best);
-    }
-}
diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl
deleted file mode 100644
index 23fc814..0000000
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ /dev/null
@@ -1,1382 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Jin Ma, jin@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////get_first_k_initial_global//////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
-    __global short *ctemp, int h, int w, int nr_plane,
-    int cmsg_step1, int cdisp_step1, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        __global short *selected_disparity = selected_disp_pyr      + y * cmsg_step1 + x;
-        __global short *data_cost_selected = data_cost_selected_    + y * cmsg_step1 + x;
-        __global short *data_cost          = ctemp + y * cmsg_step1 + x;
-
-        for(int i = 0; i < nr_plane; i++)
-        {
-            short minimum = SHRT_MAX;
-            int id = 0;
-
-            for(int d = 0; d < cndisp; d++)
-            {
-                short cur = data_cost[d * cdisp_step1];
-                if(cur < minimum)
-                {
-                    minimum = cur;
-                    id = d;
-                }
-            }
-
-            data_cost_selected[i  * cdisp_step1] = minimum;
-            selected_disparity[i  * cdisp_step1] = id;
-            data_cost         [id * cdisp_step1] = SHRT_MAX;
-        }
-    }
-}
-
-__kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_, __global float *selected_disp_pyr,
-    __global  float *ctemp, int h, int w, int nr_plane,
-    int cmsg_step1, int cdisp_step1, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        __global   float *selected_disparity = selected_disp_pyr      + y * cmsg_step1 + x;
-        __global   float *data_cost_selected = data_cost_selected_    + y * cmsg_step1 + x;
-        __global   float *data_cost          = ctemp + y * cmsg_step1 + x;
-
-        for(int i = 0; i < nr_plane; i++)
-        {
-            float minimum = FLT_MAX;
-            int id = 0;
-
-            for(int d = 0; d < cndisp; d++)
-            {
-                float cur = data_cost[d * cdisp_step1];
-                if(cur < minimum)
-                {
-                    minimum = cur;
-                    id = d;
-                }
-            }
-
-            data_cost_selected[i  * cdisp_step1] = minimum;
-            selected_disparity[i  * cdisp_step1] = id;
-            data_cost         [id * cdisp_step1] = FLT_MAX;
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr,
-    __global  short *ctemp,int h, int w, int nr_plane,
-    int cmsg_step1, int cdisp_step1, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        __global short *selected_disparity = selected_disp_pyr   + y * cmsg_step1 + x;
-        __global short *data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
-        __global short *data_cost = ctemp + y * cmsg_step1 + x;
-
-        int nr_local_minimum = 0;
-
-        short prev = data_cost[0 * cdisp_step1];
-        short cur  = data_cost[1 * cdisp_step1];
-        short next = data_cost[2 * cdisp_step1];
-
-        for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
-        {
-
-            if (cur < prev && cur < next)
-            {
-                data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
-                selected_disparity[nr_local_minimum * cdisp_step1] = d;
-                data_cost[d * cdisp_step1] = SHRT_MAX;
-
-                nr_local_minimum++;
-            }
-
-            prev = cur;
-            cur = next;
-            next = data_cost[(d + 1) * cdisp_step1];
-        }
-
-        for (int i = nr_local_minimum; i < nr_plane; i++)
-        {
-            short minimum = SHRT_MAX;
-            int id = 0;
-
-            for (int d = 0; d < cndisp; d++)
-            {
-                cur = data_cost[d * cdisp_step1];
-                if (cur < minimum)
-                {
-                    minimum = cur;
-                    id = d;
-                }
-            }
-
-            data_cost_selected[i * cdisp_step1] = minimum;
-            selected_disparity[i * cdisp_step1] = id;
-            data_cost[id * cdisp_step1] = SHRT_MAX;
-        }
-    }
-}
-
-__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
-    __global float *ctemp,int h, int w, int nr_plane,
-    int cmsg_step1,  int cdisp_step1, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        __global float *selected_disparity = selected_disp_pyr   + y * cmsg_step1 + x;
-        __global float *data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
-        __global float *data_cost = ctemp + y * cmsg_step1 + x;
-
-        int nr_local_minimum = 0;
-
-        float prev = data_cost[0 * cdisp_step1];
-        float cur  = data_cost[1 * cdisp_step1];
-        float next = data_cost[2 * cdisp_step1];
-
-        for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
-        {
-            if (cur < prev && cur < next)
-            {
-                data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
-                selected_disparity[nr_local_minimum * cdisp_step1] = d;
-                data_cost[d * cdisp_step1] = FLT_MAX ;
-
-                nr_local_minimum++;
-            }
-
-            prev = cur;
-            cur = next;
-            next = data_cost[(d + 1) * cdisp_step1];
-        }
-
-
-        for (int i = nr_local_minimum; i < nr_plane; i++)
-        {
-            float minimum = FLT_MAX;
-            int id = 0;
-
-            for (int d = 0; d < cndisp; d++)
-            {
-                cur = data_cost[d * cdisp_step1];
-                if (cur < minimum)
-                {
-                    minimum = cur;
-                    id = d;
-                }
-            }
-
-            data_cost_selected[i * cdisp_step1] = minimum;
-            selected_disparity[i * cdisp_step1] = id;
-            data_cost[id * cdisp_step1] = FLT_MAX;
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////
-/////////////////////// init data cost ////////////////////////
-///////////////////////////////////////////////////////////////
-
-inline float compute_3(__global uchar* left, __global uchar* right,
-    float cdata_weight,  float cmax_data_term)
-{
-    float tb = 0.114f * abs((int)left[0] - right[0]);
-    float tg = 0.587f * abs((int)left[1] - right[1]);
-    float tr = 0.299f * abs((int)left[2] - right[2]);
-
-    return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
-}
-
-inline float compute_1(__global uchar* left, __global uchar* right,
-    float cdata_weight,  float cmax_data_term)
-{
-    return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
-}
-
-inline short round_short(float v)
-{
-    return convert_short_sat_rte(v);
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////init_data_cost///////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
-    int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
-    int cth, int cimg_step, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        int y0 = y << level;
-        int yt = (y + 1) << level;
-
-        int x0 = x << level;
-        int xt = (x + 1) << level;
-
-        __global short *data_cost = ctemp + y * cmsg_step1 + x;
-
-        for(int d = 0; d < cndisp; ++d)
-        {
-            float val = 0.0f;
-            for(int yi = y0; yi < yt; yi++)
-            {
-                for(int xi = x0; xi < xt; xi++)
-                {
-                    int xr = xi - d;
-                    if(d < cth || xr < 0)
-                        val += cdata_weight * cmax_data_term;
-                    else
-                    {
-                        __global uchar *lle = cleft  + yi * cimg_step + xi * channels;
-                        __global uchar *lri = cright + yi * cimg_step + xr * channels;
-
-                        if(channels == 1)
-                            val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                        else
-                            val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-                    }
-                }
-            }
-            data_cost[cdisp_step1 * d] = round_short(val);
-        }
-    }
-}
-
-__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
-    int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
-    int cth, int cimg_step, int cndisp)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        int y0 = y << level;
-        int yt = (y + 1) << level;
-
-        int x0 = x << level;
-        int xt = (x + 1) << level;
-
-        __global float *data_cost = ctemp + y * cmsg_step1 + x;
-
-        for(int d = 0; d < cndisp; ++d)
-        {
-            float val = 0.0f;
-            for(int yi = y0; yi < yt; yi++)
-            {
-                for(int xi = x0; xi < xt; xi++)
-                {
-                    int xr = xi - d;
-                    if(d < cth || xr < 0)
-                        val += cdata_weight * cmax_data_term;
-                    else
-                    {
-                        __global uchar* lle = cleft  + yi * cimg_step + xi * channels;
-                        __global uchar* lri = cright + yi * cimg_step + xr * channels;
-
-                        if(channels == 1)
-                            val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                        else
-                            val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-                    }
-                }
-            }
-            data_cost[cdisp_step1 * d] = val;
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
-    __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
-    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
-    int cdisp_step1, int cmsg_step1)
-{
-    int x_out = get_group_id(0);
-    int y_out = get_group_id(1) % h;
-    //int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
-
-    int tid = get_local_id(0);
-
-    if (d < cndisp)
-    {
-        int x0 = x_out << level;
-        int y0 = y_out << level;
-
-        int len = min(y0 + winsz, rows) - y0;
-
-        float val = 0.0f;
-        if (x0 + tid < cols)
-        {
-            if (x0 + tid - d < 0 || d < cth)
-                val = cdata_weight * cmax_data_term * len;
-            else
-            {
-                __global uchar* lle =  cleft + y0 * cimg_step + channels * (x0 + tid    );
-                __global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
-
-                for(int y = 0; y < len; ++y)
-                {
-                    if(channels == 1)
-                        val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                    else
-                        val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-
-                    lle += cimg_step;
-                    lri += cimg_step;
-                }
-            }
-        }
-
-        __local float* dline = smem + winsz * get_local_id(2);
-
-        dline[tid] = val;
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256)
-        {
-            if (tid < 128)
-                dline[tid] += dline[tid + 128];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128)
-        {
-            if (tid <  64)
-                dline[tid] += dline[tid + 64];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64)
-            if (tid < 32)
-                vdline[tid] += vdline[tid + 32];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32)
-            if (tid < 16)
-                vdline[tid] += vdline[tid + 16];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d<cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16)
-            if (tid <  8)
-                vdline[tid] += vdline[tid + 8];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d<cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8)
-            if (tid <  4)
-                vdline[tid] += vdline[tid + 4];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d<cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4)
-            if (tid <  2)
-                vdline[tid] += vdline[tid + 2];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d<cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2)
-            if (tid <  1)
-                vdline[tid] += vdline[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local float* dline = smem + winsz * get_local_id(2);
-        __global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
-        if (tid == 0)
-            data_cost[cdisp_step1 * d] = convert_short_sat_rte(dline[0]);
-    }
-}
-
-__kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
-    __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
-    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
-    int cdisp_step1, int cmsg_step1)
-{
-    int x_out = get_group_id(0);
-    int y_out = get_group_id(1) % h;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
-
-    int tid = get_local_id(0);
-
-    if (d < cndisp)
-    {
-        int x0 = x_out << level;
-        int y0 = y_out << level;
-
-        int len = min(y0 + winsz, rows) - y0;
-
-        float val = 0.0f;
-        //float val = 528.0f;
-
-        if (x0 + tid < cols)
-        {
-            if (x0 + tid - d < 0 || d < cth)
-                val = cdata_weight * cmax_data_term * len;
-            else
-            {
-                __global uchar* lle =  cleft + y0 * cimg_step + channels * (x0 + tid    );
-                __global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
-
-                for(int y = 0; y < len; ++y)
-                {
-                    if(channels == 1)
-                        val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                    else
-                        val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-
-                    lle += cimg_step;
-                    lri += cimg_step;
-                }
-            }
-        }
-
-        __local float* dline = smem + winsz * get_local_id(2);
-
-        dline[tid] = val;
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256)
-            if (tid < 128)
-                dline[tid] += dline[tid + 128];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128)
-            if (tid < 64)
-                dline[tid] += dline[tid + 64];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64)
-            if (tid < 32)
-                vdline[tid] += vdline[tid + 32];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32)
-            if (tid < 16)
-                vdline[tid] += vdline[tid + 16];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16)
-            if (tid < 8)
-                vdline[tid] += vdline[tid + 8];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8)
-            if (tid < 4)
-                vdline[tid] += vdline[tid + 4];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4)
-            if (tid < 2)
-                vdline[tid] += vdline[tid + 2];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2)
-            if (tid < 1)
-                vdline[tid] += vdline[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < cndisp)
-    {
-        __global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
-        __local float* dline = smem + winsz * get_local_id(2);
-        if (tid == 0)
-            data_cost[cdisp_step1 * d] =  dline[0];
-    }
-}
-
-///////////////////////////////////////////////////////////////
-////////////////////// compute data cost //////////////////////
-///////////////////////////////////////////////////////////////
-
-__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
-    __global uchar *cleft, __global uchar *cright,
-    int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
-    float cmax_data_term, int cimg_step, int cth)
-{
-
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        int y0 = y << level;
-        int yt = (y + 1) << level;
-
-        int x0 = x << level;
-        int xt = (x + 1) << level;
-
-        __global const short *selected_disparity = selected_disp_pyr + y/2 * cmsg_step2 + x/2;
-        __global       short *data_cost          = data_cost_ + y * cmsg_step1 + x;
-
-        for(int d = 0; d < nr_plane; d++)
-        {
-            float val = 0.0f;
-            for(int yi = y0; yi < yt; yi++)
-            {
-                for(int xi = x0; xi < xt; xi++)
-                {
-                    int sel_disp = selected_disparity[d * cdisp_step2];
-                    int xr = xi - sel_disp;
-
-                    if (xr < 0 || sel_disp < cth)
-                        val += cdata_weight * cmax_data_term;
-
-                    else
-                    {
-                        __global uchar* left_x  = cleft + yi * cimg_step + xi * channels;
-                        __global uchar* right_x = cright + yi * cimg_step + xr * channels;
-
-                        if(channels == 1)
-                            val += compute_1(left_x, right_x, cdata_weight, cmax_data_term);
-                        else
-                            val += compute_3(left_x, right_x, cdata_weight, cmax_data_term);
-                    }
-                }
-            }
-            data_cost[cdisp_step1 * d] = convert_short_sat_rte(val);
-        }
-    }
-}
-
-__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
-    __global uchar *cleft, __global uchar *cright,
-    int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
-    float cmax_data_term, int cimg_step, int cth)
-{
-
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        int y0 = y << level;
-        int yt = (y + 1) << level;
-
-        int x0 = x << level;
-        int xt = (x + 1) << level;
-
-        __global const float *selected_disparity = selected_disp_pyr + y/2 * cmsg_step2 + x/2;
-        __global       float *data_cost          = data_cost_ + y * cmsg_step1 + x;
-
-        for(int d = 0; d < nr_plane; d++)
-        {
-            float val = 0.0f;
-            for(int yi = y0; yi < yt; yi++)
-            {
-                for(int xi = x0; xi < xt; xi++)
-                {
-                    int sel_disp = selected_disparity[d * cdisp_step2];
-                    int xr = xi - sel_disp;
-
-                    if (xr < 0 || sel_disp < cth)
-                        val += cdata_weight * cmax_data_term;
-                    else
-                    {
-                        __global uchar* left_x  = cleft + yi * cimg_step + xi * channels;
-                        __global uchar* right_x = cright + yi * cimg_step + xr * channels;
-
-                        if(channels == 1)
-                            val += compute_1(left_x, right_x, cdata_weight, cmax_data_term);
-                        else
-                            val += compute_3(left_x, right_x, cdata_weight, cmax_data_term);
-                    }
-                }
-            }
-            data_cost[cdisp_step1 * d] = val;
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
-    __global uchar *cleft, __global uchar *cright,__local float *smem,
-    int level, int rows, int cols, int h, int nr_plane,
-    int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
-    float cdata_weight,  float cmax_data_term, int cimg_step,int cth)
-
-{
-    int x_out = get_group_id(0);
-    int y_out = get_group_id(1) % h;
-    int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
-
-    int tid = get_local_id(0);
-
-    __global const short* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
-    __global short* data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
-
-    if (d < nr_plane)
-    {
-        int sel_disp = selected_disparity[d * cdisp_step2];
-
-        int x0 = x_out << level;
-        int y0 = y_out << level;
-
-        int len = min(y0 + winsz, rows) - y0;
-
-        float val = 0.0f;
-        if (x0 + tid < cols)
-        {
-            if (x0 + tid - sel_disp < 0 || sel_disp < cth)
-                val = cdata_weight * cmax_data_term * len;
-            else
-            {
-                __global uchar* lle =  cleft + y0 * cimg_step + channels * (x0 + tid    );
-                __global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
-
-                for(int y = 0; y < len; ++y)
-                {
-                    if(channels == 1)
-                        val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                    else
-                        val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-
-                    lle += cimg_step;
-                    lri += cimg_step;
-                }
-            }
-        }
-
-        __local float* dline = smem + winsz * get_local_id(2);
-
-        dline[tid] = val;
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    // if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
-    //if (winsz >= 128) { if (tid <  64) { dline[tid] += dline[tid +  64]; } barrier(CLK_LOCAL_MEM_FENCE); }
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64)
-        {
-            if (tid < 32)
-                vdline[tid] += vdline[tid + 32];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32)
-        {
-            if (tid < 16)
-                vdline[tid] += vdline[tid + 16];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16)
-        {
-            if (tid < 8)
-                vdline[tid] += vdline[tid + 8];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8)
-        {
-            if (tid < 4)
-                vdline[tid] += vdline[tid + 4];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4)
-        {
-            if (tid < 2)
-                vdline[tid] += vdline[tid + 2];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2)
-        {
-            if (tid < 1)
-                vdline[tid] += vdline[tid + 1];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (tid == 0)
-            data_cost[cdisp_step1 * d] = convert_short_sat_rte(vdline[0]);
-    }
-}
-
-__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
-    __global uchar *cleft, __global uchar *cright, __local float *smem,
-    int level, int rows, int cols, int h, int nr_plane,
-    int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
-    float cmax_data_term, int cimg_step, int cth)
-
-{
-    int x_out = get_group_id(0);
-    int y_out = get_group_id(1) % h;
-    int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
-
-    int tid = get_local_id(0);
-
-    __global const float *selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
-    __global float *data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
-
-    if (d < nr_plane)
-    {
-        int sel_disp = selected_disparity[d * cdisp_step2];
-
-        int x0 = x_out << level;
-        int y0 = y_out << level;
-
-        int len = min(y0 + winsz, rows) - y0;
-
-        float val = 0.0f;
-        if (x0 + tid < cols)
-        {
-            if (x0 + tid - sel_disp < 0 || sel_disp < cth)
-                val = cdata_weight * cmax_data_term * len;
-            else
-            {
-                __global uchar* lle =  cleft + y0 * cimg_step + channels * (x0 + tid    );
-                __global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
-
-                for(int y = 0; y < len; ++y)
-                {
-                    if(channels == 1)
-                        val += compute_1(lle, lri, cdata_weight, cmax_data_term);
-                    else
-                        val += compute_3(lle, lri, cdata_weight, cmax_data_term);
-
-                    lle += cimg_step;
-                    lri += cimg_step;
-                }
-            }
-        }
-
-        __local float* dline = smem + winsz * get_local_id(2);
-
-        dline[tid] = val;
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64)
-        {
-            if (tid < 32)
-                vdline[tid] += vdline[tid + 32];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32)
-        {
-            if (tid < 16)
-                vdline[tid] += vdline[tid + 16];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16)
-        {
-            if (tid <  8)
-                vdline[tid] += vdline[tid + 8];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  8)
-        {
-            if (tid <  4)
-                vdline[tid] += vdline[tid + 4];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  4)
-        {
-            if (tid <  2)
-                vdline[tid] += vdline[tid + 2];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  2)
-        {
-            if (tid <  1)
-                vdline[tid] += vdline[tid + 1];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(d < nr_plane)
-    {
-        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (tid == 0)
-            data_cost[cdisp_step1 * d] = vdline[0];
-    }
-}
-
-///////////////////////////////////////////////////////////////
-//////////////////////// init message /////////////////////////
-///////////////////////////////////////////////////////////////
-
-inline void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
-    __global short *r_new, __global const short *u_cur, __global const short *d_cur,
-    __global const short *l_cur, __global const short *r_cur,
-    __global short *data_cost_selected, __global short *disparity_selected_new,
-    __global short *data_cost_new, __global const short* data_cost_cur,
-    __global const short *disparity_selected_cur,
-    int nr_plane, int nr_plane2,
-    int cdisp_step1, int cdisp_step2)
-{
-    for(int i = 0; i < nr_plane; i++)
-    {
-        short minimum = SHRT_MAX;
-        int id = 0;
-        for(int j = 0; j < nr_plane2; j++)
-        {
-            short cur = data_cost_new[j * cdisp_step1];
-            if(cur < minimum)
-            {
-                minimum = cur;
-                id = j;
-            }
-        }
-
-        data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
-        disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
-
-        u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
-        d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
-        l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
-
-        data_cost_new[id * cdisp_step1] = SHRT_MAX;
-    }
-}
-
-__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
-    __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_,
-    __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
-    __global short *selected_disp_pyr_new, __global const short *selected_disp_pyr_cur,
-    __global short *data_cost_selected_, __global const short *data_cost_,
-    int h, int w, int nr_plane, int h2, int w2, int nr_plane2,
-    int cdisp_step1, int cdisp_step2, int cmsg_step1, int cmsg_step2)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y < h && x < w)
-    {
-        __global const short *u_cur = u_cur_ + min(h2-1, y/2 + 1) * cmsg_step2 + x/2;
-        __global const short *d_cur = d_cur_ + max(0, y/2 - 1)    * cmsg_step2 + x/2;
-        __global const short *l_cur = l_cur_ + y/2                * cmsg_step2 + min(w2-1, x/2 + 1);
-        __global const short *r_cur = r_cur_ + y/2                * cmsg_step2 + max(0, x/2 - 1);
-
-        __global short *data_cost_new = ctemp + y * cmsg_step1 + x;
-
-        __global const short *disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step2 + x/2;
-        __global const short *data_cost = data_cost_ + y * cmsg_step1 + x;
-
-        for(int d = 0; d < nr_plane2; d++)
-        {
-            int idx2 = d * cdisp_step2;
-
-            short val  = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2];
-            data_cost_new[d * cdisp_step1] = val;
-        }
-
-        __global short *data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
-        __global short *disparity_selected_new = selected_disp_pyr_new + y * cmsg_step1 + x;
-
-        __global short *u_new = u_new_ + y * cmsg_step1 + x;
-        __global short *d_new = d_new_ + y * cmsg_step1 + x;
-        __global short *l_new = l_new_ + y * cmsg_step1 + x;
-        __global short *r_new = r_new_ + y * cmsg_step1 + x;
-
-        u_cur = u_cur_ + y/2 * cmsg_step2 + x/2;
-        d_cur = d_cur_ + y/2 * cmsg_step2 + x/2;
-        l_cur = l_cur_ + y/2 * cmsg_step2 + x/2;
-        r_cur = r_cur_ + y/2 * cmsg_step2 + x/2;
-
-        get_first_k_element_increase_0(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur,
-            data_cost_selected, disparity_selected_new, data_cost_new,
-            data_cost, disparity_selected_cur, nr_plane, nr_plane2,
-            cdisp_step1, cdisp_step2);
-    }
-}
-
-__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
-    __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
-    __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
-    __global float *selected_disp_pyr_new, __global const float *selected_disp_pyr_cur,
-    __global float *data_cost_selected_, __global const float *data_cost_,
-    int h, int w, int nr_plane, int h2, int w2, int nr_plane2,
-    int cdisp_step1, int cdisp_step2, int cmsg_step1, int cmsg_step2)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-
-    __global const float *u_cur = u_cur_ + min(h2-1, y/2 + 1) * cmsg_step2 + x/2;
-    __global const float *d_cur = d_cur_ + max(0, y/2 - 1)    * cmsg_step2 + x/2;
-    __global const float *l_cur = l_cur_ + y/2                * cmsg_step2 + min(w2-1, x/2 + 1);
-    __global const float *r_cur = r_cur_ + y/2                * cmsg_step2 + max(0, x/2 - 1);
-
-    __global float *data_cost_new = ctemp + y * cmsg_step1 + x;
-
-    __global const float *disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step2 + x/2;
-    __global const float *data_cost = data_cost_ + y * cmsg_step1 + x;
-
-    if (y < h && x < w)
-    {
-        for(int d = 0; d < nr_plane2; d++)
-        {
-            int idx2 = d * cdisp_step2;
-
-            float val  = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2];
-            data_cost_new[d * cdisp_step1] = val;
-        }
-    }
-
-    __global float *data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
-    __global float *disparity_selected_new = selected_disp_pyr_new + y * cmsg_step1 + x;
-
-    __global float *u_new = u_new_ + y * cmsg_step1 + x;
-    __global float *d_new = d_new_ + y * cmsg_step1 + x;
-    __global float *l_new = l_new_ + y * cmsg_step1 + x;
-    __global float *r_new = r_new_ + y * cmsg_step1 + x;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if(y < h && x < w)
-    {
-        u_cur = u_cur_ + y/2 * cmsg_step2 + x/2;
-        d_cur = d_cur_ + y/2 * cmsg_step2 + x/2;
-        l_cur = l_cur_ + y/2 * cmsg_step2 + x/2;
-        r_cur = r_cur_ + y/2 * cmsg_step2 + x/2;
-
-        for(int i = 0; i < nr_plane; i++)
-        {
-            float minimum = FLT_MAX;
-            int id = 0;
-
-            for(int j = 0; j < nr_plane2; j++)
-            {
-                float cur = data_cost_new[j * cdisp_step1];
-                if(cur < minimum)
-                {
-                    minimum = cur;
-                    id = j;
-                }
-            }
-            data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
-            disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
-            u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
-            d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
-            l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-            r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
-            data_cost_new[id * cdisp_step1] = FLT_MAX;
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////
-////////////////////  calc all iterations /////////////////////
-///////////////////////////////////////////////////////////////
-
-inline void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
-    __global const short *msg2, __global const short *msg3,
-    __global const short *dst_disp, __global const short *src_disp,
-    int nr_plane, __global short *temp,
-    float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
-{
-    short minimum = SHRT_MAX;
-    for(int d = 0; d < nr_plane; d++)
-    {
-        int idx = d * cdisp_step1;
-        short val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
-
-        if(val < minimum)
-            minimum = val;
-
-        msg_dst[idx] = val;
-    }
-
-    float sum = 0;
-    for(int d = 0; d < nr_plane; d++)
-    {
-        float cost_min = minimum + cmax_disc_term;
-        short src_disp_reg = src_disp[d * cdisp_step1];
-
-        for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
-            cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
-
-        temp[d * cdisp_step1] = convert_short_sat_rte(cost_min);
-        sum += cost_min;
-    }
-    sum /= nr_plane;
-
-    for(int d = 0; d < nr_plane; d++)
-        msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
-}
-
-inline void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
-    __global const float *msg2, __global const float *msg3,
-    __global const float *dst_disp, __global const float *src_disp,
-    int nr_plane, __global float *temp,
-    float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
-{
-    float minimum = FLT_MAX;
-    for(int d = 0; d < nr_plane; d++)
-    {
-        int idx = d * cdisp_step1;
-        float val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
-
-        if(val < minimum)
-            minimum = val;
-
-        msg_dst[idx] = val;
-    }
-
-    float sum = 0;
-    for(int d = 0; d < nr_plane; d++)
-    {
-        float cost_min = minimum + cmax_disc_term;
-        float src_disp_reg = src_disp[d * cdisp_step1];
-
-        for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
-            cdisc_single_jump * fabs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
-
-        temp[d * cdisp_step1] = cost_min;
-        sum += cost_min;
-    }
-    sum /= nr_plane;
-
-    for(int d = 0; d < nr_plane; d++)
-        msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
-}
-
-__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
-    __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
-    __global short *ctemp, int h, int w, int nr_plane, int i,
-    float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
-{
-    int y = get_global_id(1);
-    int x = ((get_global_id(0)) << 1) + ((y + i) & 1);
-
-    if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
-    {
-        __global const short *data = data_cost_selected + y * cmsg_step1 + x;
-
-        __global short *u = u_ + y * cmsg_step1 + x;
-        __global short *d = d_ + y * cmsg_step1 + x;
-        __global short *l = l_ + y * cmsg_step1 + x;
-        __global short *r = r_ + y * cmsg_step1 + x;
-
-        __global const short *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
-
-        __global short *temp = ctemp + y * cmsg_step1 + x;
-
-        message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_0(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_0(data, l, u + cmsg_step1, d - cmsg_step1, l + 1, disp, disp - 1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_0(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-    }
-}
-
-__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
-    __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
-    __global float *ctemp, int h, int w, int nr_plane, int i,
-    float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
-{
-    int y = get_global_id(1);
-    int x = ((get_global_id(0)) << 1) + ((y + i) & 1);
-
-    if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
-    {
-        __global const float *data = data_cost_selected + y * cmsg_step1 + x;
-
-        __global float *u = u_ + y * cmsg_step1 + x;
-        __global float *d = d_ + y * cmsg_step1 + x;
-        __global float *l = l_ + y * cmsg_step1 + x;
-        __global float *r = r_ + y * cmsg_step1 + x;
-
-        __global const float *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
-        __global float *temp = ctemp + y * cmsg_step1 + x;
-
-        message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_1(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_1(data, l, u + cmsg_step1, d - cmsg_step1, l + 1, disp, disp - 1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-        message_per_pixel_1(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp,
-            cmax_disc_term, cdisp_step1, cdisc_single_jump);
-    }
-}
-
-///////////////////////////////////////////////////////////////
-/////////////////////////// output ////////////////////////////
-///////////////////////////////////////////////////////////////
-
-__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
-    __global const short *r_, __global const short * data_cost_selected,
-    __global const short *disp_selected_pyr,
-    __global short* disp,
-    int res_step, int cols, int rows, int nr_plane,
-    int cmsg_step1, int cdisp_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y > 0 && y < rows - 1 && x > 0 && x < cols - 1)
-    {
-        __global const short *data = data_cost_selected + y * cmsg_step1 + x;
-        __global const short *disp_selected = disp_selected_pyr + y * cmsg_step1 + x;
-
-        __global const short *u = u_ + (y+1) * cmsg_step1 + (x+0);
-        __global const short *d = d_ + (y-1) * cmsg_step1 + (x+0);
-        __global const short *l = l_ + (y+0) * cmsg_step1 + (x+1);
-        __global const short *r = r_ + (y+0) * cmsg_step1 + (x-1);
-
-        short best = 0;
-        short best_val = SHRT_MAX;
-
-        for (int i = 0; i < nr_plane; ++i)
-        {
-            int idx = i * cdisp_step1;
-            short val = data[idx]+ u[idx] + d[idx] + l[idx] + r[idx];
-
-            if (val < best_val)
-            {
-                best_val = val;
-                best = disp_selected[idx];
-            }
-        }
-        disp[res_step * y + x] = best;
-    }
-}
-
-__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
-    __global const float *r_, __global const float *data_cost_selected,
-    __global const float *disp_selected_pyr,
-    __global short *disp,
-    int res_step, int cols, int rows, int nr_plane,
-    int cmsg_step1, int cdisp_step1)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (y > 0 && y < rows - 1 && x > 0 && x < cols - 1)
-    {
-        __global const float *data = data_cost_selected + y * cmsg_step1 + x;
-        __global const float *disp_selected = disp_selected_pyr + y * cmsg_step1 + x;
-
-        __global const float *u = u_ + (y+1) * cmsg_step1 + (x+0);
-        __global const float *d = d_ + (y-1) * cmsg_step1 + (x+0);
-        __global const float *l = l_ + (y+0) * cmsg_step1 + (x+1);
-        __global const float *r = r_ + (y+0) * cmsg_step1 + (x-1);
-
-        short best = 0;
-        short best_val = SHRT_MAX;
-        for (int i = 0; i < nr_plane; ++i)
-        {
-            int idx = i * cdisp_step1;
-            float val = data[idx]+ u[idx] + d[idx] + l[idx] + r[idx];
-
-            if (val < best_val)
-            {
-                best_val = val;
-                best = convert_short_sat_rte(disp_selected[idx]);
-            }
-        }
-        disp[res_step * y + x] = best;
-    }
-}
diff --git a/modules/ocl/src/opencl/svm.cl b/modules/ocl/src/opencl/svm.cl
deleted file mode 100644
index c104940..0000000
--- a/modules/ocl/src/opencl/svm.cl
+++ /dev/null
@@ -1,211 +0,0 @@
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Erping Pang, erping@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-#ifdef DOUBLE_SUPPORT
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#define TYPE double
-#else
-#define TYPE float
-#endif
-#if defined ADDEXP
-#define EXP(X) exp(X)
-#else
-#define EXP(X) X
-#endif
-#if defined ADDPOW
-#define POW(X,Y) pow(fabs(X),(Y))
-#else
-#define POW(X,Y) X
-#endif
-#define MAX_VAL   (FLT_MAX*1e-3)
-
-#define BLOCK_SIZE 16
-
-__kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
-                         int width, TYPE alpha, TYPE beta)
-{
-    const int  col = get_global_id(0);
-    const int  row = get_global_id(1);
-
-    if(row < src_rows && col < src2_cols)
-    {
-        int t = 0;
-        TYPE temp = 0.0;
-        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
-        {
-            float16 t0 = vload16(0, src + row * src_step + t);
-            float16 t1 = vload16(0, src2 + col * src2_step + t);
-            t0 *= t1;
-            temp += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
-                    t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
-        }
-        for(; t < width; t++)
-        {
-            temp += src[row * src_step + t] * src2[col * src2_step + t];
-        }
-
-        TYPE temp1 = (TYPE) (temp * alpha + beta);
-
-        if( temp1 > MAX_VAL )
-        {
-            dst[row * dst_step + col] = MAX_VAL;
-        }
-        else
-        {
-            dst[row * dst_step + col] = temp1;
-        }
-
-    }
-
-}
-__kernel void svm_sigmod(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
-                         int width, TYPE alpha, TYPE beta)
-{
-    const int  col = get_global_id(0);
-    const int  row = get_global_id(1);
-
-    if(row < src_rows && col < src2_cols)
-    {
-        int t = 0;
-        TYPE temp = 0.0;
-        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
-        {
-            float16 t0 = vload16(0, src + row * src_step + t);
-            float16 t1 = vload16(0, src2 + col * src2_step + t);
-            t0 *= t1;
-            temp += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
-                    t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
-        }
-        for(; t < width; t++)
-        {
-            temp += src[row * src_step + t] * src2[col * src2_step + t];
-        }
-        TYPE tp = (TYPE) (temp * alpha + beta);
-        TYPE e = exp(-fabs(tp));
-        TYPE temp1;
-        if(tp > 0)
-        {
-            temp1 = (TYPE)((1. - e) / (1. + e));
-        }
-        else
-        {
-            temp1 = (TYPE)((e - 1.) / (e + 1.));
-        }
-
-        if( temp1 > MAX_VAL )
-        {
-            dst[row * dst_step + col] = MAX_VAL;
-        }
-        else
-        {
-            dst[row * dst_step + col] = temp1;
-        }
-    }
-
-}
-__kernel void svm_poly(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
-                       int width, TYPE alpha, TYPE beta, TYPE degree)
-{
-    const int  col = get_global_id(0);
-    const int  row = get_global_id(1);
-
-    if(row < src_rows && col < src2_cols)
-    {
-        int t = 0;
-        TYPE temp = 0.0;
-        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
-        {
-            float16 t0 = vload16(0, src + row * src_step + t);
-            float16 t1 = vload16(0, src2 + col * src2_step + t);
-            t0 *= t1;
-            temp += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
-                    t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
-        }
-        for(; t < width; t++)
-        {
-            temp += src[row * src_step + t] * src2[col * src2_step + t];
-        }
-        TYPE temp1 = (TYPE)(POW((temp * alpha + beta), degree));
-
-        if( temp1 > MAX_VAL )
-        {
-            dst[row * dst_step + col] = MAX_VAL;
-        }
-        else
-        {
-            dst[row * dst_step + col] = temp1;
-        }
-    }
-
-}
-__kernel void svm_rbf(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
-                      int width, TYPE gamma)
-{
-    const int  col = get_global_id(0);
-    const int  row = get_global_id(1);
-
-    if(row < src_rows && col < src2_cols)
-    {
-        int t = 0;
-        TYPE temp = 0.0;
-        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
-        {
-            float16 t0 = vload16(0, src + row * src_step + t);
-            float16 t1 = vload16(0, src2 + col * src2_step + t);
-            t0 = (t0 - t1) * (t0 - t1);
-            temp += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
-                    t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
-        }
-        for(; t < width; t++)
-        {
-            temp += (src[row * src_step + t] - src2[col * src2_step + t]) * (src[row * src_step + t] - src2[col * src2_step + t]);
-        }
-        TYPE temp1 = EXP((TYPE)(temp * gamma));
-
-        if( temp1 > MAX_VAL )
-        {
-            dst[row * dst_step + col] = MAX_VAL;
-        }
-        else
-        {
-            dst[row * dst_step + col] = temp1;
-        }
-    }
-}
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
deleted file mode 100644
index b488e89..0000000
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ /dev/null
@@ -1,386 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
-                                     __global float* dx, __global float* dy, int dx_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if((x < src_col)&&(y < src_row))
-    {
-        int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
-        int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-        dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
-
-        int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
-        int src_y2 = (y - 1) > 0 ? (y - 1) : 0;
-        dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]);
-    }
-
-}
-
-inline float bicubicCoeff(float x_)
-{
-
-    float x = fabs(x_);
-    if (x <= 1.0f)
-        return x * x * (1.5f * x - 2.5f) + 1.0f;
-    else if (x < 2.0f)
-        return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
-    else
-        return 0.0f;
-}
-
-__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
-    image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
-    __global const float* u1, int u1_step,
-    __global const float* u2,
-    __global float* I1w,
-    __global float* I1wx, /*int I1wx_step,*/
-    __global float* I1wy, /*int I1wy_step,*/
-    __global float* grad, /*int grad_step,*/
-    __global float* rho,
-    int I1w_step,
-    int u2_step,
-    int u1_offset_x,
-    int u1_offset_y,
-    int u2_offset_x,
-    int u2_offset_y)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < I0_col&&y < I0_row)
-    {
-        //float u1Val = u1(y, x);
-        float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        //float u2Val = u2(y, x);
-        float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
-
-        float wx = x + u1Val;
-        float wy = y + u2Val;
-
-        int xmin = ceil(wx - 2.0f);
-        int xmax = floor(wx + 2.0f);
-
-        int ymin = ceil(wy - 2.0f);
-        int ymax = floor(wy + 2.0f);
-
-        float sum  = 0.0f;
-        float sumx = 0.0f;
-        float sumy = 0.0f;
-        float wsum = 0.0f;
-        sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
-
-        for (int cy = ymin; cy <= ymax; ++cy)
-        {
-            for (int cx = xmin; cx <= xmax; ++cx)
-            {
-                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
-
-                //sum  += w * tex2D(tex_I1 , cx, cy);
-                int2 cood = (int2)(cx, cy);
-                sum += w * read_imagef(tex_I1, sampleri, cood).x;
-                //sumx += w * tex2D(tex_I1x, cx, cy);
-                sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
-                //sumy += w * tex2D(tex_I1y, cx, cy);
-                sumy += w * read_imagef(tex_I1y, sampleri, cood).x;
-
-                wsum += w;
-            }
-        }
-
-        float coeff = 1.0f / wsum;
-
-        float I1wVal  = sum  * coeff;
-        float I1wxVal = sumx * coeff;
-        float I1wyVal = sumy * coeff;
-
-        I1w[y * I1w_step + x]  = I1wVal;
-        I1wx[y * I1w_step + x] = I1wxVal;
-        I1wy[y * I1w_step + x] = I1wyVal;
-
-        float Ix2 = I1wxVal * I1wxVal;
-        float Iy2 = I1wyVal * I1wyVal;
-
-        // store the |Grad(I1)|^2
-        grad[y * I1w_step + x] = Ix2 + Iy2;
-
-        // compute the constant part of the rho function
-        float I0Val = I0[y * I0_step + x];
-        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
-    }
-
-}
-
-inline float readImage(__global float *image,  int x,  int y,  int rows,  int cols, int elemCntPerRow)
-{
-    int i0 = clamp(x, 0, cols - 1);
-    int j0 = clamp(y, 0, rows - 1);
-
-    return image[j0 * elemCntPerRow + i0];
-}
-
-__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
-    __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
-    __global const float* u1, int u1_step,
-    __global const float* u2,
-    __global float* I1w,
-    __global float* I1wx, /*int I1wx_step,*/
-    __global float* I1wy, /*int I1wy_step,*/
-    __global float* grad, /*int grad_step,*/
-    __global float* rho,
-    int I1w_step,
-    int u2_step,
-    int I1_step,
-    int I1x_step)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < I0_col&&y < I0_row)
-    {
-        //float u1Val = u1(y, x);
-        float u1Val = u1[y * u1_step + x];
-        //float u2Val = u2(y, x);
-        float u2Val = u2[y * u2_step + x];
-
-        float wx = x + u1Val;
-        float wy = y + u2Val;
-
-        int xmin = ceil(wx - 2.0f);
-        int xmax = floor(wx + 2.0f);
-
-        int ymin = ceil(wy - 2.0f);
-        int ymax = floor(wy + 2.0f);
-
-        float sum  = 0.0f;
-        float sumx = 0.0f;
-        float sumy = 0.0f;
-        float wsum = 0.0f;
-
-        for (int cy = ymin; cy <= ymax; ++cy)
-        {
-            for (int cx = xmin; cx <= xmax; ++cx)
-            {
-                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
-
-                int2 cood = (int2)(cx, cy);
-                sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
-                sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);
-                sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);
-                wsum += w;
-            }
-        }
-
-        float coeff = 1.0f / wsum;
-
-        float I1wVal  = sum  * coeff;
-        float I1wxVal = sumx * coeff;
-        float I1wyVal = sumy * coeff;
-
-        I1w[y * I1w_step + x]  = I1wVal;
-        I1wx[y * I1w_step + x] = I1wxVal;
-        I1wy[y * I1w_step + x] = I1wyVal;
-
-        float Ix2 = I1wxVal * I1wxVal;
-        float Iy2 = I1wyVal * I1wyVal;
-
-        // store the |Grad(I1)|^2
-        grad[y * I1w_step + x] = Ix2 + Iy2;
-
-        // compute the constant part of the rho function
-        float I0Val = I0[y * I0_step + x];
-        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
-    }
-
-}
-
-
-__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
-    __global const float* u2,
-    __global float* p11, int p11_step,
-    __global float* p12,
-    __global float* p21,
-    __global float* p22,
-    float taut,
-    int u2_step,
-    int u1_offset_x,
-    int u1_offset_y,
-    int u2_offset_x,
-    int u2_offset_y)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < u1_col && y < u1_row)
-    {
-        int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-
-        int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-
-        int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
-
-        int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
-
-        float g1 = hypot(u1x, u1y);
-        float g2 = hypot(u2x, u2y);
-
-        float ng1 = 1.0f + taut * g1;
-        float ng2 = 1.0f + taut * g2;
-
-        p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
-        p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
-        p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2;
-        p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2;
-    }
-
-}
-
-inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
-{
-
-    if (x > 0 && y > 0)
-    {
-        float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
-        float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
-        return v1x + v2y;
-    }
-    else
-    {
-        if (y > 0)
-            return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0];
-        else
-        {
-            if (x > 0)
-                return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x];
-            else
-                return v1[0 * v1_step + 0] + v2[0 * v2_step + 0];
-        }
-    }
-
-}
-
-__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
-    __global const float* I1wy, /*int I1wy_step,*/
-    __global const float* grad, /*int grad_step,*/
-    __global const float* rho_c, /*int rho_c_step,*/
-    __global const float* p11, /*int p11_step,*/
-    __global const float* p12, /*int p12_step,*/
-    __global const float* p21, /*int p21_step,*/
-    __global const float* p22, /*int p22_step,*/
-    __global float* u1, int u1_step,
-    __global float* u2,
-    __global float* error, float l_t, float theta, int u2_step,
-    int u1_offset_x,
-    int u1_offset_y,
-    int u2_offset_x,
-    int u2_offset_y,
-    char calc_error)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < I1wx_col && y < I1wx_row)
-    {
-        float I1wxVal = I1wx[y * I1wx_step + x];
-        float I1wyVal = I1wy[y * I1wx_step + x];
-        float gradVal = grad[y * I1wx_step + x];
-        float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
-
-        float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
-
-        // estimate the values of the variable (v1, v2) (thresholding operator TH)
-
-        float d1 = 0.0f;
-        float d2 = 0.0f;
-
-        if (rho < -l_t * gradVal)
-        {
-            d1 = l_t * I1wxVal;
-            d2 = l_t * I1wyVal;
-        }
-        else if (rho > l_t * gradVal)
-        {
-            d1 = -l_t * I1wxVal;
-            d2 = -l_t * I1wyVal;
-        }
-        else if (gradVal > 1.192092896e-07f)
-        {
-            float fi = -rho / gradVal;
-            d1 = fi * I1wxVal;
-            d2 = fi * I1wyVal;
-        }
-
-        float v1 = u1OldVal + d1;
-        float v2 = u2OldVal + d2;
-
-        // compute the divergence of the dual variable (p1, p2)
-
-        float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
-        float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
-
-        // estimate the values of the optical flow (u1, u2)
-
-        float u1NewVal = v1 + theta * div_p1;
-        float u2NewVal = v2 + theta * div_p2;
-
-        u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
-        u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
-
-        if(calc_error)
-        {
-            float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-            float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
-            error[y * I1wx_step + x] = n1 + n2;
-        }
-    }
-}
diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp
deleted file mode 100644
index 198f910..0000000
--- a/modules/ocl/src/optical_flow_farneback.cpp
+++ /dev/null
@@ -1,542 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//      Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-#include "opencv2/video/tracking.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-#define MIN_SIZE 32
-
-namespace cv {
-namespace ocl {
-namespace optflow_farneback
-{
-oclMat g;
-oclMat xg;
-oclMat xxg;
-oclMat gKer;
-
-float ig[4];
-
-inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
-{
-    cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
-    gKer.upload(t_gKer);
-}
-
-static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
-{
-    String kernelName("gaussianBlur");
-#ifdef ANDROID
-    size_t localThreads[3] = { 128, 1, 1 };
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-#endif
-    size_t globalThreads[3] = { src.cols, src.rows, 1 };
-    int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
-
-    CV_Assert(dst.size() == src.size());
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
-    args.push_back(std::make_pair(smem_size, (void *)NULL));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1);
-}
-
-static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
-{
-    String kernelName("polynomialExpansion");
-
-#ifdef ANDROID
-    size_t localThreads[3] = { 128, 1, 1 };
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-#endif
-    size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
-    int smem_size = 3 * localThreads[0] * sizeof(float);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
-    args.push_back(std::make_pair(smem_size, (void *)NULL));
-    args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
-
-    char opt [128];
-    sprintf(opt, "-D polyN=%d", polyN);
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1, opt);
-}
-
-static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
-{
-    String kernelName("updateMatrices");
-#ifdef ANDROID
-    size_t localThreads[3] = { 32, 4, 1 };
-#else
-    size_t localThreads[3] = { 32, 8, 1 };
-#endif
-    size_t globalThreads[3] = { flowx.cols, flowx.rows, 1 };
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1);
-}
-
-static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
-{
-    String kernelName("boxFilter5");
-    int height = src.rows / 5;
-#ifdef ANDROID
-    size_t localThreads[3] = { 128, 1, 1 };
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-#endif
-    size_t globalThreads[3] = { src.cols, height, 1 };
-    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(smem_size, (void *)NULL));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1);
-}
-
-static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
-{
-    String kernelName("updateFlow");
-    int cols = divUp(flowx.cols, 4);
-#ifdef ANDROID
-    size_t localThreads[3] = { 32, 4, 1 };
-#else
-    size_t localThreads[3] = { 32, 8, 1 };
-#endif
-    size_t globalThreads[3] = { cols, flowx.rows, 1 };
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1);
-}
-
-static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
-{
-    String kernelName("gaussianBlur5");
-    int height = src.rows / 5;
-#ifdef ANDROID
-    size_t localThreads[3] = { 128, 1, 1 };
-#else
-    size_t localThreads[3] = { 256, 1, 1 };
-#endif
-    size_t globalThreads[3] = { src.cols, height, 1 };
-    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
-    args.push_back(std::make_pair(smem_size, (void *)NULL));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
-
-    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
-                        globalThreads, localThreads, args, -1, -1);
-}
-}
-}
-} // namespace cv { namespace ocl { namespace optflow_farneback
-
-static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
-{
-    if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
-        return mat(Rect(0, 0, cols, rows));
-    return mat = oclMat(rows, cols, type);
-}
-
-cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow()
-{
-    numLevels = 5;
-    pyrScale = 0.5;
-    fastPyramids = false;
-    winSize = 13;
-    numIters = 10;
-    polyN = 5;
-    polySigma = 1.1;
-    flags = 0;
-}
-
-void cv::ocl::FarnebackOpticalFlow::releaseMemory()
-{
-    frames_[0].release();
-    frames_[1].release();
-    pyrLevel_[0].release();
-    pyrLevel_[1].release();
-    M_.release();
-    bufM_.release();
-    R_[0].release();
-    R_[1].release();
-    blurredFrame_[0].release();
-    blurredFrame_[1].release();
-    pyramid0_.clear();
-    pyramid1_.clear();
-}
-
-void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
-    int n, double sigma, float *g, float *xg, float *xxg,
-    double &ig11, double &ig03, double &ig33, double &ig55)
-{
-    double s = 0.;
-    for (int x = -n; x <= n; x++)
-    {
-        g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
-        s += g[x];
-    }
-
-    s = 1./s;
-    for (int x = -n; x <= n; x++)
-    {
-        g[x] = (float)(g[x]*s);
-        xg[x] = (float)(x*g[x]);
-        xxg[x] = (float)(x*x*g[x]);
-    }
-
-    Mat_<double> G(6, 6);
-    G.setTo(0);
-
-    for (int y = -n; y <= n; y++)
-    {
-        for (int x = -n; x <= n; x++)
-        {
-            G(0,0) += g[y]*g[x];
-            G(1,1) += g[y]*g[x]*x*x;
-            G(3,3) += g[y]*g[x]*x*x*x*x;
-            G(5,5) += g[y]*g[x]*x*x*y*y;
-        }
-    }
-
-    //G[0][0] = 1.;
-    G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
-    G(4,4) = G(3,3);
-    G(3,4) = G(4,3) = G(5,5);
-
-    // invG:
-    // [ x        e  e    ]
-    // [    y             ]
-    // [       y          ]
-    // [ e        z       ]
-    // [ e           z    ]
-    // [                u ]
-    Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
-
-    ig11 = invG(1,1);
-    ig03 = invG(0,3);
-    ig33 = invG(3,3);
-    ig55 = invG(5,5);
-}
-
-void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
-{
-    std::vector<float> buf(n*6 + 3);
-    float* g = &buf[0] + n;
-    float* xg = g + n*2 + 1;
-    float* xxg = xg + n*2 + 1;
-
-    if (sigma < FLT_EPSILON)
-        sigma = n*0.3;
-
-    double ig11, ig03, ig33, ig55;
-    prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
-
-    cv::Mat t_g(1, n + 1, CV_32FC1, g);
-    cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
-    cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
-
-    optflow_farneback::g.upload(t_g);
-    optflow_farneback::xg.upload(t_xg);
-    optflow_farneback::xxg.upload(t_xxg);
-
-    optflow_farneback::ig[0] = static_cast<float>(ig11);
-    optflow_farneback::ig[1] = static_cast<float>(ig03);
-    optflow_farneback::ig[2] = static_cast<float>(ig33);
-    optflow_farneback::ig[3] = static_cast<float>(ig55);
-}
-
-void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
-    const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
-    oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
-{
-    optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
-
-    swap(M, bufM);
-
-    optflow_farneback::updateFlowOcl(M, flowx, flowy);
-
-    if (updateMatrices)
-        optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
-}
-
-
-void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
-    const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
-    oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
-{
-    optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
-
-    swap(M, bufM);
-
-    optflow_farneback::updateFlowOcl(M, flowx, flowy);
-
-    if (updateMatrices)
-        optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
-}
-
-
-void cv::ocl::FarnebackOpticalFlow::operator ()(
-    const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
-{
-    CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
-    CV_Assert(frame0.size() == frame1.size());
-    CV_Assert(polyN == 5 || polyN == 7);
-    CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
-
-    Size size = frame0.size();
-    oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
-
-    flowx.create(size, CV_32F);
-    flowy.create(size, CV_32F);
-    oclMat flowx0 = flowx;
-    oclMat flowy0 = flowy;
-
-    // Crop unnecessary levels
-    double scale = 1;
-    int numLevelsCropped = 0;
-    for (; numLevelsCropped < numLevels; numLevelsCropped++)
-    {
-        scale *= pyrScale;
-        if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
-            break;
-    }
-
-    frame0.convertTo(frames_[0], CV_32F);
-    frame1.convertTo(frames_[1], CV_32F);
-
-    if (fastPyramids)
-    {
-        // Build Gaussian pyramids using pyrDown()
-        pyramid0_.resize(numLevelsCropped + 1);
-        pyramid1_.resize(numLevelsCropped + 1);
-        pyramid0_[0] = frames_[0];
-        pyramid1_[0] = frames_[1];
-        for (int i = 1; i <= numLevelsCropped; ++i)
-        {
-            pyrDown(pyramid0_[i - 1], pyramid0_[i]);
-            pyrDown(pyramid1_[i - 1], pyramid1_[i]);
-        }
-    }
-
-    setPolynomialExpansionConsts(polyN, polySigma);
-
-    for (int k = numLevelsCropped; k >= 0; k--)
-    {
-        scale = 1;
-        for (int i = 0; i < k; i++)
-            scale *= pyrScale;
-
-        double sigma = (1./scale - 1) * 0.5;
-        int smoothSize = cvRound(sigma*5) | 1;
-        smoothSize = std::max(smoothSize, 3);
-
-        int width = cvRound(size.width*scale);
-        int height = cvRound(size.height*scale);
-
-        if (fastPyramids)
-        {
-            width = pyramid0_[k].cols;
-            height = pyramid0_[k].rows;
-        }
-
-        if (k > 0)
-        {
-            curFlowX.create(height, width, CV_32F);
-            curFlowY.create(height, width, CV_32F);
-        }
-        else
-        {
-            curFlowX = flowx0;
-            curFlowY = flowy0;
-        }
-
-        if (!prevFlowX.data)
-        {
-            if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
-            {
-                resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
-                resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
-                multiply(scale, curFlowX, curFlowX);
-                multiply(scale, curFlowY, curFlowY);
-            }
-            else
-            {
-                curFlowX.setTo(0);
-                curFlowY.setTo(0);
-            }
-        }
-        else
-        {
-            resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
-            resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
-            multiply(1./pyrScale, curFlowX, curFlowX);
-            multiply(1./pyrScale, curFlowY, curFlowY);
-        }
-
-        oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
-        oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
-        oclMat R[2] =
-        {
-            allocMatFromBuf(5*height, width, CV_32F, R_[0]),
-            allocMatFromBuf(5*height, width, CV_32F, R_[1])
-        };
-
-        if (fastPyramids)
-        {
-            optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
-            optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
-        }
-        else
-        {
-            oclMat blurredFrame[2] =
-            {
-                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
-                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
-            };
-            oclMat pyrLevel[2] =
-            {
-                allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
-                allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
-            };
-
-            Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
-            optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
-
-            for (int i = 0; i < 2; i++)
-            {
-                optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
-                resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
-                optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
-            }
-        }
-
-        optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
-
-        if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
-        {
-            Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
-            optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
-        }
-        for (int i = 0; i < numIters; i++)
-        {
-            if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
-                updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
-            else
-                updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
-        }
-
-        prevFlowX = curFlowX;
-        prevFlowY = curFlowY;
-    }
-
-    flowx = curFlowX;
-    flowy = curFlowY;
-}
diff --git a/modules/ocl/src/orb.cpp b/modules/ocl/src/orb.cpp
deleted file mode 100644
index 4bd022c..0000000
--- a/modules/ocl/src/orb.cpp
+++ /dev/null
@@ -1,916 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace
-{
-    const float HARRIS_K = 0.04f;
-    const int DESCRIPTOR_SIZE = 32;
-
-    const int bit_pattern_31_[256 * 4] =
-    {
-        8,-3, 9,5/*mean (0), correlation (0)*/,
-        4,2, 7,-12/*mean (1.12461e-05), correlation (0.0437584)*/,
-        -11,9, -8,2/*mean (3.37382e-05), correlation (0.0617409)*/,
-        7,-12, 12,-13/*mean (5.62303e-05), correlation (0.0636977)*/,
-        2,-13, 2,12/*mean (0.000134953), correlation (0.085099)*/,
-        1,-7, 1,6/*mean (0.000528565), correlation (0.0857175)*/,
-        -2,-10, -2,-4/*mean (0.0188821), correlation (0.0985774)*/,
-        -13,-13, -11,-8/*mean (0.0363135), correlation (0.0899616)*/,
-        -13,-3, -12,-9/*mean (0.121806), correlation (0.099849)*/,
-        10,4, 11,9/*mean (0.122065), correlation (0.093285)*/,
-        -13,-8, -8,-9/*mean (0.162787), correlation (0.0942748)*/,
-        -11,7, -9,12/*mean (0.21561), correlation (0.0974438)*/,
-        7,7, 12,6/*mean (0.160583), correlation (0.130064)*/,
-        -4,-5, -3,0/*mean (0.228171), correlation (0.132998)*/,
-        -13,2, -12,-3/*mean (0.00997526), correlation (0.145926)*/,
-        -9,0, -7,5/*mean (0.198234), correlation (0.143636)*/,
-        12,-6, 12,-1/*mean (0.0676226), correlation (0.16689)*/,
-        -3,6, -2,12/*mean (0.166847), correlation (0.171682)*/,
-        -6,-13, -4,-8/*mean (0.101215), correlation (0.179716)*/,
-        11,-13, 12,-8/*mean (0.200641), correlation (0.192279)*/,
-        4,7, 5,1/*mean (0.205106), correlation (0.186848)*/,
-        5,-3, 10,-3/*mean (0.234908), correlation (0.192319)*/,
-        3,-7, 6,12/*mean (0.0709964), correlation (0.210872)*/,
-        -8,-7, -6,-2/*mean (0.0939834), correlation (0.212589)*/,
-        -2,11, -1,-10/*mean (0.127778), correlation (0.20866)*/,
-        -13,12, -8,10/*mean (0.14783), correlation (0.206356)*/,
-        -7,3, -5,-3/*mean (0.182141), correlation (0.198942)*/,
-        -4,2, -3,7/*mean (0.188237), correlation (0.21384)*/,
-        -10,-12, -6,11/*mean (0.14865), correlation (0.23571)*/,
-        5,-12, 6,-7/*mean (0.222312), correlation (0.23324)*/,
-        5,-6, 7,-1/*mean (0.229082), correlation (0.23389)*/,
-        1,0, 4,-5/*mean (0.241577), correlation (0.215286)*/,
-        9,11, 11,-13/*mean (0.00338507), correlation (0.251373)*/,
-        4,7, 4,12/*mean (0.131005), correlation (0.257622)*/,
-        2,-1, 4,4/*mean (0.152755), correlation (0.255205)*/,
-        -4,-12, -2,7/*mean (0.182771), correlation (0.244867)*/,
-        -8,-5, -7,-10/*mean (0.186898), correlation (0.23901)*/,
-        4,11, 9,12/*mean (0.226226), correlation (0.258255)*/,
-        0,-8, 1,-13/*mean (0.0897886), correlation (0.274827)*/,
-        -13,-2, -8,2/*mean (0.148774), correlation (0.28065)*/,
-        -3,-2, -2,3/*mean (0.153048), correlation (0.283063)*/,
-        -6,9, -4,-9/*mean (0.169523), correlation (0.278248)*/,
-        8,12, 10,7/*mean (0.225337), correlation (0.282851)*/,
-        0,9, 1,3/*mean (0.226687), correlation (0.278734)*/,
-        7,-5, 11,-10/*mean (0.00693882), correlation (0.305161)*/,
-        -13,-6, -11,0/*mean (0.0227283), correlation (0.300181)*/,
-        10,7, 12,1/*mean (0.125517), correlation (0.31089)*/,
-        -6,-3, -6,12/*mean (0.131748), correlation (0.312779)*/,
-        10,-9, 12,-4/*mean (0.144827), correlation (0.292797)*/,
-        -13,8, -8,-12/*mean (0.149202), correlation (0.308918)*/,
-        -13,0, -8,-4/*mean (0.160909), correlation (0.310013)*/,
-        3,3, 7,8/*mean (0.177755), correlation (0.309394)*/,
-        5,7, 10,-7/*mean (0.212337), correlation (0.310315)*/,
-        -1,7, 1,-12/*mean (0.214429), correlation (0.311933)*/,
-        3,-10, 5,6/*mean (0.235807), correlation (0.313104)*/,
-        2,-4, 3,-10/*mean (0.00494827), correlation (0.344948)*/,
-        -13,0, -13,5/*mean (0.0549145), correlation (0.344675)*/,
-        -13,-7, -12,12/*mean (0.103385), correlation (0.342715)*/,
-        -13,3, -11,8/*mean (0.134222), correlation (0.322922)*/,
-        -7,12, -4,7/*mean (0.153284), correlation (0.337061)*/,
-        6,-10, 12,8/*mean (0.154881), correlation (0.329257)*/,
-        -9,-1, -7,-6/*mean (0.200967), correlation (0.33312)*/,
-        -2,-5, 0,12/*mean (0.201518), correlation (0.340635)*/,
-        -12,5, -7,5/*mean (0.207805), correlation (0.335631)*/,
-        3,-10, 8,-13/*mean (0.224438), correlation (0.34504)*/,
-        -7,-7, -4,5/*mean (0.239361), correlation (0.338053)*/,
-        -3,-2, -1,-7/*mean (0.240744), correlation (0.344322)*/,
-        2,9, 5,-11/*mean (0.242949), correlation (0.34145)*/,
-        -11,-13, -5,-13/*mean (0.244028), correlation (0.336861)*/,
-        -1,6, 0,-1/*mean (0.247571), correlation (0.343684)*/,
-        5,-3, 5,2/*mean (0.000697256), correlation (0.357265)*/,
-        -4,-13, -4,12/*mean (0.00213675), correlation (0.373827)*/,
-        -9,-6, -9,6/*mean (0.0126856), correlation (0.373938)*/,
-        -12,-10, -8,-4/*mean (0.0152497), correlation (0.364237)*/,
-        10,2, 12,-3/*mean (0.0299933), correlation (0.345292)*/,
-        7,12, 12,12/*mean (0.0307242), correlation (0.366299)*/,
-        -7,-13, -6,5/*mean (0.0534975), correlation (0.368357)*/,
-        -4,9, -3,4/*mean (0.099865), correlation (0.372276)*/,
-        7,-1, 12,2/*mean (0.117083), correlation (0.364529)*/,
-        -7,6, -5,1/*mean (0.126125), correlation (0.369606)*/,
-        -13,11, -12,5/*mean (0.130364), correlation (0.358502)*/,
-        -3,7, -2,-6/*mean (0.131691), correlation (0.375531)*/,
-        7,-8, 12,-7/*mean (0.160166), correlation (0.379508)*/,
-        -13,-7, -11,-12/*mean (0.167848), correlation (0.353343)*/,
-        1,-3, 12,12/*mean (0.183378), correlation (0.371916)*/,
-        2,-6, 3,0/*mean (0.228711), correlation (0.371761)*/,
-        -4,3, -2,-13/*mean (0.247211), correlation (0.364063)*/,
-        -1,-13, 1,9/*mean (0.249325), correlation (0.378139)*/,
-        7,1, 8,-6/*mean (0.000652272), correlation (0.411682)*/,
-        1,-1, 3,12/*mean (0.00248538), correlation (0.392988)*/,
-        9,1, 12,6/*mean (0.0206815), correlation (0.386106)*/,
-        -1,-9, -1,3/*mean (0.0364485), correlation (0.410752)*/,
-        -13,-13, -10,5/*mean (0.0376068), correlation (0.398374)*/,
-        7,7, 10,12/*mean (0.0424202), correlation (0.405663)*/,
-        12,-5, 12,9/*mean (0.0942645), correlation (0.410422)*/,
-        6,3, 7,11/*mean (0.1074), correlation (0.413224)*/,
-        5,-13, 6,10/*mean (0.109256), correlation (0.408646)*/,
-        2,-12, 2,3/*mean (0.131691), correlation (0.416076)*/,
-        3,8, 4,-6/*mean (0.165081), correlation (0.417569)*/,
-        2,6, 12,-13/*mean (0.171874), correlation (0.408471)*/,
-        9,-12, 10,3/*mean (0.175146), correlation (0.41296)*/,
-        -8,4, -7,9/*mean (0.183682), correlation (0.402956)*/,
-        -11,12, -4,-6/*mean (0.184672), correlation (0.416125)*/,
-        1,12, 2,-8/*mean (0.191487), correlation (0.386696)*/,
-        6,-9, 7,-4/*mean (0.192668), correlation (0.394771)*/,
-        2,3, 3,-2/*mean (0.200157), correlation (0.408303)*/,
-        6,3, 11,0/*mean (0.204588), correlation (0.411762)*/,
-        3,-3, 8,-8/*mean (0.205904), correlation (0.416294)*/,
-        7,8, 9,3/*mean (0.213237), correlation (0.409306)*/,
-        -11,-5, -6,-4/*mean (0.243444), correlation (0.395069)*/,
-        -10,11, -5,10/*mean (0.247672), correlation (0.413392)*/,
-        -5,-8, -3,12/*mean (0.24774), correlation (0.411416)*/,
-        -10,5, -9,0/*mean (0.00213675), correlation (0.454003)*/,
-        8,-1, 12,-6/*mean (0.0293635), correlation (0.455368)*/,
-        4,-6, 6,-11/*mean (0.0404971), correlation (0.457393)*/,
-        -10,12, -8,7/*mean (0.0481107), correlation (0.448364)*/,
-        4,-2, 6,7/*mean (0.050641), correlation (0.455019)*/,
-        -2,0, -2,12/*mean (0.0525978), correlation (0.44338)*/,
-        -5,-8, -5,2/*mean (0.0629667), correlation (0.457096)*/,
-        7,-6, 10,12/*mean (0.0653846), correlation (0.445623)*/,
-        -9,-13, -8,-8/*mean (0.0858749), correlation (0.449789)*/,
-        -5,-13, -5,-2/*mean (0.122402), correlation (0.450201)*/,
-        8,-8, 9,-13/*mean (0.125416), correlation (0.453224)*/,
-        -9,-11, -9,0/*mean (0.130128), correlation (0.458724)*/,
-        1,-8, 1,-2/*mean (0.132467), correlation (0.440133)*/,
-        7,-4, 9,1/*mean (0.132692), correlation (0.454)*/,
-        -2,1, -1,-4/*mean (0.135695), correlation (0.455739)*/,
-        11,-6, 12,-11/*mean (0.142904), correlation (0.446114)*/,
-        -12,-9, -6,4/*mean (0.146165), correlation (0.451473)*/,
-        3,7, 7,12/*mean (0.147627), correlation (0.456643)*/,
-        5,5, 10,8/*mean (0.152901), correlation (0.455036)*/,
-        0,-4, 2,8/*mean (0.167083), correlation (0.459315)*/,
-        -9,12, -5,-13/*mean (0.173234), correlation (0.454706)*/,
-        0,7, 2,12/*mean (0.18312), correlation (0.433855)*/,
-        -1,2, 1,7/*mean (0.185504), correlation (0.443838)*/,
-        5,11, 7,-9/*mean (0.185706), correlation (0.451123)*/,
-        3,5, 6,-8/*mean (0.188968), correlation (0.455808)*/,
-        -13,-4, -8,9/*mean (0.191667), correlation (0.459128)*/,
-        -5,9, -3,-3/*mean (0.193196), correlation (0.458364)*/,
-        -4,-7, -3,-12/*mean (0.196536), correlation (0.455782)*/,
-        6,5, 8,0/*mean (0.1972), correlation (0.450481)*/,
-        -7,6, -6,12/*mean (0.199438), correlation (0.458156)*/,
-        -13,6, -5,-2/*mean (0.211224), correlation (0.449548)*/,
-        1,-10, 3,10/*mean (0.211718), correlation (0.440606)*/,
-        4,1, 8,-4/*mean (0.213034), correlation (0.443177)*/,
-        -2,-2, 2,-13/*mean (0.234334), correlation (0.455304)*/,
-        2,-12, 12,12/*mean (0.235684), correlation (0.443436)*/,
-        -2,-13, 0,-6/*mean (0.237674), correlation (0.452525)*/,
-        4,1, 9,3/*mean (0.23962), correlation (0.444824)*/,
-        -6,-10, -3,-5/*mean (0.248459), correlation (0.439621)*/,
-        -3,-13, -1,1/*mean (0.249505), correlation (0.456666)*/,
-        7,5, 12,-11/*mean (0.00119208), correlation (0.495466)*/,
-        4,-2, 5,-7/*mean (0.00372245), correlation (0.484214)*/,
-        -13,9, -9,-5/*mean (0.00741116), correlation (0.499854)*/,
-        7,1, 8,6/*mean (0.0208952), correlation (0.499773)*/,
-        7,-8, 7,6/*mean (0.0220085), correlation (0.501609)*/,
-        -7,-4, -7,1/*mean (0.0233806), correlation (0.496568)*/,
-        -8,11, -7,-8/*mean (0.0236505), correlation (0.489719)*/,
-        -13,6, -12,-8/*mean (0.0268781), correlation (0.503487)*/,
-        2,4, 3,9/*mean (0.0323324), correlation (0.501938)*/,
-        10,-5, 12,3/*mean (0.0399235), correlation (0.494029)*/,
-        -6,-5, -6,7/*mean (0.0420153), correlation (0.486579)*/,
-        8,-3, 9,-8/*mean (0.0548021), correlation (0.484237)*/,
-        2,-12, 2,8/*mean (0.0616622), correlation (0.496642)*/,
-        -11,-2, -10,3/*mean (0.0627755), correlation (0.498563)*/,
-        -12,-13, -7,-9/*mean (0.0829622), correlation (0.495491)*/,
-        -11,0, -10,-5/*mean (0.0843342), correlation (0.487146)*/,
-        5,-3, 11,8/*mean (0.0929937), correlation (0.502315)*/,
-        -2,-13, -1,12/*mean (0.113327), correlation (0.48941)*/,
-        -1,-8, 0,9/*mean (0.132119), correlation (0.467268)*/,
-        -13,-11, -12,-5/*mean (0.136269), correlation (0.498771)*/,
-        -10,-2, -10,11/*mean (0.142173), correlation (0.498714)*/,
-        -3,9, -2,-13/*mean (0.144141), correlation (0.491973)*/,
-        2,-3, 3,2/*mean (0.14892), correlation (0.500782)*/,
-        -9,-13, -4,0/*mean (0.150371), correlation (0.498211)*/,
-        -4,6, -3,-10/*mean (0.152159), correlation (0.495547)*/,
-        -4,12, -2,-7/*mean (0.156152), correlation (0.496925)*/,
-        -6,-11, -4,9/*mean (0.15749), correlation (0.499222)*/,
-        6,-3, 6,11/*mean (0.159211), correlation (0.503821)*/,
-        -13,11, -5,5/*mean (0.162427), correlation (0.501907)*/,
-        11,11, 12,6/*mean (0.16652), correlation (0.497632)*/,
-        7,-5, 12,-2/*mean (0.169141), correlation (0.484474)*/,
-        -1,12, 0,7/*mean (0.169456), correlation (0.495339)*/,
-        -4,-8, -3,-2/*mean (0.171457), correlation (0.487251)*/,
-        -7,1, -6,7/*mean (0.175), correlation (0.500024)*/,
-        -13,-12, -8,-13/*mean (0.175866), correlation (0.497523)*/,
-        -7,-2, -6,-8/*mean (0.178273), correlation (0.501854)*/,
-        -8,5, -6,-9/*mean (0.181107), correlation (0.494888)*/,
-        -5,-1, -4,5/*mean (0.190227), correlation (0.482557)*/,
-        -13,7, -8,10/*mean (0.196739), correlation (0.496503)*/,
-        1,5, 5,-13/*mean (0.19973), correlation (0.499759)*/,
-        1,0, 10,-13/*mean (0.204465), correlation (0.49873)*/,
-        9,12, 10,-1/*mean (0.209334), correlation (0.49063)*/,
-        5,-8, 10,-9/*mean (0.211134), correlation (0.503011)*/,
-        -1,11, 1,-13/*mean (0.212), correlation (0.499414)*/,
-        -9,-3, -6,2/*mean (0.212168), correlation (0.480739)*/,
-        -1,-10, 1,12/*mean (0.212731), correlation (0.502523)*/,
-        -13,1, -8,-10/*mean (0.21327), correlation (0.489786)*/,
-        8,-11, 10,-6/*mean (0.214159), correlation (0.488246)*/,
-        2,-13, 3,-6/*mean (0.216993), correlation (0.50287)*/,
-        7,-13, 12,-9/*mean (0.223639), correlation (0.470502)*/,
-        -10,-10, -5,-7/*mean (0.224089), correlation (0.500852)*/,
-        -10,-8, -8,-13/*mean (0.228666), correlation (0.502629)*/,
-        4,-6, 8,5/*mean (0.22906), correlation (0.498305)*/,
-        3,12, 8,-13/*mean (0.233378), correlation (0.503825)*/,
-        -4,2, -3,-3/*mean (0.234323), correlation (0.476692)*/,
-        5,-13, 10,-12/*mean (0.236392), correlation (0.475462)*/,
-        4,-13, 5,-1/*mean (0.236842), correlation (0.504132)*/,
-        -9,9, -4,3/*mean (0.236977), correlation (0.497739)*/,
-        0,3, 3,-9/*mean (0.24314), correlation (0.499398)*/,
-        -12,1, -6,1/*mean (0.243297), correlation (0.489447)*/,
-        3,2, 4,-8/*mean (0.00155196), correlation (0.553496)*/,
-        -10,-10, -10,9/*mean (0.00239541), correlation (0.54297)*/,
-        8,-13, 12,12/*mean (0.0034413), correlation (0.544361)*/,
-        -8,-12, -6,-5/*mean (0.003565), correlation (0.551225)*/,
-        2,2, 3,7/*mean (0.00835583), correlation (0.55285)*/,
-        10,6, 11,-8/*mean (0.00885065), correlation (0.540913)*/,
-        6,8, 8,-12/*mean (0.0101552), correlation (0.551085)*/,
-        -7,10, -6,5/*mean (0.0102227), correlation (0.533635)*/,
-        -3,-9, -3,9/*mean (0.0110211), correlation (0.543121)*/,
-        -1,-13, -1,5/*mean (0.0113473), correlation (0.550173)*/,
-        -3,-7, -3,4/*mean (0.0140913), correlation (0.554774)*/,
-        -8,-2, -8,3/*mean (0.017049), correlation (0.55461)*/,
-        4,2, 12,12/*mean (0.01778), correlation (0.546921)*/,
-        2,-5, 3,11/*mean (0.0224022), correlation (0.549667)*/,
-        6,-9, 11,-13/*mean (0.029161), correlation (0.546295)*/,
-        3,-1, 7,12/*mean (0.0303081), correlation (0.548599)*/,
-        11,-1, 12,4/*mean (0.0355151), correlation (0.523943)*/,
-        -3,0, -3,6/*mean (0.0417904), correlation (0.543395)*/,
-        4,-11, 4,12/*mean (0.0487292), correlation (0.542818)*/,
-        2,-4, 2,1/*mean (0.0575124), correlation (0.554888)*/,
-        -10,-6, -8,1/*mean (0.0594242), correlation (0.544026)*/,
-        -13,7, -11,1/*mean (0.0597391), correlation (0.550524)*/,
-        -13,12, -11,-13/*mean (0.0608974), correlation (0.55383)*/,
-        6,0, 11,-13/*mean (0.065126), correlation (0.552006)*/,
-        0,-1, 1,4/*mean (0.074224), correlation (0.546372)*/,
-        -13,3, -9,-2/*mean (0.0808592), correlation (0.554875)*/,
-        -9,8, -6,-3/*mean (0.0883378), correlation (0.551178)*/,
-        -13,-6, -8,-2/*mean (0.0901035), correlation (0.548446)*/,
-        5,-9, 8,10/*mean (0.0949843), correlation (0.554694)*/,
-        2,7, 3,-9/*mean (0.0994152), correlation (0.550979)*/,
-        -1,-6, -1,-1/*mean (0.10045), correlation (0.552714)*/,
-        9,5, 11,-2/*mean (0.100686), correlation (0.552594)*/,
-        11,-3, 12,-8/*mean (0.101091), correlation (0.532394)*/,
-        3,0, 3,5/*mean (0.101147), correlation (0.525576)*/,
-        -1,4, 0,10/*mean (0.105263), correlation (0.531498)*/,
-        3,-6, 4,5/*mean (0.110785), correlation (0.540491)*/,
-        -13,0, -10,5/*mean (0.112798), correlation (0.536582)*/,
-        5,8, 12,11/*mean (0.114181), correlation (0.555793)*/,
-        8,9, 9,-6/*mean (0.117431), correlation (0.553763)*/,
-        7,-4, 8,-12/*mean (0.118522), correlation (0.553452)*/,
-        -10,4, -10,9/*mean (0.12094), correlation (0.554785)*/,
-        7,3, 12,4/*mean (0.122582), correlation (0.555825)*/,
-        9,-7, 10,-2/*mean (0.124978), correlation (0.549846)*/,
-        7,0, 12,-2/*mean (0.127002), correlation (0.537452)*/,
-        -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
-    };
-
-    void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
-    {
-        RNG rng(0x12345678);
-
-        pattern.create(2, ntuples * tupleSize, CV_32SC1);
-        pattern.setTo(Scalar::all(0));
-
-        int* pattern_x_ptr = pattern.ptr<int>(0);
-        int* pattern_y_ptr = pattern.ptr<int>(1);
-
-        for (int i = 0; i < ntuples; i++)
-        {
-            for (int k = 0; k < tupleSize; k++)
-            {
-                for(;;)
-                {
-                    int idx = rng.uniform(0, poolSize);
-                    Point pt = pattern0[idx];
-
-                    int k1;
-                    for (k1 = 0; k1 < k; k1++)
-                        if (pattern_x_ptr[tupleSize * i + k1] == pt.x && pattern_y_ptr[tupleSize * i + k1] == pt.y)
-                            break;
-
-                    if (k1 == k)
-                    {
-                        pattern_x_ptr[tupleSize * i + k] = pt.x;
-                        pattern_y_ptr[tupleSize * i + k] = pt.y;
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    void makeRandomPattern(int patchSize, Point* pattern, int npoints)
-    {
-        // we always start with a fixed seed,
-        // to make patterns the same on each run
-        RNG rng(0x34985739);
-
-        for (int i = 0; i < npoints; i++)
-        {
-            pattern[i].x = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
-            pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
-        }
-    }
-}
-
-cv::ocl::ORB_OCL::ORB_OCL(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
-    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
-    scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(DEFAULT_FAST_THRESHOLD)
-{
-    CV_Assert(patchSize_ >= 2);
-
-    // fill the extractors and descriptors for the corresponding scales
-    float factor = 1.0f / scaleFactor_;
-    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
-
-    n_features_per_level_.resize(nLevels_);
-    size_t sum_n_features = 0;
-    for (int level = 0; level < nLevels_ - 1; ++level)
-    {
-        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
-        sum_n_features += n_features_per_level_[level];
-        n_desired_features_per_scale *= factor;
-    }
-    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
-
-    // pre-compute the end of a row in a circular patch
-    int half_patch_size = patchSize_ / 2;
-    std::vector<int> u_max(half_patch_size + 2);
-    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
-        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
-
-    // Make sure we are symmetric
-    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
-    {
-        while (u_max[v_0] == u_max[v_0 + 1])
-            ++v_0;
-        u_max[v] = v_0;
-        ++v_0;
-    }
-    CV_Assert(u_max.size() < 32);
-    //cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
-    uMax_ = oclMat(1, u_max.size(), CV_32SC1, &u_max[0]);
-
-    // Calc pattern
-    const int npoints = 512;
-    Point pattern_buf[npoints];
-    const Point* pattern0 = (const Point*)bit_pattern_31_;
-    if (patchSize_ != 31)
-    {
-        pattern0 = pattern_buf;
-        makeRandomPattern(patchSize_, pattern_buf, npoints);
-    }
-
-    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);
-
-    Mat h_pattern;
-
-    if (WTA_K_ == 2)
-    {
-        h_pattern.create(2, npoints, CV_32SC1);
-
-        int* pattern_x_ptr = h_pattern.ptr<int>(0);
-        int* pattern_y_ptr = h_pattern.ptr<int>(1);
-
-        for (int i = 0; i < npoints; ++i)
-        {
-            pattern_x_ptr[i] = pattern0[i].x;
-            pattern_y_ptr[i] = pattern0[i].y;
-        }
-    }
-    else
-    {
-        int ntuples = descriptorSize() * 4;
-        initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
-    }
-
-    pattern_.upload(h_pattern);
-
-    //blurFilter = ocl::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
-    blurFilter = ocl::createGaussianFilter_GPU(CV_8UC1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
-
-    blurForDescriptor = true;
-}
-
-namespace
-{
-    inline float getScale(float scaleFactor, int firstLevel, int level)
-    {
-        return pow(scaleFactor, level - firstLevel);
-    }
-}
-
-void cv::ocl::ORB_OCL::buildScalePyramids(const oclMat& image, const oclMat& mask)
-{
-    CV_Assert(image.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
-
-    imagePyr_.resize(nLevels_);
-    maskPyr_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
-
-        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
-
-        ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
-        ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
-        maskPyr_[level].setTo(Scalar::all(255));
-
-        // Compute the resized image
-        if (level != firstLevel_)
-        {
-            if (level < firstLevel_)
-            {
-                ocl::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
-
-                if (!mask.empty())
-                    ocl::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-            }
-            else
-            {
-                ocl::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
-
-                if (!mask.empty())
-                {
-                    ocl::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-                    ocl::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
-                }
-            }
-        }
-        else
-        {
-            image.copyTo(imagePyr_[level]);
-
-            if (!mask.empty())
-                mask.copyTo(maskPyr_[level]);
-        }
-
-        // Filter keypoints by image border
-        ensureSizeIsEnough(sz, CV_8UC1, buf_);
-        buf_.setTo(Scalar::all(0));
-        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
-        buf_(inner).setTo(Scalar::all(255));
-
-        ocl::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
-    }
-}
-
-static void HarrisResponses_OCL(const oclMat& img, oclMat& keypoints, const int npoints, int blockSize, float harris_k)
-{
-    size_t localThreads[3] = {32, 8, 1};
-    size_t globalThreads[3] = {divUp(npoints, localThreads[1]) * localThreads[1] * localThreads[0],
-                               1,
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "HarrisResponses";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int imgStep = img.step / img.elemSize();
-    int keypointsStep = keypoints.step / keypoints.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&blockSize));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&harris_k));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&imgStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep));
-
-    bool is_cpu = isCpuDevice();
-    if (is_cpu)
-        openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU");
-    else
-    {
-        cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &orb, kernelName);
-        int wave_size = (int)queryWaveFrontSize(kernel);
-        openCLSafeCall(clReleaseKernel(kernel));
-
-        std::string opt = format("-D WAVE_SIZE=%d", wave_size);
-        openCLExecuteKernel(Context::getContext(), &orb, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str());
-    }
-}
-
-static void IC_Angle_OCL(const oclMat& image, oclMat& keypoints, const oclMat& uMax, int npoints, int half_k)
-{
-    size_t localThreads[3] = {32, 8, 1};
-    size_t globalThreads[3] = {divUp(npoints, localThreads[1]) * localThreads[1] * localThreads[0],
-                               1,
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "IC_Angle";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int imageStep = image.step / image.elemSize();
-    int keypointsStep = keypoints.step / keypoints.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&uMax.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&half_k));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&imageStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep));
-
-    bool is_cpu = isCpuDevice();
-    if (is_cpu)
-        openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU");
-    else
-    {
-        cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &orb, kernelName);
-        int wave_size = (int)queryWaveFrontSize(kernel);
-        openCLSafeCall(clReleaseKernel(kernel));
-
-        std::string opt = format("-D WAVE_SIZE=%d", wave_size);
-        openCLExecuteKernel(Context::getContext(), &orb, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str());
-    }
-}
-
-static void convertRowsToChannels_OCL(const oclMat& keypointsIn, oclMat& keypointsOut, int npoints)
-{
-    size_t localThreads[3] = {256, 1, 1};
-    size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0],
-                               1,
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "convertRowsToChannels";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int keypointsInStep = keypointsIn.step / keypointsIn.elemSize();
-    int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsIn.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsInStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep));
-
-    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-static void convertChannelsToRows_OCL(const oclMat& keypointsPos, const oclMat& keypointsResp,
-                                      oclMat& keypointsOut, int npoints)
-{
-    size_t localThreads[3] = {256, 1, 1};
-    size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0],
-                               1,
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "convertChannelsToRows";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int keypointsPosStep = keypointsPos.step / keypointsResp.elemSize();
-    int keypointsRespStep = keypointsResp.step / keypointsResp.elemSize();
-    int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsPos.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsResp.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsPosStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsRespStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep));
-
-    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void cv::ocl::ORB_OCL::computeKeyPointsPyramid()
-{
-    int half_patch_size = patchSize_ / 2;
-
-    keyPointsPyr_.resize(nLevels_);
-    keyPointsCount_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);
-
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        keyPointsCount_[level] = fastDetector_.getKeyPoints(keyPointsPyr_[level]);
-
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        int n_features = static_cast<int>(n_features_per_level_[level]);
-
-        if (scoreType_ == ORB::HARRIS_SCORE)
-        {
-            int featuresToIncrease = 2 * n_features - keyPointsPyr_[level].cols;
-            if (featuresToIncrease < 0) featuresToIncrease = 0;
-
-            // Keeps more points than necessary as FAST does not give amazing corners
-            // and expands rows in the keypoint matrix to store angle, octave and size
-            copyMakeBorder(keyPointsPyr_[level], keyPointsPyr_[level],
-                           0, ROWS_COUNT-keyPointsPyr_[level].rows,
-                           0, featuresToIncrease,
-                           BORDER_CONSTANT, 0.f);
-
-            // Compute the Harris cornerness (better scoring than FAST)
-            HarrisResponses_OCL(imagePyr_[level], keyPointsPyr_[level], keyPointsCount_[level], 7, HARRIS_K);
-        }
-        else
-        {
-            // Expands rows in the keypoint matrix to store angle, octave and size
-            copyMakeBorder(keyPointsPyr_[level], keyPointsPyr_[level],
-                           0, ROWS_COUNT-keyPointsPyr_[level].rows,
-                           0, 0,
-                           BORDER_CONSTANT, 0.f);
-        }
-
-
-        // To use sortByKey the keypoint locations have to be reorganized as one row and two channels,
-        // leaving the keys (responses) as a one row, one channel matrix.
-        // TODO: change this when sortByRow is implemented.
-        oclMat keypointsResp, keypointsPos(1,keyPointsCount_[level],CV_32FC2);
-        keyPointsPyr_[level].row(RESPONSE_ROW).colRange(0,keyPointsCount_[level]).copyTo(keypointsResp);
-
-        convertRowsToChannels_OCL(keyPointsPyr_[level].rowRange(0,2), keypointsPos, keyPointsCount_[level]);
-        ocl::sortByKey(keypointsResp, keypointsPos, SORT_MERGE, true);
-
-        keyPointsCount_[level] = std::min(n_features,keyPointsCount_[level]);
-
-        // The data is then reorganized back to one channel, three rows (X_ROW, Y_ROW, RESPONSE_ROW)
-        convertChannelsToRows_OCL(keypointsPos, keypointsResp, keyPointsPyr_[level], keyPointsCount_[level]);
-
-        // Compute orientation
-        IC_Angle_OCL(imagePyr_[level], keyPointsPyr_[level], uMax_, keyPointsCount_[level], half_patch_size);
-    }
-}
-
-static void computeOrbDescriptor_OCL(const oclMat& img, const oclMat& keypoints, const oclMat& pattern,
-                                     oclMat& desc, const int npoints, const int dsize, const int WTA_K,
-                                     const int offset)
-{
-    size_t localThreads[3] = {32, 8, 1};
-    size_t globalThreads[3] = {divUp(dsize, localThreads[0]) * localThreads[0],
-                               divUp(npoints, localThreads[1]) * localThreads[1],
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "computeOrbDescriptor";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int imgStep = img.step / img.elemSize();
-    int keypointsStep = keypoints.step / keypoints.elemSize();
-    int patternStep = pattern.step / pattern.elemSize();
-    int descStep = desc.step / desc.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&pattern.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&desc.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dsize));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&WTA_K));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&imgStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patternStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descStep));
-
-    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void cv::ocl::ORB_OCL::computeDescriptors(oclMat& descriptors)
-{
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
-    {
-        descriptors.release();
-        return;
-    }
-
-    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);
-
-    int offset = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        if (blurForDescriptor)
-        {
-            // preprocess the resized image
-            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
-            blurFilter->apply(imagePyr_[level], buf_);
-        }
-
-        computeOrbDescriptor_OCL(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level],
-                    pattern_, descriptors, keyPointsCount_[level], descriptorSize(), WTA_K_, offset);
-
-        offset += keyPointsCount_[level];
-    }
-}
-
-static void mergeLocation_OCL(const oclMat& keypointsIn, oclMat& keypointsOut, const int npoints,
-                              const int offset, const float scale, const int octave, const float size)
-{
-    size_t localThreads[3] = {256, 1, 1};
-    size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0],
-                               1,
-                               1};
-
-    Context *clCxt = Context::getContext();
-    String kernelName = "mergeLocation";
-    std::vector< std::pair<size_t, const void *> > args;
-
-    int keypointsInStep = keypointsIn.step / keypointsIn.elemSize();
-    int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize();
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsIn.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsInStep));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep));
-
-    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void cv::ocl::ORB_OCL::mergeKeyPoints(oclMat& keypoints)
-{
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
-    {
-        keypoints.release();
-        return;
-    }
-
-    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);
-
-    int offset = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        float sf = getScale(scaleFactor_, firstLevel_, level);
-
-        float locScale = level != firstLevel_ ? sf : 1.0f;
-        float size = patchSize_ * sf;
-
-        mergeLocation_OCL(keyPointsPyr_[level], keypoints, keyPointsCount_[level], offset, locScale, level, size);
-
-        offset += keyPointsCount_[level];
-    }
-}
-
-void cv::ocl::ORB_OCL::downloadKeyPoints(const oclMat &d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    Mat h_keypoints(d_keypoints);
-
-    convertKeyPoints(h_keypoints, keypoints);
-}
-
-void cv::ocl::ORB_OCL::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);
-
-    const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
-    const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
-    const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
-    const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
-    const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
-    const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);
-
-    keypoints.resize(d_keypoints.cols);
-
-    for (int i = 0; i < d_keypoints.cols; ++i)
-    {
-        KeyPoint kp;
-
-        kp.pt.x = x_ptr[i];
-        kp.pt.y = y_ptr[i];
-        kp.response = response_ptr[i];
-        kp.angle = angle_ptr[i];
-        kp.octave = static_cast<int>(octave_ptr[i]);
-        kp.size = size_ptr[i];
-
-        keypoints[i] = kp;
-    }
-}
-
-void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    mergeKeyPoints(keypoints);
-}
-
-void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    computeDescriptors(descriptors);
-    mergeKeyPoints(keypoints);
-}
-
-void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
-{
-    (*this)(image, mask, d_keypoints_);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
-{
-    (*this)(image, mask, d_keypoints_, descriptors);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::ocl::ORB_OCL::release()
-{
-    imagePyr_.clear();
-    maskPyr_.clear();
-
-    buf_.release();
-
-    keyPointsPyr_.clear();
-
-    fastDetector_.release();
-
-    d_keypoints_.release();
-
-    uMax_.release();
-}
diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp
deleted file mode 100644
index 4cd700a..0000000
--- a/modules/ocl/src/precomp.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Guoping Long, longguoping@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_PRECOMP_H__
-#define __OPENCV_PRECOMP_H__
-
-#if defined _MSC_VER && _MSC_VER >= 1200
-#pragma warning( disable: 4127 4267 4324 4244 4251 4710 4711 4514 4996 )
-#endif
-
-#if defined(_WIN32)
-#include <windows.h>
-#endif
-
-#include "cvconfig.h"
-
-#include <map>
-#include <iostream>
-#include <limits>
-#include <vector>
-#include <algorithm>
-#include <sstream>
-#include <exception>
-#include <stdio.h>
-
-#undef OPENCV_NOSTL
-
-#include "opencv2/imgproc.hpp"
-#include "opencv2/objdetect/objdetect_c.h"
-#include "opencv2/ocl.hpp"
-#include "opencv2/features2d.hpp"
-
-#include "opencv2/core/utility.hpp"
-#include "opencv2/core/private.hpp"
-#include "opencv2/core/ocl.hpp"
-
-#define __ATI__
-
-#if defined (HAVE_OPENCL)
-
-#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
-#include "opencv2/ocl/private/util.hpp"
-#include "safe_call.hpp"
-
-#else /* defined(HAVE_OPENCL) */
-
-static inline void throw_nogpu()
-{
-    CV_Error(CV_GpuNotSupported, "The library is compilled without OpenCL support.\n");
-}
-
-#endif /* defined(HAVE_OPENCL) */
-
-#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp
deleted file mode 100644
index 7e5e35a..0000000
--- a/modules/ocl/src/pyrdown.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//        Dachuan Zhao, dachuan@multicorewareinc.com
-//        Yao Wang, yao@multicorewareinc.com
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////// add subtract multiply divide /////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-static void pyrdown_run(const oclMat &src, const oclMat &dst)
-{
-
-    CV_Assert(src.type() == dst.type());
-    CV_Assert(src.depth() != CV_8S);
-
-    Context  *clCxt = src.clCxt;
-    String kernelName = "pyrDown";
-
-    size_t localThreads[3]  = { 256, 1, 1 };
-    size_t globalThreads[3] = { src.cols, dst.rows, 1};
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols));
-
-    openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
-}
-//////////////////////////////////////////////////////////////////////////////
-// pyrDown
-
-void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
-{
-    int depth = src.depth(), channels = src.channels();
-    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
-    CV_Assert(channels == 1 || channels == 3 || channels == 4);
-
-    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
-
-    pyrdown_run(src, dst);
-}
diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp
deleted file mode 100644
index 2ff3dcd..0000000
--- a/modules/ocl/src/pyrlk.cpp
+++ /dev/null
@@ -1,338 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//      Dachuan Zhao, dachuan@multicorewareinc.com
-//      Yao Wang, yao@multicorewareinc.com
-//      Nathan, liujun@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-struct dim3
-{
-    unsigned int x, y, z;
-};
-
-static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
-{
-    winSize.width *= cn;
-
-    if (winSize.width > 32 && winSize.width > 2 * winSize.height)
-    {
-        block.x = isDeviceArch11 ? 16 : 32;
-        block.y = 8;
-    }
-    else
-    {
-        block.x = 16;
-        block.y = isDeviceArch11 ? 8 : 16;
-    }
-
-    patch.x = (winSize.width  + block.x - 1) / block.x;
-    patch.y = (winSize.height + block.y - 1) / block.y;
-
-    block.z = patch.z = 1;
-}
-
-static void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
-{
-
-    CV_Assert(src.type() == dst.type());
-    CV_Assert(src.depth() != CV_8S);
-
-    Context  *clCxt = src.clCxt;
-
-    String kernelName = "pyrDown";
-
-    size_t localThreads[3]  = { 256, 1, 1 };
-    size_t globalThreads[3] = { src.cols, dst.rows, 1};
-
-    std::vector<std::pair<size_t , const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols));
-
-    openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
-}
-
-static void pyrDown_cus(const oclMat &src, oclMat &dst)
-{
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-
-    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
-    pyrdown_run_cus(src, dst);
-}
-
-static void lkSparse_run(oclMat &I, oclMat &J,
-                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
-                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
-{
-    Context  *clCxt = I.clCxt;
-    String kernelName = "lkSparse";
-    size_t localThreads[3]  = { 8, 8, 1 };
-    size_t globalThreads[3] = { 8 * ptcount, 8, 1};
-    int cn = I.oclchannels();
-    char calcErr = level==0?1:0;
-
-    std::vector<std::pair<size_t , const void *> > args;
-
-    cl_mem ITex = bindTexture(I);
-    cl_mem JTex = bindTexture(J);
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
-    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));
-
-    bool is_cpu = isCpuDevice();
-    if (is_cpu)
-    {
-        openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
-    }
-    else
-    {
-        std::stringstream idxStr;
-        idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
-        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
-        int wave_size = (int)queryWaveFrontSize(kernel);
-        openCLSafeCall(clReleaseKernel(kernel));
-
-        static char opt[32] = {0};
-        sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
-
-        openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
-                            args, I.oclchannels(), I.depth(), opt);
-    }
-    releaseTexture(ITex);
-    releaseTexture(JTex);
-}
-
-void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
-{
-    if (prevPts.empty())
-    {
-        nextPts.release();
-        status.release();
-        return;
-    }
-
-    derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
-
-    iters = std::min(std::max(iters, 0), 100);
-
-    const int cn = prevImg.oclchannels();
-
-    dim3 block, patch;
-    calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
-
-    CV_Assert(derivLambda >= 0);
-    CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
-    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
-    CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
-    CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
-
-    if (useInitialFlow)
-        CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
-    else
-        ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
-
-    oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
-    oclMat temp2 = nextPts.reshape(1);
-    multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2);
-
-    ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
-    status.setTo(Scalar::all(1));
-
-    bool errMat = false;
-    if (!err)
-    {
-        err = new oclMat(1, prevPts.cols, CV_32FC1);
-        errMat = true;
-    }
-    else
-        ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
-
-    // build the image pyramids.
-    prevPyr_.resize(maxLevel + 1);
-    nextPyr_.resize(maxLevel + 1);
-
-    if (cn == 1 || cn == 4)
-    {
-        prevImg.convertTo(prevPyr_[0], CV_32F);
-        nextImg.convertTo(nextPyr_[0], CV_32F);
-    }
-
-    for (int level = 1; level <= maxLevel; ++level)
-    {
-        pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
-        pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
-    }
-
-    // dI/dx ~ Ix, dI/dy ~ Iy
-    for (int level = maxLevel; level >= 0; level--)
-    {
-        lkSparse_run(prevPyr_[level], nextPyr_[level],
-                     prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols,
-                     level, /*block, */patch, winSize, iters);
-    }
-
-    clFinish(*(cl_command_queue*)prevImg.clCxt->getOpenCLCommandQueuePtr());
-
-    if(errMat)
-        delete err;
-}
-
-static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
-                 oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
-{
-    Context  *clCxt = I.clCxt;
-
-    String kernelName = "lkDense";
-
-    size_t localThreads[3]  = { 16, 16, 1 };
-    size_t globalThreads[3] = { I.cols, I.rows, 1};
-
-    cl_char calcErr = err ? 1 : 0;
-
-    cl_mem ITex;
-    cl_mem JTex;
-
-    ITex = bindTexture(I);
-    JTex = bindTexture(J);
-
-    std::vector<std::pair<size_t , const void *> > args;
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
-
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&u.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&u.step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&v.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&v.step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevU.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevU.step ));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevV.data ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevV.step ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
-    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));
-
-    openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
-
-    releaseTexture(ITex);
-    releaseTexture(JTex);
-}
-
-void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
-{
-    CV_Assert(prevImg.type() == CV_8UC1);
-    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
-    CV_Assert(maxLevel >= 0);
-    CV_Assert(winSize.width > 2 && winSize.height > 2);
-
-    if (err)
-        err->create(prevImg.size(), CV_32FC1);
-
-    prevPyr_.resize(maxLevel + 1);
-    nextPyr_.resize(maxLevel + 1);
-
-    prevPyr_[0] = prevImg;
-    nextImg.convertTo(nextPyr_[0], CV_32F);
-
-    for (int level = 1; level <= maxLevel; ++level)
-    {
-        pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
-        pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
-    }
-
-    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
-    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
-    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
-    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
-    uPyr_[1].setTo(Scalar::all(0));
-    vPyr_[1].setTo(Scalar::all(0));
-
-    Size winSize2i(winSize.width, winSize.height);
-
-    int idx = 0;
-
-    for (int level = maxLevel; level >= 0; level--)
-    {
-        int idx2 = (idx + 1) & 1;
-
-        lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
-                    level == 0 ? err : 0, winSize2i, iters);
-
-        if (level > 0)
-            idx = idx2;
-    }
-
-    uPyr_[idx].copyTo(u);
-    vPyr_[idx].copyTo(v);
-
-    clFinish(*(cl_command_queue*)prevImg.clCxt->getOpenCLCommandQueuePtr());
-}
diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp
deleted file mode 100644
index 95d3a34..0000000
--- a/modules/ocl/src/pyrup.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//        Zhang Chunpeng chunpeng@multicorewareinc.com
-//        Yao Wang, yao@multicorewareinc.com
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-/* Haar features calculation */
-//#define EMU
-
-namespace cv
-{
-    namespace ocl
-    {
-        void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
-        {
-            int depth = src.depth(), channels = src.channels(), oclChannels = src.oclchannels();
-
-            CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
-            CV_Assert(channels == 1 || channels == 3 || channels == 4);
-
-            dst.create(src.rows * 2, src.cols * 2, src.type());
-
-            Context *clCxt = src.clCxt;
-
-            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float" };
-            char buildOptions[250], convertString[50];
-            const char * const channelsString = oclChannels == 1 ? "" : "4";
-            sprintf(convertString, "convert_%s%s_sat_rte", typeMap[depth], channelsString);
-            sprintf(buildOptions, "-D Type=%s%s -D floatType=float%s -D convertToType=%s -D convertToFloat=%s",
-                    typeMap[depth], channelsString, channelsString,
-                    depth == CV_32F ? "" : convertString,
-                    oclChannels == 4 ? "convert_float4" : "(float)");
-
-            const String kernelName = "pyrUp";
-            int dststep = dst.step / dst.elemSize(), srcstep = src.step / src.elemSize();
-
-            std::vector< std::pair<size_t, const void *> > args;
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcstep));
-            args.push_back( std::make_pair( sizeof(cl_int), (void *)&dststep));
-
-            size_t globalThreads[3] = {dst.cols, dst.rows, 1};
-            size_t localThreads[3]  = {16, 16, 1};
-
-
-            openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, -1, -1,
-                                buildOptions);
-        }
-    }
-}
diff --git a/modules/ocl/src/safe_call.hpp b/modules/ocl/src/safe_call.hpp
deleted file mode 100644
index 14cbb6d..0000000
--- a/modules/ocl/src/safe_call.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//   Long Guoping , longguoping@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OPENCL_SAFE_CALL_HPP__
-#define __OPENCV_OPENCL_SAFE_CALL_HPP__
-
-#include "opencv2/core/opencl/runtime/opencl_core.hpp"
-
-#define openCLSafeCall(expr)  ___openCLSafeCall(expr, __FILE__, __LINE__, CV_Func)
-#define openCLVerifyCall(res) ___openCLSafeCall(res, __FILE__, __LINE__, CV_Func)
-
-
-namespace cv
-{
-    namespace ocl
-    {
-        const char *getOpenCLErrorString( int err );
-
-        static inline void ___openCLSafeCall(int err, const char *file, const int line, const char *func = "")
-        {
-            if (CL_SUCCESS != err)
-                cv::error(Error::OpenCLApiCallError, getOpenCLErrorString(err), func, file, line);
-        }
-    }
-}
-
-#endif /* __OPENCV_OPENCL_SAFE_CALL_HPP__ */
diff --git a/modules/ocl/src/sort_by_key.cpp b/modules/ocl/src/sort_by_key.cpp
deleted file mode 100644
index 596f94e..0000000
--- a/modules/ocl/src/sort_by_key.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-namespace ocl
-{
-void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan);
-
-#ifndef ANDROID
-//TODO(pengx17): change this value depending on device other than a constant
-const static unsigned int GROUP_SIZE = 256;
-#endif
-
-const char * depth_strings[] =
-{
-    "uchar",   //CV_8U
-    "char",    //CV_8S
-    "ushort",  //CV_16U
-    "short",   //CV_16S
-    "int",     //CV_32S
-    "float",   //CV_32F
-    "double"   //CV_64F
-};
-
-void static genSortBuildOption(const oclMat& keys, const oclMat& vals, bool isGreaterThan, char * build_opt_buf)
-{
-    sprintf(build_opt_buf, "-D IS_GT=%d -D K_T=%s -D V_T=%s",
-            isGreaterThan?1:0, depth_strings[keys.depth()], depth_strings[vals.depth()]);
-    if(vals.oclchannels() > 1)
-    {
-        sprintf( build_opt_buf + strlen(build_opt_buf), "%d", vals.oclchannels());
-    }
-}
-inline bool isSizePowerOf2(size_t size)
-{
-    return ((size - 1) & (size)) == 0;
-}
-
-namespace bitonic_sort
-{
-static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
-{
-    CV_Assert(isSizePowerOf2(vecSize));
-
-    Context * cxt = Context::getContext();
-    size_t globalThreads[3] = {vecSize / 2, 1, 1};
-
-    // 2^numStages should be equal to vecSize or the output is invalid
-    int numStages = 0;
-    for(int i = vecSize; i > 1; i >>= 1)
-    {
-        ++numStages;
-    }
-    char build_opt_buf [100];
-    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
-    const int argc = 5;
-    std::vector< std::pair<size_t, const void *> > args(argc);
-    String kernelname = "bitonicSort";
-
-    args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
-    args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
-    args[2] = std::make_pair(sizeof(cl_int), (void *)&vecSize);
-
-    for(int stage = 0; stage < numStages; ++stage)
-    {
-        args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
-        for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
-        {
-            args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
-#ifdef ANDROID
-            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
-#else
-            size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
-            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
-#endif
-        }
-    }
-}
-}  /* bitonic_sort */
-
-namespace selection_sort
-{
-// FIXME:
-// This function cannot sort arrays with duplicated keys
-static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
-{
-    CV_Error(-1, "This function is incorrect at the moment.");
-    Context * cxt = Context::getContext();
-
-    size_t globalThreads[3] = {vecSize, 1, 1};
-
-    std::vector< std::pair<size_t, const void *> > args;
-    char build_opt_buf [100];
-    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
-
-    //local
-    String kernelname = "selectionSortLocal";
-#ifdef ANDROID
-    int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize();
-#else
-    int lds_size = GROUP_SIZE * keys.elemSize();
-#endif
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
-    args.push_back(std::make_pair(lds_size,       (void*)NULL));
-
-#ifdef ANDROID
-    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
-#else
-    size_t localThreads[3] = {GROUP_SIZE, 1, 1};
-    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
-#endif
-
-    //final
-    kernelname = "selectionSortFinal";
-    args.pop_back();
-#ifdef ANDROID
-    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
-#else
-    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
-#endif
-}
-
-}  /* selection_sort */
-
-
-namespace radix_sort
-{
-//FIXME(pengx17):
-// exclusive scan, need to be optimized as this is too naive...
-//void naive_scan_addition(oclMat& input, oclMat& output)
-//{
-//    Context * cxt = Context::getContext();
-//    size_t vecSize = input.cols;
-//    size_t globalThreads[3] = {1, 1, 1};
-//    size_t localThreads[3]  = {1, 1, 1};
-//
-//    String kernelname = "naiveScanAddition";
-//
-//    std::vector< std::pair<size_t, const void *> > args;
-//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
-//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
-//    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
-//    openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1);
-//}
-
-void static naive_scan_addition_cpu(oclMat& input, oclMat& output)
-{
-    Mat m_input = input, m_output(output.size(), output.type());
-    MatIterator_<int> i_mit = m_input.begin<int>();
-    MatIterator_<int> o_mit = m_output.begin<int>();
-    *o_mit = 0;
-    ++i_mit;
-    ++o_mit;
-    for(; i_mit != m_input.end<int>(); ++i_mit, ++o_mit)
-    {
-        *o_mit = *(o_mit - 1) + *(i_mit - 1);
-    }
-    output = m_output;
-}
-
-
-//radix sort ported from Bolt
-static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGreaterThan)
-{
-    CV_Assert(keys.depth() == CV_32S || keys.depth() == CV_32F); // we assume keys are 4 bytes
-
-    bool isKeyFloat = keys.type() == CV_32F;
-
-    const int RADIX = 4; //Now you cannot replace this with Radix 8 since there is a
-                         //local array of 16 elements in the histogram kernel.
-    const int RADICES = (1 << RADIX); //Values handeled by each work-item?
-
-    bool  newBuffer = false;
-    size_t vecSize = origVecSize;
-
-    unsigned int groupSize  = RADICES;
-
-    size_t mulFactor = groupSize * RADICES;
-
-    oclMat buffer_keys, buffer_vals;
-
-    if(origVecSize % mulFactor != 0)
-    {
-        vecSize = ((vecSize + mulFactor) / mulFactor) * mulFactor;
-        buffer_keys.create(1, vecSize, keys.type());
-        buffer_vals.create(1, vecSize, vals.type());
-        Scalar padding_value;
-        oclMat roi_buffer_vals = buffer_vals(Rect(0,0,origVecSize,1));
-
-        if(isGreaterThan)
-        {
-            switch(buffer_keys.depth())
-            {
-            case CV_32F:
-                padding_value = Scalar::all(-FLT_MAX);
-                break;
-            case CV_32S:
-                padding_value = Scalar::all(INT_MIN);
-                break;
-            }
-        }
-        else
-        {
-            switch(buffer_keys.depth())
-            {
-            case CV_32F:
-                padding_value = Scalar::all(FLT_MAX);
-                break;
-            case CV_32S:
-                padding_value = Scalar::all(INT_MAX);
-                break;
-            }
-        }
-        ocl::copyMakeBorder(
-            keys(Rect(0,0,origVecSize,1)), buffer_keys,
-            0, 0, 0, vecSize - origVecSize,
-            BORDER_CONSTANT, padding_value);
-        vals(Rect(0,0,origVecSize,1)).copyTo(roi_buffer_vals);
-        newBuffer = true;
-    }
-    else
-    {
-        buffer_keys = keys;
-        buffer_vals = vals;
-        newBuffer = false;
-    }
-    oclMat swap_input_keys(1, vecSize, keys.type());
-    oclMat swap_input_vals(1, vecSize, vals.type());
-    oclMat hist_bin_keys(1, vecSize, CV_32SC1);
-    oclMat hist_bin_dest_keys(1, vecSize, CV_32SC1);
-
-    Context * cxt = Context::getContext();
-
-    size_t globalThreads[3] = {vecSize / RADICES, 1, 1};
-    size_t localThreads[3]  = {groupSize, 1, 1};
-
-    std::vector< std::pair<size_t, const void *> > args;
-    char build_opt_buf [100];
-    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
-
-    //additional build option for radix sort
-    sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
-
-    String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};
-
-    int swap = 0;
-    for(int bits = 0; bits < (static_cast<int>(keys.elemSize()) * 8); bits += RADIX)
-    {
-        args.clear();
-        //Do a histogram pass locally
-        if(swap == 0)
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
-        }
-        else
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
-        }
-        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_keys.data));
-        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
-        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[0], globalThreads, localThreads,
-            args, -1, -1, build_opt_buf);
-
-        args.clear();
-        //Perform a global scan
-        naive_scan_addition_cpu(hist_bin_keys, hist_bin_dest_keys);
-        // end of scan
-        if(swap == 0)
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
-        }
-        else
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
-        }
-        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_dest_keys.data));
-        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
-
-        if(swap == 0)
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
-        }
-        else
-        {
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
-            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
-        }
-        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[1], globalThreads, localThreads,
-            args, -1, -1, build_opt_buf);
-        swap = swap ? 0 : 1;
-    }
-    if(newBuffer)
-    {
-        buffer_keys(Rect(0,0,origVecSize,1)).copyTo(keys);
-        buffer_vals(Rect(0,0,origVecSize,1)).copyTo(vals);
-    }
-}
-
-}  /* radix_sort */
-
-namespace merge_sort
-{
-static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
-{
-    Context * cxt = Context::getContext();
-
-    const size_t GROUP_SIZE = cxt->getDeviceInfo().maxWorkGroupSize >= 256 ? 256: 128;
-
-    size_t globalThreads[3] = {vecSize, 1, 1};
-    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
-
-    std::vector< std::pair<size_t, const void *> > args;
-    char build_opt_buf [100];
-    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
-
-    String kernelname[] = {String("blockInsertionSort"), String("merge")};
-    int keylds_size = GROUP_SIZE * keys.elemSize();
-    int vallds_size = GROUP_SIZE * vals.elemSize();
-    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&keys.data));
-    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&vals.data));
-    args.push_back(std::make_pair(sizeof(cl_uint), (void *)&vecSize));
-    args.push_back(std::make_pair(keylds_size,     (void*)NULL));
-    args.push_back(std::make_pair(vallds_size,     (void*)NULL));
-
-    openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[0], globalThreads, localThreads, args, -1, -1, build_opt_buf);
-
-    //  Early exit for the case of no merge passes, values are already in destination vector
-    if(vecSize <= GROUP_SIZE)
-    {
-        return;
-    }
-
-    //  An odd number of elements requires an extra merge pass to sort
-    size_t numMerges = 0;
-    //  Calculate the log2 of vecSize, taking into acvecSize our block size from kernel 1 is 64
-    //  this is how many merge passes we want
-    size_t log2BlockSize = vecSize >> 6;
-    for( ; log2BlockSize > 1; log2BlockSize >>= 1 )
-    {
-        ++numMerges;
-    }
-    //  Check to see if the input vector size is a power of 2, if not we will need last merge pass
-    numMerges += isSizePowerOf2(vecSize)? 1: 0;
-
-    //  Allocate a flipflop buffer because the merge passes are out of place
-    oclMat tmpKeyBuffer(keys.size(), keys.type());
-    oclMat tmpValBuffer(vals.size(), vals.type());
-    args.resize(8);
-
-    args[4] = std::make_pair(sizeof(cl_uint), (void *)&vecSize);
-    args[6] = std::make_pair(keylds_size,    (void*)NULL);
-    args[7] = std::make_pair(vallds_size,    (void*)NULL);
-
-    for(size_t pass = 1; pass <= numMerges; ++pass )
-    {
-        //  For each pass, flip the input-output buffers
-        if( pass & 0x1 )
-        {
-            args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
-            args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
-            args[2] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
-            args[3] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
-        }
-        else
-        {
-            args[0] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
-            args[1] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
-            args[2] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
-            args[3] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
-        }
-        //  For each pass, the merge window doubles
-        unsigned int srcLogicalBlockSize = static_cast<unsigned int>( localThreads[0] << (pass-1) );
-        args[5] = std::make_pair(sizeof(cl_uint), (void *)&srcLogicalBlockSize);
-        openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[1], globalThreads, localThreads, args, -1, -1, build_opt_buf);
-    }
-    //  If there are an odd number of merges, then the output data is sitting in the temp buffer.  We need to copy
-    //  the results back into the input array
-    if( numMerges & 1 )
-    {
-        tmpKeyBuffer.copyTo(keys);
-        tmpValBuffer.copyTo(vals);
-    }
-}
-}  /* merge_sort */
-
-}
-} /* namespace cv { namespace ocl */
-
-
-void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan)
-{
-    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
-    CV_Assert( keys.channels() == 1 ); // we only allow one channel keys
-    CV_Assert( vecSize <= static_cast<size_t>(keys.cols) );
-    switch(method)
-    {
-    case SORT_BITONIC:
-        bitonic_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
-        break;
-    case SORT_SELECTION:
-        selection_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
-        break;
-    case SORT_RADIX:
-        radix_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
-        break;
-    case SORT_MERGE:
-        merge_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
-        break;
-    }
-}
-
-void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, int method, bool isGreaterThan)
-{
-    CV_Assert( keys.size() == vals.size() );
-    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
-    size_t vecSize = static_cast<size_t>(keys.cols);
-    sortByKey(keys, vals, vecSize, method, isGreaterThan);
-}
diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp
deleted file mode 100644
index 5838697..0000000
--- a/modules/ocl/src/split_merge.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-namespace cv
-{
-    namespace ocl
-    {
-        namespace split_merge
-        {
-            static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
-            {
-                if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F)
-                {
-                    CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-                    return;
-                }
-
-                Context  *clCxt = mat_dst.clCxt;
-                int channels = mat_dst.oclchannels();
-                int depth = mat_dst.depth();
-
-                String kernelName = "merge_vector";
-
-                int vector_lengths[4][7] = {{0, 0, 0, 0, 0, 0, 0},
-                    {2, 2, 1, 1, 1, 1, 1},
-                    {4, 4, 2, 2 , 1, 1, 1},
-                    {1, 1, 1, 1, 1, 1, 1}
-                };
-
-                size_t vector_length = vector_lengths[channels - 1][depth];
-                int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1);
-                int cols = divUp(mat_dst.cols + offset_cols, vector_length);
-
-                size_t localThreads[3]  = { 64, 4, 1 };
-                size_t globalThreads[3] = { cols, mat_dst.rows, 1 };
-
-                int dst_step1 = mat_dst.cols * mat_dst.elemSize();
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.offset));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[0].data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[0].step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[0].offset));
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[1].data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[1].step));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[1].offset));
-
-                if(channels == 4)
-                {
-                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
-                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
-                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
-
-                    if(n == 3)
-                    {
-                        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
-                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
-                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
-                    }
-                    else if( n == 4)
-                    {
-                        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
-                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
-                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[3].offset));
-                    }
-                }
-
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.rows));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1));
-
-                openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth);
-            }
-            static void merge(const oclMat *mat_src, size_t n, oclMat &mat_dst)
-            {
-                CV_Assert(mat_src);
-                CV_Assert(n > 0);
-
-                int depth = mat_src[0].depth();
-                Size size = mat_src[0].size();
-
-                int total_channels = 0;
-
-                for(size_t i = 0; i < n; ++i)
-                {
-                    CV_Assert(depth == mat_src[i].depth());
-                    CV_Assert(size == mat_src[i].size());
-
-                    total_channels += mat_src[i].oclchannels();
-                }
-
-                CV_Assert(total_channels <= 4);
-
-                if(total_channels == 1)
-                {
-                    mat_src[0].copyTo(mat_dst);
-                    return;
-                }
-
-                mat_dst.create(size, CV_MAKETYPE(depth, total_channels));
-                merge_vector_run(mat_src, n, mat_dst);
-            }
-            static void split_vector_run(const oclMat &src, oclMat *dst)
-            {
-
-                if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
-                {
-                    CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
-                    return;
-                }
-
-                Context  *clCtx = src.clCxt;
-                int channels = src.channels();
-                int depth = src.depth();
-                depth = (depth == CV_8S) ? CV_8U : depth;
-                depth = (depth == CV_16S) ? CV_16U : depth;
-
-                String kernelName = "split_vector";
-
-                size_t VEC_SIZE = 4;
-
-                std::vector<std::pair<size_t , const void *> > args;
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step));
-                int srcOffsetXBytes = src.offset % src.step;
-                int srcOffsetY = src.offset / src.step;
-                cl_int2 srcOffset = {{srcOffsetXBytes, srcOffsetY}};
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&srcOffset));
-
-                bool dst0Aligned = false, dst1Aligned = false, dst2Aligned = false, dst3Aligned = false;
-                int alignSize = dst[0].elemSize1() * VEC_SIZE;
-                int alignMask = alignSize - 1;
-
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[0].data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[0].step));
-                int dst0OffsetXBytes = dst[0].offset % dst[0].step;
-                int dst0OffsetY = dst[0].offset / dst[0].step;
-                cl_int2 dst0Offset = {{dst0OffsetXBytes, dst0OffsetY}};
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst0Offset));
-                if ((dst0OffsetXBytes & alignMask) == 0)
-                    dst0Aligned = true;
-
-                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[1].data));
-                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[1].step));
-                int dst1OffsetXBytes = dst[1].offset % dst[1].step;
-                int dst1OffsetY = dst[1].offset / dst[1].step;
-                cl_int2 dst1Offset = {{dst1OffsetXBytes, dst1OffsetY}};
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst1Offset));
-                if ((dst1OffsetXBytes & alignMask) == 0)
-                    dst1Aligned = true;
-
-                // DON'T MOVE VARIABLES INTO 'IF' BODY
-                int dst2OffsetXBytes, dst2OffsetY;
-                cl_int2 dst2Offset;
-                int dst3OffsetXBytes, dst3OffsetY;
-                cl_int2 dst3Offset;
-                if (channels >= 3)
-                {
-                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[2].data));
-                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[2].step));
-                    dst2OffsetXBytes = dst[2].offset % dst[2].step;
-                    dst2OffsetY = dst[2].offset / dst[2].step;
-                    dst2Offset.s[0] = dst2OffsetXBytes; dst2Offset.s[1] = dst2OffsetY;
-                    args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst2Offset));
-                    if ((dst2OffsetXBytes & alignMask) == 0)
-                        dst2Aligned = true;
-                }
-
-                if (channels >= 4)
-                {
-                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[3].data));
-                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[3].step));
-                    dst3OffsetXBytes = dst[3].offset % dst[3].step;
-                    dst3OffsetY = dst[3].offset / dst[3].step;
-                    dst3Offset.s[0] = dst3OffsetXBytes; dst3Offset.s[1] = dst3OffsetY;
-                    args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst3Offset));
-                    if ((dst3OffsetXBytes & alignMask) == 0)
-                        dst3Aligned = true;
-                }
-
-                cl_int2 size = {{ src.cols, src.rows }};
-                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&size));
-
-                String build_options =
-                        cv::format("-D VEC_SIZE=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d",
-                                   (int)VEC_SIZE, depth, channels);
-
-                if (dst0Aligned)
-                    build_options += " -D DST0_ALIGNED";
-                if (dst1Aligned)
-                    build_options += " -D DST1_ALIGNED";
-                if (dst2Aligned)
-                    build_options += " -D DST2_ALIGNED";
-                if (dst3Aligned)
-                    build_options += " -D DST3_ALIGNED";
-
-                const DeviceInfo& devInfo = clCtx->getDeviceInfo();
-
-                // TODO Workaround for issues. Need to investigate a problem.
-                if (channels == 2
-                        && devInfo.deviceType == CVCL_DEVICE_TYPE_CPU
-                        && devInfo.platform->platformVendor.find("Intel") != std::string::npos
-                        && (devInfo.deviceVersion.find("Build 56860") != std::string::npos
-                            || devInfo.deviceVersion.find("Build 76921") != std::string::npos
-                            || devInfo.deviceVersion.find("Build 78712") != std::string::npos))
-                    build_options += " -D BYPASS_VSTORE=true";
-
-                size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 };
-                openCLExecuteKernel(clCtx, &split_mat, kernelName, globalThreads, NULL, args, -1, -1, build_options.c_str());
-            }
-            static void split(const oclMat &mat_src, oclMat *mat_dst)
-            {
-                CV_Assert(mat_dst);
-
-                int depth = mat_src.depth();
-                int num_channels = mat_src.channels();
-                Size size = mat_src.size();
-
-                if (num_channels == 1)
-                {
-                    mat_src.copyTo(mat_dst[0]);
-                    return;
-                }
-
-                for (int i = 0; i < mat_src.oclchannels(); i++)
-                    mat_dst[i].create(size, CV_MAKETYPE(depth, 1));
-
-                split_vector_run(mat_src, mat_dst);
-            }
-        }
-    }
-}
-
-void cv::ocl::merge(const oclMat *src, size_t n, oclMat &dst)
-{
-    split_merge::merge(src, n, dst);
-}
-void cv::ocl::merge(const std::vector<oclMat> &src, oclMat &dst)
-{
-    split_merge::merge(&src[0], src.size(), dst);
-}
-
-void cv::ocl::split(const oclMat &src, oclMat *dst)
-{
-    split_merge::split(src, dst);
-}
-void cv::ocl::split(const oclMat &src, std::vector<oclMat> &dst)
-{
-    dst.resize(src.oclchannels()); // TODO Why oclchannels?
-    if(src.oclchannels() > 0)
-        split_merge::split(src, &dst[0]);
-}
diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp
deleted file mode 100644
index 2f9391c..0000000
--- a/modules/ocl/src/stereo_csbp.cpp
+++ /dev/null
@@ -1,698 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Jin Ma, jin@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        namespace stereoCSBP
-        {
-            static inline int divUp(int total, int grain)
-            {
-                return (total + grain - 1) / grain;
-            }
-            static String get_kernel_name(String kernel_name, int data_type)
-            {
-                return kernel_name + (data_type == CV_16S ? "0" : "1");
-            }
-            using cv::ocl::StereoConstantSpaceBP;
-            //////////////////////////////////////////////////////////////////////////////////
-            /////////////////////////////////init_data_cost//////////////////////////////////
-            //////////////////////////////////////////////////////////////////////////////////
-            static void init_data_cost_caller(const oclMat &left, const oclMat &right, oclMat &temp,
-                StereoConstantSpaceBP &rthis,
-                int msg_step, int h, int w, int level)
-            {
-                Context  *clCxt = left.clCxt;
-                int data_type = rthis.msg_type;
-                int channels = left.oclchannels();
-
-                String kernelName = get_kernel_name("init_data_cost_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                //size_t blockSize = 256;
-                size_t localThreads[]  = {32, 8 ,1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
-
-                int cdisp_step1 = msg_step * h;
-                openCLVerifyKernel(clCxt, kernel,  localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem),  (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem),  (void *)&left.data));
-                openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem),  (void *)&right.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int),  (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int),  (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int),  (void *)&level));
-                openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int),  (void *)&channels));
-                openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int),  (void *)&msg_step));
-                openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&rthis.data_weight));
-                openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_float), (void *)&rthis.max_data_term));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&cdisp_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-
-            static void init_data_cost_reduce_caller(const oclMat &left, const oclMat &right, oclMat &temp,
-                StereoConstantSpaceBP &rthis,
-                int msg_step, int h, int w, int level)
-            {
-
-                Context  *clCxt = left.clCxt;
-                int data_type = rthis.msg_type;
-                int channels = left.oclchannels();
-                int win_size = (int)std::pow(2.f, level);
-
-                String kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                const int threadsNum = 256;
-                //size_t blockSize = threadsNum;
-                size_t localThreads[3]  = {win_size, 1, threadsNum / win_size};
-                size_t globalThreads[3] = { w *localThreads[0],
-                    h * divUp(rthis.ndisp, localThreads[2]) *localThreads[1], 1 * localThreads[2]
-                };
-
-                int local_mem_size = threadsNum * sizeof(float);
-                int cdisp_step1 = msg_step * h;
-
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&left.data));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&right.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  local_mem_size,  (void *)NULL));
-                openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_int),  (void *)&level));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&left.rows));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&left.cols));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&win_size));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&channels));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&rthis.ndisp));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&left.step));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_float), (void *)&rthis.data_weight));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.max_data_term));
-                openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int),  (void *)&rthis.min_disp_th));
-                openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&cdisp_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int),  (void *)&msg_step));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-
-            static void get_first_initial_local_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
-                oclMat &temp, StereoConstantSpaceBP &rthis,
-                int h, int w, int nr_plane, int msg_step)
-            {
-                Context  *clCxt = temp.clCxt;
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("get_first_k_initial_local_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                //size_t blockSize = 256;
-                size_t localThreads[]  = {32, 8 ,1};
-                size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 };
-
-                int disp_step = msg_step * h;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
-                openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
-                openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
-                openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-            static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
-                oclMat &temp, StereoConstantSpaceBP &rthis,
-                int h, int w, int nr_plane, int msg_step)
-            {
-                Context  *clCxt = temp.clCxt;
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("get_first_k_initial_global_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                //size_t blockSize = 256;
-                size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
-
-                int disp_step = msg_step * h;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
-                openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
-                openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
-                openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-
-            static void init_data_cost(const oclMat &left, const oclMat &right, oclMat &temp, StereoConstantSpaceBP &rthis,
-                uchar *disp_selected_pyr, uchar *data_cost_selected,
-                size_t msg_step, int h, int w, int level, int nr_plane)
-            {
-
-                if(level <= 1)
-                    init_data_cost_caller(left, right, temp, rthis, msg_step, h, w, level);
-                else
-                    init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level);
-
-                if(rthis.use_local_init_data_cost == true)
-                {
-                    get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step);
-                }
-                else
-                {
-                    get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w,
-                        nr_plane, msg_step);
-                }
-            }
-
-            ///////////////////////////////////////////////////////////////////////////////////////////////////
-            ///////////////////////////////////compute_data_cost//////////////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////////////////////////////
-            static void compute_data_cost_caller(uchar *disp_selected_pyr, uchar *data_cost,
-                StereoConstantSpaceBP &rthis, int msg_step1,
-                int msg_step2, const oclMat &left, const oclMat &right, int h,
-                int w, int h2, int level, int nr_plane)
-            {
-                Context  *clCxt = left.clCxt;
-                int channels = left.oclchannels();
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("compute_data_cost_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                size_t localThreads[]  = { 32, 8, 1 };
-                size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 };
-
-                int disp_step1 = msg_step1 * h;
-                int disp_step2 = msg_step2 * h2;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&data_cost));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&left.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&right.data));
-                openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_int),  (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&level));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&channels));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&msg_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&msg_step2));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&disp_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&disp_step2));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.data_weight));
-                openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
-                openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&left.step));
-                openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int),  (void *)&rthis.min_disp_th));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-            static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
-                StereoConstantSpaceBP &rthis, int msg_step1,
-                int msg_step2, const oclMat &left, const oclMat &right, int h,
-                int w, int h2, int level, int nr_plane)
-            {
-                Context  *clCxt = left.clCxt;
-                int data_type = rthis.msg_type;
-                int channels = left.oclchannels();
-                int win_size = (int)std::pow(2.f, level);
-
-                String kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                const size_t threadsNum = 256;
-                //size_t blockSize = threadsNum;
-                size_t localThreads[3]  = { win_size, 1, threadsNum / win_size };
-                size_t globalThreads[3] = { w *localThreads[0],
-                    h * divUp(nr_plane, localThreads[2]) *localThreads[1], 1 * localThreads[2]
-                };
-
-                int disp_step1 = msg_step1 * h;
-                int disp_step2 = msg_step2 * h2;
-                size_t local_mem_size = threadsNum * sizeof(float);
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&data_cost));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&left.data));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&right.data));
-                openCLSafeCall(clSetKernelArg(kernel, 4, local_mem_size,   (void *)NULL));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&level));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&left.rows));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&left.cols));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&channels));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&win_size));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&msg_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int),  (void *)&msg_step2));
-                openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int),  (void *)&disp_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&disp_step2));
-                openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_float), (void *)&rthis.data_weight));
-                openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
-                openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int),  (void *)&left.step));
-                openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int),  (void *)&rthis.min_disp_th));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-            static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
-                int msg_step1, int msg_step2, const oclMat &left, const oclMat &right, int h, int w,
-                int h2, int level, int nr_plane)
-            {
-                if(level <= 1)
-                    compute_data_cost_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
-                    left, right, h, w, h2, level, nr_plane);
-                else
-                    compute_data_cost_reduce_caller(disp_selected_pyr, data_cost, rthis,  msg_step1, msg_step2,
-                    left, right, h, w, h2, level, nr_plane);
-            }
-            ////////////////////////////////////////////////////////////////////////////////////////////////
-            //////////////////////////////////////init message//////////////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////////////////////////////
-            static void init_message(uchar *u_new, uchar *d_new, uchar *l_new, uchar *r_new,
-                uchar *u_cur, uchar *d_cur, uchar *l_cur, uchar *r_cur,
-                uchar *disp_selected_pyr_new, uchar *disp_selected_pyr_cur,
-                uchar *data_cost_selected, uchar *data_cost, oclMat &temp, StereoConstantSpaceBP rthis,
-                size_t msg_step1, size_t msg_step2, int h, int w, int nr_plane,
-                int h2, int w2, int nr_plane2)
-            {
-                Context  *clCxt = temp.clCxt;
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("init_message_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                //size_t blockSize = 256;
-                size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 };
-
-                int disp_step1 = msg_step1 * h;
-                int disp_step2 = msg_step2 * h2;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem), (void *)&u_new));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem), (void *)&d_new));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem), (void *)&l_new));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem), (void *)&r_new));
-                openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem), (void *)&u_cur));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem), (void *)&d_cur));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem), (void *)&l_cur));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_mem), (void *)&r_cur));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_mem), (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_mem), (void *)&disp_selected_pyr_new));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *)&disp_selected_pyr_cur));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *)&data_cost_selected));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *)&data_cost));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&h2));
-                openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_int), (void *)&w2));
-                openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&nr_plane2));
-                openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&disp_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
-                openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
-                openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-            ////////////////////////////////////////////////////////////////////////////////////////////////
-            ///////////////////////////calc_all_iterations////////////////////////////////////////////////
-            //////////////////////////////////////////////////////////////////////////////////////////////
-            static void calc_all_iterations_caller(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
-                uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
-                int msg_step, int h, int w, int nr_plane, int i)
-            {
-                Context  *clCxt = temp.clCxt;
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("compute_message_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-                size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(w, (localThreads[0]) << 1) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
-
-                int disp_step = msg_step * h;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&u));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&d));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&l));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&r));
-                openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem),  (void *)&data_cost_selected));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem),  (void *)&temp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&h));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&w));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&i));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_float), (void *)&rthis.max_disc_term));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&disp_step));
-                openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int),  (void *)&msg_step));
-                openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-            static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
-                uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
-                int msg_step, int h, int w, int nr_plane)
-            {
-                for(int t = 0; t < rthis.iters; t++)
-                    calc_all_iterations_caller(u, d, l, r, data_cost_selected, disp_selected_pyr, temp, rthis,
-                    msg_step, h, w, nr_plane, t & 1);
-            }
-
-            ///////////////////////////////////////////////////////////////////////////////////////////////
-            //////////////////////////compute_disp////////////////////////////////////////////////////////
-            /////////////////////////////////////////////////////////////////////////////////////////////
-            static void compute_disp(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
-                uchar *disp_selected_pyr, StereoConstantSpaceBP &rthis, size_t msg_step,
-                oclMat &disp, int nr_plane)
-            {
-                Context  *clCxt = disp.clCxt;
-                int data_type = rthis.msg_type;
-
-                String kernelName = get_kernel_name("compute_disp_", data_type);
-
-                cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
-
-                //size_t blockSize = 256;
-                size_t localThreads[]  = { 32, 8, 1 };
-                size_t globalThreads[] = { roundUp(disp.cols, localThreads[0]), roundUp(disp.rows, localThreads[1]), 1 };
-
-                int step_size = disp.step / disp.elemSize();
-                int disp_step = disp.rows * msg_step;
-                openCLVerifyKernel(clCxt, kernel, localThreads);
-                openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem), (void *)&u));
-                openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem), (void *)&d));
-                openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem), (void *)&l));
-                openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem), (void *)&r));
-                openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem), (void *)&data_cost_selected));
-                openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem), (void *)&disp_selected_pyr));
-                openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem), (void *)&disp.data));
-                openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int), (void *)&step_size));
-                openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int), (void *)&disp.cols));
-                openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int), (void *)&disp.rows));
-                openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
-                openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step));
-                openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
-                openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
-                    globalThreads, localThreads, 0, NULL, NULL));
-
-                clFinish(*(cl_command_queue*)getClCommandQueuePtr());
-                openCLSafeCall(clReleaseKernel(kernel));
-            }
-        }
-    }
-}
-namespace
-{
-    const float DEFAULT_MAX_DATA_TERM = 30.0f;
-    const float DEFAULT_DATA_WEIGHT = 1.0f;
-    const float DEFAULT_MAX_DISC_TERM = 160.0f;
-    const float DEFAULT_DISC_SINGLE_JUMP = 10.0f;
-}
-
-void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane)
-{
-    ndisp = (int) ((float) width / 3.14f);
-    if ((ndisp & 1) != 0)
-        ndisp++;
-
-    int mm = ::max(width, height);
-    iters = mm / 100 + ((mm > 1200) ? - 4 : 4);
-
-    levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
-    if (levels == 0) levels++;
-
-    nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
-}
-
-cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
-    int msg_type_)
-
-    : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
-    max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
-    max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0),
-    msg_type(msg_type_), use_local_init_data_cost(true)
-{
-    CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
-}
-
-
-cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
-    float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
-    int min_disp_th_, int msg_type_)
-    : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
-    max_data_term(max_data_term_), data_weight(data_weight_),
-    max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
-    msg_type(msg_type_), use_local_init_data_cost(true)
-{
-    CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
-}
-
-template<class T>
-static void csbp_operator(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
-    oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
-    oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp)
-{
-    CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
-        && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
-
-    CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));
-
-    const Scalar zero = Scalar::all(0);
-
-    ////////////////////////////////////Init///////////////////////////////////////////////////
-    int rows = left.rows;
-    int cols = left.cols;
-
-    rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
-    int levels = rthis.levels;
-
-    AutoBuffer<int> buf(levels * 4);
-
-    int *cols_pyr = buf;
-    int *rows_pyr = cols_pyr + levels;
-    int *nr_plane_pyr = rows_pyr + levels;
-    int *step_pyr = nr_plane_pyr + levels;
-
-    cols_pyr[0] = cols;
-    rows_pyr[0] = rows;
-    nr_plane_pyr[0] = rthis.nr_plane;
-
-    const int n = 64;
-    step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
-    for (int i = 1; i < levels; i++)
-    {
-        cols_pyr[i] = cols_pyr[i - 1]  / 2;
-        rows_pyr[i] = rows_pyr[i - 1]/ 2;
-
-        nr_plane_pyr[i] = nr_plane_pyr[i - 1] * 2;
-
-        step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T);
-    }
-
-    Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
-    Size data_cost_size(step_pyr[0], rows * nr_plane_pyr[0] * 2);
-
-    u[0].create(msg_size, DataType<T>::type);
-    d[0].create(msg_size, DataType<T>::type);
-    l[0].create(msg_size, DataType<T>::type);
-    r[0].create(msg_size, DataType<T>::type);
-
-    u[1].create(msg_size, DataType<T>::type);
-    d[1].create(msg_size, DataType<T>::type);
-    l[1].create(msg_size, DataType<T>::type);
-    r[1].create(msg_size, DataType<T>::type);
-
-    disp_selected_pyr[0].create(msg_size, DataType<T>::type);
-    disp_selected_pyr[1].create(msg_size, DataType<T>::type);
-
-    data_cost.create(data_cost_size, DataType<T>::type);
-    data_cost_selected.create(msg_size, DataType<T>::type);
-
-    Size temp_size = data_cost_size;
-    if (data_cost_size.width * data_cost_size.height < step_pyr[0] * rows_pyr[levels - 1] * rthis.ndisp)
-        temp_size = Size(step_pyr[0], rows_pyr[levels - 1] * rthis.ndisp);
-
-    temp.create(temp_size, DataType<T>::type);
-    temp = zero;
-
-    ///////////////////////////////// Compute////////////////////////////////////////////////
-
-    //csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
-    //   rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
-
-    l[0] = zero;
-    d[0] = zero;
-    r[0] = zero;
-    u[0] = zero;
-    disp_selected_pyr[0] = zero;
-
-    l[1] = zero;
-    d[1] = zero;
-    r[1] = zero;
-    u[1] = zero;
-    disp_selected_pyr[1] = zero;
-
-    data_cost = zero;
-
-    data_cost_selected = zero;
-
-    int cur_idx = 0;
-
-    for (int i = levels - 1; i >= 0; i--)
-    {
-        if (i == levels - 1)
-        {
-            cv::ocl::stereoCSBP::init_data_cost(left, right, temp, rthis, disp_selected_pyr[cur_idx].data,
-                data_cost_selected.data, step_pyr[0], rows_pyr[i], cols_pyr[i],
-                i, nr_plane_pyr[i]);
-        }
-        else
-        {
-            cv::ocl::stereoCSBP::compute_data_cost(
-                disp_selected_pyr[cur_idx].data, data_cost.data, rthis, step_pyr[0],
-                step_pyr[0], left, right, rows_pyr[i], cols_pyr[i], rows_pyr[i + 1], i,
-                nr_plane_pyr[i + 1]);
-
-            int new_idx = (cur_idx + 1) & 1;
-
-            cv::ocl::stereoCSBP::init_message(u[new_idx].data, d[new_idx].data, l[new_idx].data, r[new_idx].data,
-                u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
-                disp_selected_pyr[new_idx].data, disp_selected_pyr[cur_idx].data,
-                data_cost_selected.data, data_cost.data, temp, rthis, step_pyr[0],
-                step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rows_pyr[i + 1],
-                cols_pyr[i + 1], nr_plane_pyr[i + 1]);
-            cur_idx = new_idx;
-        }
-        cv::ocl::stereoCSBP::calc_all_iterations(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
-            data_cost_selected.data, disp_selected_pyr[cur_idx].data, temp,
-            rthis, step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i]);
-    }
-
-    if (disp.empty())
-        disp.create(rows, cols, CV_16S);
-
-    out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
-    out = zero;
-
-    stereoCSBP::compute_disp(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
-        data_cost_selected.data, disp_selected_pyr[cur_idx].data, rthis, step_pyr[0],
-        out, nr_plane_pyr[0]);
-    if (disp.type() != CV_16S)
-        out.convertTo(disp, disp.type());
-}
-
-
-typedef void (*csbp_operator_t)(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
-    oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
-    oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp);
-
-const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
-
-void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat &right, oclMat &disp)
-{
-
-    CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
-    operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out,
-        left, right, disp);
-}
diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp
deleted file mode 100644
index 4bfa80f..0000000
--- a/modules/ocl/src/stereobm.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Xiaopeng Fu, xiaopeng@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-namespace cv
-{
-namespace ocl
-{
-namespace stereoBM
-{
-/////////////////////////////////////////////////////////////////////////
-//////////////////////////prefilter_xsbel////////////////////////////////
-////////////////////////////////////////////////////////////////////////
-static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
-{
-    String kernelName = "prefilter_xsobel";
-
-    size_t blockSize = 1;
-    size_t globalThreads[3] = { input.cols, input.rows, 1 };
-    size_t localThreads[3]  = { blockSize, blockSize, 1 };
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&prefilterCap));
-
-    openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
-        globalThreads, localThreads, args, -1, -1);
-}
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////common////////////////////////////////////
-////////////////////////////////////////////////////////////////////////
-#define N_DISPARITIES 8
-#define ROWSperTHREAD 21
-#define BLOCK_W 128
-
-////////////////////////////////////////////////////////////////////////////
-///////////////////////////////stereoBM_GPU////////////////////////////////
-////////////////////////////////////////////////////////////////////////////
-static void stereo_bm(const oclMat &left, const oclMat &right,  oclMat &disp,
-               int maxdisp, int winSize,  oclMat &minSSD_buf)
-{
-    int winsz2 = winSize >> 1;
-
-    String kernelName = "stereoKernel";
-
-    disp.setTo(Scalar_<unsigned char>::all(0));
-    minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
-
-    size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize();
-    size_t local_mem_size = (N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
-                            sizeof(cl_uint);
-    //size_t blockSize = 1;
-    size_t localThreads[]  = { BLOCK_W, 1, 1 };
-    size_t globalThreads[] = { left.cols - maxdisp - 2 * winsz2,
-                               divUp(left.rows - 2 * winsz2, ROWSperTHREAD),
-                               1 };
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&right.data));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&minSSD_buf.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&minssd_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disp.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&disp.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&maxdisp));
-    args.push_back(std::make_pair(local_mem_size, (void *)NULL));
-
-    char opt [128];
-    sprintf(opt, "-D radius=%d", winsz2);
-    openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
-        globalThreads, localThreads, args, -1, -1, opt);
-}
-////////////////////////////////////////////////////////////////////////////
-///////////////////////////////postfilter_textureness///////////////////////
-////////////////////////////////////////////////////////////////////////////
-static void postfilter_textureness(oclMat &left, int winSize,
-                            float avergeTexThreshold, oclMat &disparity)
-{
-    String kernelName = "textureness_kernel";
-
-    size_t blockSize = 1;
-    size_t localThreads[]  = { BLOCK_W, blockSize ,1};
-    size_t globalThreads[] = { left.cols,
-                               divUp(left.rows, 2 * ROWSperTHREAD),
-                               1 };
-
-    size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disparity.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&winSize));
-    args.push_back(std::make_pair(sizeof(cl_float), (void *)&avergeTexThreshold));
-    args.push_back(std::make_pair(local_mem_size, (void*)NULL));
-    openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
-        globalThreads, localThreads, args, -1, -1);
-}
-//////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////operator/////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////
-static void operator_(oclMat &minSSD, oclMat &leBuf, oclMat &riBuf, int preset, int ndisp,
-               int winSize, float avergeTexThreshold, const oclMat &left,
-               const oclMat &right, oclMat &disparity)
-
-{
-    CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
-    CV_DbgAssert(left.type() == CV_8UC1);
-    CV_DbgAssert(right.type() == CV_8UC1);
-
-    disparity.create(left.size(), CV_8UC1);
-    minSSD.create(left.size(), CV_32SC1);
-
-    oclMat le_for_bm =  left;
-    oclMat ri_for_bm = right;
-
-    if (preset == cv::ocl::StereoBM_OCL::PREFILTER_XSOBEL)
-    {
-        leBuf.create( left.size(),  left.type());
-        riBuf.create(right.size(), right.type());
-
-        prefilter_xsobel( left, leBuf, 31);
-        prefilter_xsobel(right, riBuf, 31);
-
-        le_for_bm = leBuf;
-        ri_for_bm = riBuf;
-    }
-
-    stereo_bm(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);
-
-    if (avergeTexThreshold)
-    {
-        postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
-    }
-}
-}
-}
-}
-const float defaultAvgTexThreshold = 3;
-
-cv::ocl::StereoBM_OCL::StereoBM_OCL()
-    : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ),
-      avergeTexThreshold(defaultAvgTexThreshold)  {}
-
-cv::ocl::StereoBM_OCL::StereoBM_OCL(int preset_, int ndisparities_, int winSize_)
-    : preset(preset_), ndisp(ndisparities_), winSize(winSize_),
-      avergeTexThreshold(defaultAvgTexThreshold)
-{
-    const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
-    CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
-    CV_Assert(ndisp % 8 == 0);
-    CV_Assert(winSize % 2 == 1);
-}
-
-bool cv::ocl::StereoBM_OCL::checkIfGpuCallReasonable()
-{
-    return true;
-}
-
-void cv::ocl::StereoBM_OCL::operator() ( const oclMat &left, const oclMat &right,
-        oclMat &disparity)
-{
-    cv::ocl::stereoBM::operator_(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity);
-}
diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp
deleted file mode 100644
index a564c3d..0000000
--- a/modules/ocl/src/stereobp.cpp
+++ /dev/null
@@ -1,502 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Peng Xiao,   pengxiao@outlook.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-namespace cv
-{
-    namespace ocl
-    {
-        namespace stereoBP
-        {
-            //////////////////////////////////////////////////////////////////////////
-            //////////////////////////////common////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////
-            typedef struct
-            {
-                int   cndisp;
-                float cmax_data_term;
-                float cdata_weight;
-                float cmax_disc_term;
-                float cdisc_single_jump;
-            } con_struct_t;
-
-            cl_mem cl_con_struct =  NULL;
-            static void load_constants(int ndisp, float max_data_term, float data_weight,
-                                float max_disc_term, float disc_single_jump)
-            {
-                con_struct_t *con_struct = new con_struct_t;
-                con_struct -> cndisp            = ndisp;
-                con_struct -> cmax_data_term    = max_data_term;
-                con_struct -> cdata_weight      = data_weight;
-                con_struct -> cmax_disc_term    = max_disc_term;
-                con_struct -> cdisc_single_jump = disc_single_jump;
-
-                Context* clCtx = Context::getContext();
-                cl_context clContext = *(cl_context*)(clCtx->getOpenCLContextPtr());
-                cl_command_queue clCmdQueue = *(cl_command_queue*)(clCtx->getOpenCLCommandQueuePtr());
-                cl_con_struct = load_constant(clContext, clCmdQueue, (void *)con_struct,
-                                              sizeof(con_struct_t));
-
-                delete con_struct;
-            }
-            static void release_constants()
-            {
-                openCLFree(cl_con_struct);
-            }
-
-            /////////////////////////////////////////////////////////////////////////////
-            ///////////////////////////comp data////////////////////////////////////////
-            /////////////////////////////////////////////////////////////////////////
-            static void  comp_data_call(const oclMat &left, const oclMat &right, oclMat &data, int /*disp*/,
-                float /*cmax_data_term*/, float /*cdata_weight*/)
-            {
-                Context  *clCxt = left.clCxt;
-                int channels = left.oclchannels();
-                int data_type = data.type();
-
-                String kernelName = "comp_data";
-
-                std::vector<std::pair<size_t , const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&left.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&left.rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&left.cols));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&left.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&right.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&right.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&data.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&cl_con_struct));
-
-                size_t gt[3] = {left.cols, left.rows, 1}, lt[3] = {16, 16, 1};
-
-                const int OPT_SIZE = 50;
-                char cn_opt [OPT_SIZE] = "";
-                sprintf( cn_opt, "%s -D CN=%d",
-                    (data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"),
-                    channels
-                    );
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, cn_opt);
-            }
-            ///////////////////////////////////////////////////////////////////////////////////
-            /////////////////////////data set down////////////////////////////////////////////
-            /////////////////////////////////////////////////////////////////////////////////
-            static void data_step_down_call(int dst_cols, int dst_rows, int src_rows,
-                const oclMat &src, oclMat &dst, int disp)
-            {
-                Context  *clCxt = src.clCxt;
-                int data_type = src.type();
-
-                String kernelName = "data_step_down";
-
-                std::vector<std::pair<size_t , const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_rows));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_cols));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&disp));
-
-                size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1};
-                const char* t_opt  = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT";
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt);
-            }
-            /////////////////////////////////////////////////////////////////////////////////
-            ///////////////////////////live up message////////////////////////////////////////
-            /////////////////////////////////////////////////////////////////////////////////
-            static void level_up_message_call(int dst_cols, int dst_rows, int src_rows,
-                oclMat &src, oclMat &dst, int ndisp)
-            {
-                Context  *clCxt = src.clCxt;
-                int data_type = src.type();
-
-                String kernelName = "level_up_message";
-                std::vector<std::pair<size_t , const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_cols));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&ndisp));
-
-                size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1};
-                const char* t_opt  = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT";
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt);
-            }
-            static void level_up_messages_calls(int dst_idx, int dst_cols, int dst_rows, int src_rows,
-                                         oclMat *mus, oclMat *mds, oclMat *mls, oclMat *mrs,
-                                         int ndisp)
-            {
-                int src_idx = (dst_idx + 1) & 1;
-
-                level_up_message_call(dst_cols, dst_rows, src_rows,
-                                      mus[src_idx], mus[dst_idx], ndisp);
-
-                level_up_message_call(dst_cols, dst_rows, src_rows,
-                                      mds[src_idx], mds[dst_idx], ndisp);
-
-                level_up_message_call(dst_cols, dst_rows, src_rows,
-                                      mls[src_idx], mls[dst_idx], ndisp);
-
-                level_up_message_call(dst_cols, dst_rows, src_rows,
-                                      mrs[src_idx], mrs[dst_idx], ndisp);
-            }
-            //////////////////////////////////////////////////////////////////////////////////
-            //////////////////////////////cals_all_iterations_call///////////////////////////
-            /////////////////////////////////////////////////////////////////////////////////
-            static void calc_all_iterations_call(int cols, int rows, oclMat &u, oclMat &d,
-                oclMat &l, oclMat &r, oclMat &data,
-                int t, int cndisp, float cmax_disc_term,
-                float cdisc_single_jump)
-            {
-                Context  *clCxt = l.clCxt;
-                int data_type = u.type();
-
-                String kernelName = "one_iteration";
-
-                std::vector<std::pair<size_t , const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&u.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&u.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&data.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&d.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&l.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&r.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&rows));
-                args.push_back( std::make_pair( sizeof(cl_float) , (void *)&cmax_disc_term));
-                args.push_back( std::make_pair( sizeof(cl_float) , (void *)&cdisc_single_jump));
-
-                size_t gt[3] = {cols, rows, 1}, lt[3] = {16, 16, 1};
-                char opt[80] = "";
-                sprintf(opt, "-D %s -D CNDISP=%d", data_type == CV_16S ? "T_SHORT":"T_FLOAT", cndisp);
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, opt);
-            }
-
-            static void calc_all_iterations_calls(int cols, int rows, int iters, oclMat &u,
-                                           oclMat &d, oclMat &l, oclMat &r,
-                                           oclMat &data, int cndisp, float cmax_disc_term,
-                                           float cdisc_single_jump)
-            {
-                for(int t = 0; t < iters; ++t)
-                    calc_all_iterations_call(cols, rows, u, d, l, r, data, t, cndisp,
-                                             cmax_disc_term, cdisc_single_jump);
-            }
-            ///////////////////////////////////////////////////////////////////////////////
-            ///////////////////////output///////////////////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////////////
-            static void output_call(const oclMat &u, const oclMat &d, const oclMat l, const oclMat &r,
-                const oclMat &data, oclMat &disp, int ndisp)
-            {
-                Context  *clCxt = u.clCxt;
-                int data_type = u.type();
-
-                String kernelName = "output";
-
-                std::vector<std::pair<size_t , const void *> > args;
-
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&u.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&u.step));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&d.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&l.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&r.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data));
-                args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&disp.data));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&disp.rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&disp.cols));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&disp.step));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&ndisp));
-
-                size_t gt[3] = {disp.cols, disp.rows, 1}, lt[3] = {16, 16, 1};
-                const char* t_opt  = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT";
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt);
-            }
-        }
-    }
-}
-namespace
-{
-    const float DEFAULT_MAX_DATA_TERM = 10.0f;
-    const float DEFAULT_DATA_WEIGHT = 0.07f;
-    const float DEFAULT_MAX_DISC_TERM = 1.7f;
-    const float DEFAULT_DISC_SINGLE_JUMP = 1.0f;
-}
-
-void cv::ocl::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels)
-{
-    ndisp = width / 4;
-    if ((ndisp & 1) != 0)
-        ndisp++;
-
-    int mm = ::max(width, height);
-    iters = mm / 100 + 2;
-
-    levels = (int)(::log(static_cast<double>(mm)) + 1) * 4 / 5;
-    if (levels == 0) levels++;
-}
-
-cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_)
-    : ndisp(ndisp_), iters(iters_), levels(levels_),
-      max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
-      max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP),
-      msg_type(msg_type_), datas(levels_)
-{
-}
-
-cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_)
-    : ndisp(ndisp_), iters(iters_), levels(levels_),
-      max_data_term(max_data_term_), data_weight(data_weight_),
-      max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_),
-      msg_type(msg_type_), datas(levels_)
-{
-}
-
-namespace
-{
-    class StereoBeliefPropagationImpl
-    {
-    public:
-        StereoBeliefPropagationImpl(StereoBeliefPropagation &rthis_,
-                                    oclMat &u_, oclMat &d_, oclMat &l_, oclMat &r_,
-                                    oclMat &u2_, oclMat &d2_, oclMat &l2_, oclMat &r2_,
-                                    std::vector<oclMat> &datas_, oclMat &out_)
-            : rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_),
-              zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f)
-        {
-            CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels);
-            CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S);
-            CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < std::numeric_limits<short>::max());
-        }
-
-        void operator()(const oclMat &left, const oclMat &right, oclMat &disp)
-        {
-            CV_Assert(left.size() == right.size() && left.type() == right.type());
-            CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4);
-
-            rows = left.rows;
-            cols = left.cols;
-
-            int divisor = (int)pow(2.f, rthis.levels - 1.0f);
-            int lowest_cols = cols / divisor;
-            int lowest_rows = rows / divisor;
-            const int min_image_dim_size = 2;
-            CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size);
-
-            init();
-
-            datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type);
-            datas[0].setTo(Scalar_<short>::all(0));
-
-            cv::ocl::stereoBP::comp_data_call(left, right, datas[0], rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight);
-            calcBP(disp);
-        }
-
-        void operator()(const oclMat &data, oclMat &disp)
-        {
-            CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0));
-
-            rows = data.rows / rthis.ndisp;
-            cols = data.cols;
-
-            int divisor = (int)pow(2.f, rthis.levels - 1.0f);
-            int lowest_cols = cols / divisor;
-            int lowest_rows = rows / divisor;
-            const int min_image_dim_size = 2;
-            CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size);
-
-            init();
-
-            datas[0] = data;
-
-            calcBP(disp);
-        }
-    private:
-        void init()
-        {
-            u.create(rows * rthis.ndisp, cols, rthis.msg_type);
-            d.create(rows * rthis.ndisp, cols, rthis.msg_type);
-            l.create(rows * rthis.ndisp, cols, rthis.msg_type);
-            r.create(rows * rthis.ndisp, cols, rthis.msg_type);
-
-            if (rthis.levels & 1)
-            {
-                //can clear less area
-                u = zero;
-                d = zero;
-                l = zero;
-                r = zero;
-            }
-
-            if (rthis.levels > 1)
-            {
-                int less_rows = (rows + 1) / 2;
-                int less_cols = (cols + 1) / 2;
-
-                u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
-                d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
-                l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
-                r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
-
-                if ((rthis.levels & 1) == 0)
-                {
-                    u2 = zero;
-                    d2 = zero;
-                    l2 = zero;
-                    r2 = zero;
-                }
-            }
-
-            cv::ocl::stereoBP::load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight,
-                                              scale * rthis.max_disc_term, scale * rthis.disc_single_jump);
-
-            datas.resize(rthis.levels);
-            cols_all.resize(rthis.levels);
-            rows_all.resize(rthis.levels);
-
-            cols_all[0] = cols;
-            rows_all[0] = rows;
-        }
-
-        void calcBP(oclMat &disp)
-        {
-            using namespace cv::ocl::stereoBP;
-
-            for (int i = 1; i < rthis.levels; ++i)
-            {
-                cols_all[i] = (cols_all[i - 1] + 1) / 2;
-                rows_all[i] = (rows_all[i - 1] + 1) / 2;
-
-                datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type);
-                datas[i].setTo(Scalar_<short>::all(0));
-
-                data_step_down_call(cols_all[i], rows_all[i], rows_all[i - 1],
-                                    datas[i - 1], datas[i], rthis.ndisp);
-            }
-
-            oclMat mus[] = {u, u2};
-            oclMat mds[] = {d, d2};
-            oclMat mrs[] = {r, r2};
-            oclMat mls[] = {l, l2};
-
-            int mem_idx = (rthis.levels & 1) ? 0 : 1;
-
-            for (int i = rthis.levels - 1; i >= 0; --i)
-            {
-                // for lower level we have already computed messages by setting to zero
-                if (i != rthis.levels - 1)
-                    level_up_messages_calls(mem_idx, cols_all[i], rows_all[i], rows_all[i + 1],
-                                            mus, mds, mls, mrs, rthis.ndisp);
-
-                calc_all_iterations_calls(cols_all[i], rows_all[i], rthis.iters, mus[mem_idx],
-                                          mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i],
-                                          rthis.ndisp, scale * rthis.max_disc_term,
-                                          scale * rthis.disc_single_jump);
-
-                mem_idx = (mem_idx + 1) & 1;
-            }
-            if (disp.empty())
-                disp.create(rows, cols, CV_16S);
-
-            out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
-            out = zero;
-
-            output_call(u, d, l, r, datas.front(), out, rthis.ndisp);
-
-            if (disp.type() != CV_16S)
-                out.convertTo(disp, disp.type());
-
-            release_constants();
-        }
-        StereoBeliefPropagationImpl& operator=(const StereoBeliefPropagationImpl&);
-
-        StereoBeliefPropagation &rthis;
-
-        oclMat &u;
-        oclMat &d;
-        oclMat &l;
-        oclMat &r;
-
-        oclMat &u2;
-        oclMat &d2;
-        oclMat &l2;
-        oclMat &r2;
-
-        std::vector<oclMat> &datas;
-        oclMat &out;
-
-        const Scalar zero;
-        const float scale;
-
-        int rows, cols;
-
-        std::vector<int> cols_all, rows_all;
-    };
-}
-
-void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &left, const oclMat &right, oclMat &disp)
-{
-    ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(left, right, disp);
-}
-
-void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &data, oclMat &disp)
-{
-    ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(data, disp);
-}
diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp
deleted file mode 100644
index 8927438..0000000
--- a/modules/ocl/src/svm.cpp
+++ /dev/null
@@ -1,1136 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Erping Pang, erping@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-// TODO Remove this after HAVE_CLAMDBLAS eliminating
-#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 8)
-#  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-
-using namespace cv;
-using namespace ocl;
-
-namespace cv { namespace ocl {
-
-#if 1
-typedef float Qfloat;
-#define QFLOAT_TYPE CV_32F
-#else
-typedef double Qfloat;
-#define QFLOAT_TYPE CV_64F
-#endif
-
-class CvSVMKernel_ocl: public CvSVMKernel
-{
-public:
-    typedef void (CvSVMKernel_ocl::*Calc_ocl)( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-    CvSVMKernel_ocl(const CvSVMParams* params, Calc_ocl _calc_func , Calc _calc_func1);
-
-    Calc_ocl calc_func_ocl;
-    bool create( const CvSVMParams* params, Calc_ocl _calc_func, Calc _calc_func1);
-
-    void calc( int vcount, const int row_idx, Qfloat* results, Mat& src);
-    void calc_linear( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-
-    void calc_poly( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-    void calc_sigmoid( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-    void calc_non_rbf_base( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-    void calc_rbf( int vec_count, const int row_idx, Qfloat* results, Mat& src);
-};
-
-class CvSVMSolver_ocl: public CvSVMSolver
-{
-public:
-    CvSVMSolver_ocl();
-    CvSVMSolver_ocl(const CvSVMParams *);
-    float* get_row_base( int i, bool* _existed, Mat& src);
-    bool solve_generic( CvSVMSolutionInfo& si );
-    float* get_row( int i, float* dst, Mat& src);
-};
-
-typedef struct CvSparseVecElem32f
-{
-    int idx;
-    float val;
-} CvSparseVecElem32f;
-
-static int icvCmpSparseVecElems( const void* a, const void* b )
-{
-    return ((CvSparseVecElem32f*)a)->idx - ((CvSparseVecElem32f*)b)->idx;
-}
-
-void cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx,
-                           int class_count, const CvMat* prob, float** row_sample,
-                           int as_sparse CV_DEFAULT(0) );
-
-void  cvPreparePredictData( const CvArr* _sample, int dims_all,
-                            const CvMat* comp_idx, int class_count,
-                            const CvMat* prob, float** _row_sample,
-                            int as_sparse )
-{
-    float* row_sample = 0;
-    int* inverse_comp_idx = 0;
-
-    CV_FUNCNAME( "cvPreparePredictData" );
-
-    __CV_BEGIN__;
-
-    const CvMat* sample = (const CvMat*)_sample;
-    float* sample_data;
-    int sample_step;
-    int is_sparse = CV_IS_SPARSE_MAT(sample);
-    int d, sizes[CV_MAX_DIM];
-    int i, dims_selected;
-    int vec_size;
-
-    if( !is_sparse && !CV_IS_MAT(sample) )
-    {
-        CV_ERROR( !sample ? CV_StsNullPtr : CV_StsBadArg, "The sample is not a valid vector" );
-    }
-
-    if( cvGetElemType( sample ) != CV_32FC1 )
-    {
-        CV_ERROR( CV_StsUnsupportedFormat, "Input sample must have 32fC1 type" );
-    }
-
-    CV_CALL( d = cvGetDims( sample, sizes ));
-
-    if( !((is_sparse && d == 1) || (!is_sparse && d == 2 && (sample->rows == 1 || sample->cols == 1))) )
-    {
-        CV_ERROR( CV_StsBadSize, "Input sample must be 1-dimensional vector" );
-    }
-
-    if( d == 1 )
-        sizes[1] = 1;
-
-    if( sizes[0] + sizes[1] - 1 != dims_all )
-        CV_ERROR( CV_StsUnmatchedSizes,
-                  "The sample size is different from what has been used for training" );
-
-    if( !_row_sample )
-    {
-        CV_ERROR( CV_StsNullPtr, "INTERNAL ERROR: The row_sample pointer is NULL" );
-    }
-
-    if( comp_idx && (!CV_IS_MAT(comp_idx) || comp_idx->rows != 1 ||
-                     CV_MAT_TYPE(comp_idx->type) != CV_32SC1) )
-    {
-        CV_ERROR( CV_StsBadArg, "INTERNAL ERROR: invalid comp_idx" );
-    }
-
-    dims_selected = comp_idx ? comp_idx->cols : dims_all;
-
-    if( prob )
-    {
-        if( !CV_IS_MAT(prob) )
-        {
-            CV_ERROR( CV_StsBadArg, "The output matrix of probabilities is invalid" );
-        }
-
-        if( (prob->rows != 1 && prob->cols != 1) ||
-                (CV_MAT_TYPE(prob->type) != CV_32FC1 &&
-                 CV_MAT_TYPE(prob->type) != CV_64FC1) )
-            CV_ERROR( CV_StsBadSize,
-                      "The matrix of probabilities must be 1-dimensional vector of 32fC1 type" );
-
-        if( prob->rows + prob->cols - 1 != class_count )
-            CV_ERROR( CV_StsUnmatchedSizes,
-                      "The vector of probabilities must contain as many elements as "
-                      "the number of classes in the training set" );
-    }
-
-    vec_size = !as_sparse ? dims_selected * sizeof(row_sample[0]) :
-               (dims_selected + 1) * sizeof(CvSparseVecElem32f);
-
-    if( CV_IS_MAT(sample) )
-    {
-        sample_data = sample->data.fl;
-        sample_step = CV_IS_MAT_CONT(sample->type) ? 1 : sample->step / sizeof(row_sample[0]);
-
-        if( !comp_idx && CV_IS_MAT_CONT(sample->type) && !as_sparse )
-            *_row_sample = sample_data;
-        else
-        {
-            CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
-
-            if( !comp_idx )
-                for( i = 0; i < dims_selected; i++ )
-                    row_sample[i] = sample_data[sample_step * i];
-            else
-            {
-                int* comp = comp_idx->data.i;
-                for( i = 0; i < dims_selected; i++ )
-                    row_sample[i] = sample_data[sample_step * comp[i]];
-            }
-
-            *_row_sample = row_sample;
-        }
-
-        if( as_sparse )
-        {
-            const float* src = (const float*)row_sample;
-            CvSparseVecElem32f* dst = (CvSparseVecElem32f*)row_sample;
-
-            dst[dims_selected].idx = -1;
-            for( i = dims_selected - 1; i >= 0; i-- )
-            {
-                dst[i].idx = i;
-                dst[i].val = src[i];
-            }
-        }
-    }
-    else
-    {
-        CvSparseNode* node;
-        CvSparseMatIterator mat_iterator;
-        const CvSparseMat* sparse = (const CvSparseMat*)sample;
-        assert( is_sparse );
-
-        node = cvInitSparseMatIterator( sparse, &mat_iterator );
-        CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
-
-        if( comp_idx )
-        {
-            CV_CALL( inverse_comp_idx = (int*)cvAlloc( dims_all * sizeof(int) ));
-            memset( inverse_comp_idx, -1, dims_all * sizeof(int) );
-            for( i = 0; i < dims_selected; i++ )
-                inverse_comp_idx[comp_idx->data.i[i]] = i;
-        }
-
-        if( !as_sparse )
-        {
-            memset( row_sample, 0, vec_size );
-
-            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
-            {
-                int idx = *CV_NODE_IDX( sparse, node );
-                if( inverse_comp_idx )
-                {
-                    idx = inverse_comp_idx[idx];
-                    if( idx < 0 )
-                        continue;
-                }
-                row_sample[idx] = *(float*)CV_NODE_VAL( sparse, node );
-            }
-        }
-        else
-        {
-            CvSparseVecElem32f* ptr = (CvSparseVecElem32f*)row_sample;
-
-            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
-            {
-                int idx = *CV_NODE_IDX( sparse, node );
-                if( inverse_comp_idx )
-                {
-                    idx = inverse_comp_idx[idx];
-                    if( idx < 0 )
-                        continue;
-                }
-                ptr->idx = idx;
-                ptr->val = *(float*)CV_NODE_VAL( sparse, node );
-                ptr++;
-            }
-
-            qsort( row_sample, ptr - (CvSparseVecElem32f*)row_sample,
-                   sizeof(ptr[0]), icvCmpSparseVecElems );
-            ptr->idx = -1;
-        }
-
-        *_row_sample = row_sample;
-    }
-
-    __CV_END__;
-
-    if( inverse_comp_idx )
-        cvFree( &inverse_comp_idx );
-
-    if( cvGetErrStatus() < 0 && _row_sample )
-    {
-        cvFree( &row_sample );
-        *_row_sample = 0;
-    }
-}
-
-float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool returnDFVal ) const
-{
-    assert( kernel );
-
-    (void)row_len;
-
-    int class_count = class_labels ? class_labels->cols :
-                      params.svm_type == ONE_CLASS ? 1 : 0;
-
-    float result = 0;
-    cv::AutoBuffer<float> _buffer(sv_total + (class_count + 1) * 2);
-    float* buffer = _buffer;
-
-    if( params.svm_type == EPS_SVR ||
-            params.svm_type == NU_SVR ||
-            params.svm_type == ONE_CLASS )
-    {
-        CvSVMDecisionFunc* df = (CvSVMDecisionFunc*)decision_func;
-        int i, sv_count = df->sv_count;
-        double sum = -df->rho;
-
-        ((CvSVMKernel_ocl*)kernel)->calc( sv_count, row_index, buffer, src);
-        for( i = 0; i < sv_count; i++ )
-            sum += buffer[i] * df->alpha[i];
-
-        result = params.svm_type == ONE_CLASS ? (float)(sum > 0) : (float)sum;
-    }
-    else if( params.svm_type == C_SVC ||
-             params.svm_type == NU_SVC )
-    {
-        CvSVMDecisionFunc* df = (CvSVMDecisionFunc*)decision_func;
-        int* vote = (int*)(buffer + sv_total);
-        int i, j, k;
-
-        memset( vote, 0, class_count * sizeof(vote[0]));
-        ((CvSVMKernel_ocl*)kernel)->calc( sv_total, row_index, buffer, src);
-        double sum = 0.;
-
-        for( i = 0; i < class_count; i++ )
-            for( j = i + 1; j < class_count; j++, df++ )
-            {
-                sum = -df->rho;
-                int sv_count = df->sv_count;
-                for( k = 0; k < sv_count; k++ )
-                    sum += df->alpha[k] * buffer[df->sv_index[k]];
-
-                vote[sum > 0 ? i : j]++;
-            }
-
-        for( i = 1, k = 0; i < class_count; i++ )
-            if( vote[i] > vote[k] )
-                k = i;
-
-        result = returnDFVal && class_count == 2 ? (float)sum : (float)(class_labels->data.i[k]);
-    }
-    else
-        CV_Error( CV_StsBadArg, "INTERNAL ERROR: Unknown SVM type, "
-                  "the SVM structure is probably corrupted" );
-
-    return result;
-}
-
-float CvSVM_OCL::predict( const Mat& _sample, bool returnDFVal ) const
-{
-    CvMat sample = _sample;
-    return CvSVM::predict(&sample, returnDFVal);
-}
-
-float CvSVM_OCL::predict( const int row_index, Mat& src, bool returnDFVal) const
-{
-    float result = 0;
-
-    result = predict( row_index, get_var_count(), src, returnDFVal);
-
-    return result;
-}
-
-#undef get_C
-#define get_C(i) (C[y[i]>0])
-#undef is_upper_bound
-#define is_upper_bound(i) (alpha_status[i] > 0)
-#undef is_lower_bound
-#define is_lower_bound(i) (alpha_status[i] < 0)
-#undef update_alpha_status
-#define update_alpha_status(i) \
-    alpha_status[i] = (schar)(alpha[i] >= get_C(i) ? 1 : alpha[i] <= 0 ? -1 : 0)
-
-CvSVMSolver_ocl::CvSVMSolver_ocl(const CvSVMParams* _params)
-{
-    params = _params;
-}
-
-float* CvSVMSolver_ocl::get_row( int i, float* dst, Mat& src )
-{
-    bool existed = false;
-    float* row = get_row_base( i, &existed, src);
-    return (this->*get_row_func)( i, row, dst, existed );
-}
-
-float* CvSVMSolver_ocl::get_row_base( int i, bool* _existed, Mat& src )
-{
-    int i1 = i < sample_count ? i : i - sample_count;
-    CvSVMKernelRow* row = rows + i1;
-    bool existed = row->data != 0;
-    Qfloat* data;
-
-    if( existed || cache_size <= 0 )
-    {
-        CvSVMKernelRow* del_row = existed ? row : lru_list.prev;
-        data = del_row->data;
-        assert( data != 0 );
-
-        // delete row from the LRU list
-        del_row->data = 0;
-        del_row->prev->next = del_row->next;
-        del_row->next->prev = del_row->prev;
-    }
-    else
-    {
-        data = (Qfloat*)cvMemStorageAlloc( storage, cache_line_size );
-        cache_size -= cache_line_size;
-    }
-
-    // insert row into the LRU list
-    row->data = data;
-    row->prev = &lru_list;
-    row->next = lru_list.next;
-    row->prev->next = row->next->prev = row;
-
-    if( !existed )
-        ((CvSVMKernel_ocl*)kernel)->calc( sample_count, i1, row->data, src);
-
-    if( _existed )
-        *_existed = existed;
-
-    return row->data;
-}
-
-#ifndef HAVE_CLAMDBLAS
-
-static void matmul_sigmod(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "svm_sigmod";
-    int src_step = (int)src.step / src.elemSize();
-    int src2_step = (int)src2.step / src2.elemSize();
-    int dst_step = (int)dst.step / dst.elemSize();
-    int x = MIN(16, src_rows);
-    int y = MIN(16, src2_cols);
-    size_t localThreads[] = {x, y, 1};
-    size_t globalThreads[] = {src2_cols, src_rows, 1};
-    int width = var_count;
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));
-
-    float alpha = 0.0f, beta = 0.0f;
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        alpha = (float)alpha1;
-        beta = (float)beta1;
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
-    }
-    else
-    {
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
-    }
-    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1, double degree1, bool flag)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "svm_poly";
-    int src_step = (int)src.step / src.elemSize();
-    int src2_step = (int)src2.step / src2.elemSize();
-    int dst_step = (int)dst.step / dst.elemSize();
-    int x = MIN(16, src_rows);
-    int y = MIN(16, src2_cols);
-    size_t localThreads[] = {x, y, 1};
-    size_t globalThreads[] = {src2_cols, src_rows, 1};
-    int width = var_count;
-
-    char build_options[50];
-
-    if(flag)
-    {
-        sprintf(build_options, "-D ADDPOW");
-    }
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));
-
-    float alpha = 0.0f, beta = 0.0f, degree = 0.0f;
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        alpha = (float)alpha1;
-        beta = (float)beta1;
-        degree = (float)degree1;
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&degree));
-    }
-    else
-    {
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&degree1));
-    }
-    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
-}
-
-static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1)
-{
-    Context *clCxt = Context::getContext();
-    String kernelName = "svm_linear";
-    int src_step = (int)src.step / src.elemSize();
-    int src2_step = (int)src2.step / src2.elemSize();
-    int dst_step = (int)dst.step / dst.elemSize();
-    int x = MIN(16, src_rows);
-    int y = MIN(16, src2_cols);
-    size_t localThreads[] = {x, y, 1};
-    size_t globalThreads[] = {src2_cols, src_rows, 1};
-    int width = var_count;
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));
-
-    float alpha = 0.0f, beta = 0.0f;
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        alpha = (float)alpha1;
-        beta = (float)beta1;
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
-    }
-    else
-    {
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
-    }
-    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-#endif // #ifndef HAVE_CLAMDBLAS
-
-static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag)
-{
-
-    Context *clCxt = Context::getContext();
-
-    String kernelName = "svm_rbf";
-
-    int width = var_count;
-    int src_step = (int)src.step / src.elemSize();
-    int src_e_step = (int)src_e.step / src_e.elemSize();
-    int dst_step = (int)dst.step / dst.elemSize();
-
-    int x = MIN(16, src_rows);
-    int y = MIN(16, src2_cols);
-    size_t localThreads[] = {x, y, 1};
-    size_t globalThreads[] = {src2_cols,  src_rows, 1};
-    char build_options[50];
-
-    if(flag)
-        sprintf(build_options, "-D ADDEXP");
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src_e.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_e_step));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));
-    float gamma = 0.0f;
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        gamma = (float)gamma1;
-        args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma));
-    }
-    else
-        args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1));
-
-    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
-}
-
-float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
-{
-    int var_count = get_var_count();
-    int sample_count = samples->rows;
-
-    //float* row_sample = 0;
-    Mat src_temp = Mat(sample_count, var_count, CV_32FC1);
-    CV_FUNCNAME( "CvSVM::predict" );
-
-
-    for(int i = 0; i < samples->rows; i++)
-    {
-        __CV_BEGIN__;
-        CvMat sample;
-        float* row_sample = 0;
-        cvGetRow( samples, &sample, i );
-        int class_count;
-        if( !kernel )
-        {
-            CV_ERROR( CV_StsBadArg, "The SVM should be trained first" );
-        }
-
-        class_count = class_labels ? class_labels->cols :
-                      params.svm_type == ONE_CLASS ? 1 : 0;
-
-        CV_CALL( cvPreparePredictData(&sample, var_all, var_idx,
-                                      class_count, 0, &row_sample ));
-        for(int j = 0; j < var_count; ++j)
-            src_temp.at<float>(i, j) = row_sample[j];
-        __CV_END__;
-    }
-
-    Mat dst1;
-    double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0;
-    if(params.kernel_type == CvSVM::LINEAR)
-    {
-        alpha1 = 1;
-        beta1 = 0;
-    }
-    if(params.kernel_type == CvSVM::POLY)
-    {
-        alpha1 = params.gamma;
-        beta1 = params.coef0;
-    }
-    if(params.kernel_type == CvSVM::SIGMOID)
-    {
-        alpha1 = - 2 * params.gamma;
-        beta1 = - 2 * params.coef0;
-    }
-    if(params.kernel_type == CvSVM::RBF)
-        gamma1 = - params.gamma;
-
-    Mat sv_temp = Mat(sv_total, var_count, CV_32FC1, Scalar::all(0));
-
-
-    for(int i = 0; i < sv_total; ++i)
-        for(int j = 0; j < var_count; ++j)
-            sv_temp.at<float>(i, j) = sv[i][j];
-
-    oclMat src(sample_count, var_count, CV_32FC1, Scalar::all(0));
-    oclMat sv_;
-
-    src.upload(src_temp);
-    oclMat dst;
-
-#ifdef HAVE_CLAMDBLAS
-
-    dst = oclMat(sample_count, sv_total, CV_32FC1);
-    oclMat src3(sample_count, sv_total, CV_32FC1, Scalar::all(1));
-    if(params.kernel_type != CvSVM::RBF)
-    {
-        Mat sv_temp1;
-        transpose(sv_temp, sv_temp1);
-        sv_.upload(sv_temp1);
-        gemm(src, sv_, alpha1, src3, beta1, dst);
-    }
-
-#else
-    double degree1 = 0.0;
-    if (params.kernel_type == CvSVM::POLY)
-        degree1 = params.degree;
-
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-        dst = oclMat(sample_count, sv_total, CV_32FC1);
-    else
-        dst = oclMat(sample_count, sv_total, CV_64FC1);
-
-    if(params.kernel_type == CvSVM::LINEAR)
-    {
-        sv_.upload(sv_temp);
-        matmul_linear(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1);
-    }
-    if( params.kernel_type == CvSVM::SIGMOID)
-    {
-        sv_.upload(sv_temp);
-        matmul_sigmod(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1);
-    }
-
-    if(params.kernel_type == CvSVM::POLY)
-    {
-        sv_.upload(sv_temp);
-        if(sample_count > 0)
-            matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, true);
-        else
-            matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, false);
-    }
-#endif
-
-    if(params.kernel_type == CvSVM::RBF)
-    {
-        sv_.upload(sv_temp);
-        if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-            dst = oclMat(sample_count, sv_total, CV_32FC1);
-        else
-            dst = oclMat(sample_count, sv_total, CV_64FC1);
-
-        if(sample_count > 0)
-            matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, true);
-        else
-            matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, false);
-    }
-    dst.download(dst1);
-
-    float result = 0;
-    for(int i = 0; i < samples->rows; i++ )
-    {
-        int r = (int)this->predict(i, dst1);
-        if (results)
-            results->data.fl[i] = (float)r;
-        if (i == 0)
-            result = (float)r;
-    }
-    return result;
-}
-
-void CvSVM_OCL::predict( cv::InputArray _samples, cv::OutputArray _results ) const
-{
-    _results.create(_samples.size().height, 1, CV_32F);
-    CvMat samples = _samples.getMat(), results = _results.getMat();
-    predict(&samples, &results);
-}
-
-bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
-{
-    int iter = 0;
-    int i, j, k;
-
-    // 1. initialize gradient and alpha status
-    for( i = 0; i < alpha_count; i++ )
-    {
-        update_alpha_status(i);
-        G[i] = b[i];
-        if( fabs(G[i]) > 1e200 )
-        {
-            return false;
-        }
-    }
-    Mat dst1;
-    double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0;
-    if(params->kernel_type == CvSVM::LINEAR)
-    {
-        alpha1 = 1;
-        beta1 = 0;
-    }
-    if(params->kernel_type == CvSVM::POLY)
-    {
-        alpha1 = params->gamma;
-        beta1 = params->coef0;
-    }
-    if(params->kernel_type == CvSVM::SIGMOID)
-    {
-        alpha1 = -2 * params->gamma;
-        beta1 = -2 * params->coef0;
-    }
-    if(params->kernel_type == CvSVM::RBF)
-    {
-        gamma1 = -params->gamma;
-    }
-    Mat src1 = Mat(sample_count, var_count, CV_32FC1);
-
-    for(int i = 0; i < sample_count; ++i)
-    {
-        for(int j = 0; j < var_count; ++j)
-        {
-            src1.at<float>(i, j) = samples[i][j];
-        }
-    }
-    oclMat src, src_e;
-    src.upload(src1);
-    oclMat dst;
-
-#ifdef HAVE_CLAMDBLAS
-
-    dst = oclMat(sample_count, sample_count, CV_32FC1);
-    oclMat src3(sample_count, sample_count, CV_32FC1, Scalar::all(1));
-    if(params->kernel_type != CvSVM::RBF)
-    {
-        ocl::transpose(src, src_e);
-        gemm(src, src_e, alpha1, src3, beta1, dst);
-    }
-
-#else
-    double degree1 = 0.0;
-    if(params->kernel_type == CvSVM::POLY)
-        degree1 = params->degree;
-
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-        dst = oclMat(sample_count, sample_count, CV_32FC1);
-    else
-        dst = oclMat(sample_count, sample_count, CV_64FC1);
-
-    if(params->kernel_type == CvSVM::LINEAR )
-    {
-        src_e = src;
-        matmul_linear(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1);
-    }
-    if( params->kernel_type == CvSVM::SIGMOID)
-    {
-        src_e = src;
-        matmul_sigmod(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1);
-    }
-
-    if(params->kernel_type == CvSVM::POLY)
-    {
-        src_e = src;
-        if(sample_count > 0)
-            matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, true);
-        else
-            matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, false);
-    }
-
-#endif
-
-    if(params->kernel_type == CvSVM::RBF)
-    {
-        src_e = src;
-        if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-            dst = oclMat(sample_count, sample_count, CV_32FC1);
-        else
-            dst = oclMat(sample_count, sample_count, CV_64FC1);
-
-        if(sample_count > 0)
-            matmul_rbf(src, src_e, dst, sample_count, sample_count, var_count, gamma1, true);
-        else
-            matmul_rbf(src, src_e, dst, sample_count, sample_count, var_count, gamma1, false);
-    }
-    dst.download(dst1);
-    for( i = 0; i < alpha_count; i++ )
-    {
-        if( !is_lower_bound(i) )
-        {
-            const Qfloat *Q_i = CvSVMSolver::get_row( i, buf[0]);
-            double alpha_i = alpha[i];
-
-            for( j = 0; j < alpha_count; j++ )
-                G[j] += alpha_i * Q_i[j];
-        }
-    }
-
-    // 2. optimization loop
-    for(;;)
-    {
-        const Qfloat *Q_i, *Q_j;
-        double C_i, C_j;
-        double old_alpha_i, old_alpha_j, alpha_i, alpha_j;
-        double delta_alpha_i, delta_alpha_j;
-
-#ifdef _DEBUG
-        for( i = 0; i < alpha_count; i++ )
-        {
-            if( fabs(G[i]) > 1e+300 )
-                return false;
-
-            if( fabs(alpha[i]) > 1e16 )
-                return false;
-        }
-#endif
-
-        if( (this->*select_working_set_func)( i, j ) != 0 || iter++ >= max_iter )
-        {
-            break;
-        }
-        Q_i = get_row( i, buf[0], dst1);
-        Q_j = get_row( j, buf[1], dst1);
-
-        C_i = get_C(i);
-        C_j = get_C(j);
-
-        alpha_i = old_alpha_i = alpha[i];
-        alpha_j = old_alpha_j = alpha[j];
-
-        if( y[i] != y[j] )
-        {
-            double denom = Q_i[i] + Q_j[j] + 2 * Q_i[j];
-            double delta = (-G[i] - G[j]) / MAX(fabs(denom), FLT_EPSILON);
-            double diff = alpha_i - alpha_j;
-            alpha_i += delta;
-            alpha_j += delta;
-
-            if( diff > 0 && alpha_j < 0 )
-            {
-                alpha_j = 0;
-                alpha_i = diff;
-            }
-            else if( diff <= 0 && alpha_i < 0 )
-            {
-                alpha_i = 0;
-                alpha_j = -diff;
-            }
-
-            if( diff > C_i - C_j && alpha_i > C_i )
-            {
-                alpha_i = C_i;
-                alpha_j = C_i - diff;
-            }
-            else if( diff <= C_i - C_j && alpha_j > C_j )
-            {
-                alpha_j = C_j;
-                alpha_i = C_j + diff;
-            }
-        }
-        else
-        {
-            double denom = Q_i[i] + Q_j[j] - 2 * Q_i[j];
-            double delta = (G[i] - G[j]) / MAX(fabs(denom), FLT_EPSILON);
-            double sum = alpha_i + alpha_j;
-            alpha_i -= delta;
-            alpha_j += delta;
-
-            if( sum > C_i && alpha_i > C_i )
-            {
-                alpha_i = C_i;
-                alpha_j = sum - C_i;
-            }
-            else if( sum <= C_i && alpha_j < 0)
-            {
-                alpha_j = 0;
-                alpha_i = sum;
-            }
-
-            if( sum > C_j && alpha_j > C_j )
-            {
-                alpha_j = C_j;
-                alpha_i = sum - C_j;
-            }
-            else if( sum <= C_j && alpha_i < 0 )
-            {
-                alpha_i = 0;
-                alpha_j = sum;
-            }
-        }
-        // update alpha
-        alpha[i] = alpha_i;
-        alpha[j] = alpha_j;
-        update_alpha_status(i);
-        update_alpha_status(j);
-
-        // update G
-        delta_alpha_i = alpha_i - old_alpha_i;
-        delta_alpha_j = alpha_j - old_alpha_j;
-
-        for( k = 0; k < alpha_count; k++ )
-            G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j;
-    }
-
-    // calculate rho
-    (this->*calc_rho_func)( si.rho, si.r );
-
-    // calculate objective value
-    for( i = 0, si.obj = 0; i < alpha_count; i++ )
-        si.obj += alpha[i] * (G[i] + b[i]);
-
-    si.obj *= 0.5;
-
-    si.upper_bound_p = C[1];
-    si.upper_bound_n = C[0];
-
-    return true;
-}
-
-void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& src)
-{
-    //const Qfloat max_val = (Qfloat)(FLT_MAX*1e-3);
-    //int j;
-    (this->*calc_func_ocl)( vcount, row_idx, results, src);
-
-#if !defined(HAVE_CLAMDBLAS)
-    // nothing
-#else
-    const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3);
-    int j;
-    for( j = 0; j < vcount; j++ )
-        if( results[j] > max_val )
-            results[j] = max_val;
-#endif
-}
-
-bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 )
-{
-    clear();
-    params = _params;
-    calc_func_ocl = _calc_func;
-    calc_func = _calc_func1;
-    if( !calc_func_ocl )
-        calc_func_ocl = params->kernel_type == CvSVM::RBF ? &CvSVMKernel_ocl::calc_rbf :
-                        params->kernel_type == CvSVM::POLY ? &CvSVMKernel_ocl::calc_poly :
-                        params->kernel_type == CvSVM::SIGMOID ? &CvSVMKernel_ocl::calc_sigmoid :
-                        &CvSVMKernel_ocl::calc_linear;
-    if( !calc_func)
-        calc_func = params->kernel_type == CvSVM::RBF ? &CvSVMKernel::calc_rbf :
-                    params->kernel_type == CvSVM::POLY ? &CvSVMKernel::calc_poly :
-                    params->kernel_type == CvSVM::SIGMOID ? &CvSVMKernel::calc_sigmoid :
-                    &CvSVMKernel::calc_linear;
-    return true;
-}
-CvSVMKernel_ocl::CvSVMKernel_ocl(const CvSVMParams* params, CvSVMKernel_ocl::Calc_ocl _calc_func, CvSVMKernel::Calc _calc_func1)
-{
-    CvSVMKernel::clear();
-    CvSVMKernel_ocl::create( params, _calc_func, _calc_func1 );
-}
-
-void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* results, Mat& src)
-{
-#ifdef HAVE_CLAMDBLAS
-
-    for(int i = 0; i < vcount; i++)
-    {
-        results[i] = (Qfloat) * src.ptr<float>(row_idx, i);
-    }
-#else
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-    {
-        for(int i = 0; i < vcount; i++)
-        {
-            results[i] = (Qfloat) * src.ptr<float>(row_idx, i);
-        }
-    }
-    else
-    {
-        for(int i = 0; i < vcount; i++)
-        {
-            results[i] = (Qfloat) * src.ptr<double>(row_idx, i);
-        }
-    }
-#endif
-}
-
-void CvSVMKernel_ocl::calc_rbf( int vcount, const int row_idx, Qfloat* results, Mat& src)
-{
-    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
-        for(int m = 0; m < vcount; m++)
-            results[m] = (Qfloat) * src.ptr<float>(row_idx, m);
-    else
-        for(int m = 0; m < vcount; m++)
-            results[m] = (Qfloat) * src.ptr<double>(row_idx, m);
-}
-
-void CvSVMKernel_ocl::calc_linear( int vcount, const int row_idx, Qfloat* results, Mat& src )
-{
-    calc_non_rbf_base( vcount, row_idx, results, src);
-}
-
-void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results, Mat& src)
-{
-    calc_non_rbf_base( vcount, row_idx, results, src);
-
-#if !defined(HAVE_CLAMDBLAS)
-    // nothing
-#else
-    CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results );
-    if( vcount > 0 )
-        cvPow( &R, &R, params->degree );
-#endif
-}
-
-
-void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* results, Mat& src)
-{
-    calc_non_rbf_base( vcount, row_idx, results, src);
-    // TODO: speedup this
-#if !defined(HAVE_CLAMDBLAS)
-    // nothing
-#else
-    for(int j = 0; j < vcount; j++ )
-    {
-        Qfloat t = results[j];
-        double e = ::exp(-fabs(t));
-        if( t > 0 )
-            results[j] = (Qfloat)((1. - e) / (1. + e));
-        else
-            results[j] = (Qfloat)((e - 1.) / (e + 1.));
-    }
-#endif
-}
-
-CvSVM_OCL::CvSVM_OCL()
-{
-    CvSVM();
-}
-
-CvSVM_OCL::CvSVM_OCL( const Mat& _train_data, const Mat& _responses,
-                      const Mat& _var_idx, const Mat& _sample_idx, CvSVMParams _params )
-{
-    decision_func = 0;
-    class_labels = 0;
-    class_weights = 0;
-    storage = 0;
-    var_idx = 0;
-    kernel = 0;
-    solver = 0;
-    default_model_name = "my_svm";
-
-    train( _train_data, _responses, _var_idx, _sample_idx, _params );
-}
-
-void CvSVM_OCL::create_kernel()
-{
-    kernel = new CvSVMKernel_ocl(&params, 0, 0);
-}
-
-void CvSVM_OCL::create_solver( )
-{
-    solver = new CvSVMSolver_ocl(&params);
-}
-
-} }
diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp
deleted file mode 100644
index 6e75ee2..0000000
--- a/modules/ocl/src/tvl1flow.cpp
+++ /dev/null
@@ -1,477 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//        Jin Ma, jin@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencl_kernels.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL()
-{
-    tau            = 0.25;
-    lambda         = 0.15;
-    theta          = 0.3;
-    nscales        = 5;
-    warps          = 5;
-    epsilon        = 0.01;
-    iterations     = 300;
-    useInitialFlow = false;
-}
-
-void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy)
-{
-    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
-    CV_Assert( I0.size() == I1.size() );
-    CV_Assert( I0.type() == I1.type() );
-    CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
-    CV_Assert( nscales > 0 );
-
-    // allocate memory for the pyramid structure
-    I0s.resize(nscales);
-    I1s.resize(nscales);
-    u1s.resize(nscales);
-    u2s.resize(nscales);
-    //I0s_step == I1s_step
-    I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
-    I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
-
-
-    if (!useInitialFlow)
-    {
-        flowx.create(I0.size(), CV_32FC1);
-        flowy.create(I0.size(), CV_32FC1);
-    }
-    //u1s_step != u2s_step
-    u1s[0] = flowx;
-    u2s[0] = flowy;
-
-    I1x_buf.create(I0.size(), CV_32FC1);
-    I1y_buf.create(I0.size(), CV_32FC1);
-
-    I1w_buf.create(I0.size(), CV_32FC1);
-    I1wx_buf.create(I0.size(), CV_32FC1);
-    I1wy_buf.create(I0.size(), CV_32FC1);
-
-    grad_buf.create(I0.size(), CV_32FC1);
-    rho_c_buf.create(I0.size(), CV_32FC1);
-
-    p11_buf.create(I0.size(), CV_32FC1);
-    p12_buf.create(I0.size(), CV_32FC1);
-    p21_buf.create(I0.size(), CV_32FC1);
-    p22_buf.create(I0.size(), CV_32FC1);
-
-    diff_buf.create(I0.size(), CV_32FC1);
-
-    // create the scales
-    for (int s = 1; s < nscales; ++s)
-    {
-        ocl::pyrDown(I0s[s - 1], I0s[s]);
-        ocl::pyrDown(I1s[s - 1], I1s[s]);
-
-        if (I0s[s].cols < 16 || I0s[s].rows < 16)
-        {
-            nscales = s;
-            break;
-        }
-
-        if (useInitialFlow)
-        {
-            ocl::pyrDown(u1s[s - 1], u1s[s]);
-            ocl::pyrDown(u2s[s - 1], u2s[s]);
-
-            ocl::multiply(0.5, u1s[s], u1s[s]);
-            ocl::multiply(0.5, u2s[s], u2s[s]);
-        }
-    }
-
-    // pyramidal structure for computing the optical flow
-    for (int s = nscales - 1; s >= 0; --s)
-    {
-        // compute the optical flow at the current scale
-        procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
-
-        // if this was the last scale, finish now
-        if (s == 0)
-            break;
-
-        // otherwise, upsample the optical flow
-
-        // zoom the optical flow for the next finer scale
-        ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
-        ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
-
-        // scale the optical flow with the appropriate zoom factor
-        multiply(2, u1s[s - 1], u1s[s - 1]);
-        multiply(2, u2s[s - 1], u2s[s - 1]);
-
-    }
-
-}
-
-namespace ocl_tvl1flow
-{
-    void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy);
-
-    void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y,
-        oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy,
-        oclMat &grad, oclMat &rho);
-
-    void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
-        oclMat &rho_c, oclMat &p11, oclMat &p12,
-        oclMat &p21, oclMat &p22, oclMat &u1,
-        oclMat &u2, oclMat &error, float l_t, float theta, char calc_error);
-
-    void estimateDualVariables(oclMat &u1, oclMat &u2,
-        oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
-}
-
-void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
-{
-    using namespace ocl_tvl1flow;
-
-    const double scaledEpsilon = epsilon * epsilon * I0.size().area();
-
-    CV_DbgAssert( I1.size() == I0.size() );
-    CV_DbgAssert( I1.type() == I0.type() );
-    CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
-    CV_DbgAssert( u2.size() == u1.size() );
-
-    if (u1.empty())
-    {
-        u1.create(I0.size(), CV_32FC1);
-        u1.setTo(Scalar::all(0));
-
-        u2.create(I0.size(), CV_32FC1);
-        u2.setTo(Scalar::all(0));
-    }
-
-    oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
-
-    centeredGradient(I1, I1x, I1y);
-
-    oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
-
-    oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
-
-    oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
-    oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
-    p11.setTo(Scalar::all(0));
-    p12.setTo(Scalar::all(0));
-    p21.setTo(Scalar::all(0));
-    p22.setTo(Scalar::all(0));
-
-    oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
-
-    const float l_t = static_cast<float>(lambda * theta);
-    const float taut = static_cast<float>(tau / theta);
-
-    for (int warpings = 0; warpings < warps; ++warpings)
-    {
-        warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
-
-        double error = std::numeric_limits<double>::max();
-        double prev_error = 0;
-        for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
-        {
-            // some tweaks to make sum operation less frequently
-            char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
-            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
-                      u1, u2, diff, l_t, static_cast<float>(theta), calc_error);
-            if(calc_error)
-            {
-                error = ocl::sum(diff)[0];
-                prev_error = error;
-            }
-            else
-            {
-                error = std::numeric_limits<double>::max();
-                prev_error -= scaledEpsilon;
-            }
-            estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
-
-        }
-    }
-
-
-}
-
-void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
-{
-    I0s.clear();
-    I1s.clear();
-    u1s.clear();
-    u2s.clear();
-
-    I1x_buf.release();
-    I1y_buf.release();
-
-    I1w_buf.release();
-    I1wx_buf.release();
-    I1wy_buf.release();
-
-    grad_buf.release();
-    rho_c_buf.release();
-
-    p11_buf.release();
-    p12_buf.release();
-    p21_buf.release();
-    p22_buf.release();
-
-    diff_buf.release();
-    norm_buf.release();
-}
-
-void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
-{
-    Context  *clCxt = src.clCxt;
-    size_t localThreads[3] = {32, 8, 1};
-    size_t globalThreads[3] = {src.cols, src.rows, 1};
-
-    int srcElementSize = src.elemSize();
-    int src_step = src.step/srcElementSize;
-
-    int dElememntSize = dx.elemSize();
-    int dx_step = dx.step/dElememntSize;
-
-    String kernelName = "centeredGradientKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&src.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&src.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&src.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&src_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&dx.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&dy.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&dx_step));
-    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);
-
-}
-
-void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
-{
-    Context *clCxt = u1.clCxt;
-
-    size_t localThread[] = {32, 8, 1};
-    size_t globalThread[] =
-    {
-        u1.cols,
-        u1.rows,
-        1
-    };
-
-    int u1_element_size = u1.elemSize();
-    int u1_step = u1.step/u1_element_size;
-
-    int u2_element_size = u2.elemSize();
-    int u2_step = u2.step/u2_element_size;
-
-    int p11_element_size = p11.elemSize();
-    int p11_step = p11.step/p11_element_size;
-
-    int u1_offset_y = u1.offset/u1.step;
-    int u1_offset_x = u1.offset%u1.step;
-    u1_offset_x = u1_offset_x/u1.elemSize();
-
-    int u2_offset_y = u2.offset/u2.step;
-    int u2_offset_x = u2.offset%u2.step;
-    u2_offset_x = u2_offset_x/u2.elemSize();
-
-    String kernelName = "estimateDualVariablesKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u1.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u2.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p11.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&p11_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p12.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p21.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p22.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void*)&taut));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_y));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_y));
-
-    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
-}
-
-void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
-    oclMat &rho_c, oclMat &p11, oclMat &p12,
-    oclMat &p21, oclMat &p22, oclMat &u1,
-    oclMat &u2, oclMat &error, float l_t, float theta, char calc_error)
-{
-    Context* clCxt = I1wx.clCxt;
-
-    size_t localThread[] = {32, 8, 1};
-    size_t globalThread[] =
-    {
-        I1wx.cols,
-        I1wx.rows,
-        1
-    };
-
-    int I1wx_element_size = I1wx.elemSize();
-    int I1wx_step = I1wx.step/I1wx_element_size;
-
-    int u1_element_size = u1.elemSize();
-    int u1_step = u1.step/u1_element_size;
-
-    int u2_element_size = u2.elemSize();
-    int u2_step = u2.step/u2_element_size;
-
-    int u1_offset_y = u1.offset/u1.step;
-    int u1_offset_x = u1.offset%u1.step;
-    u1_offset_x = u1_offset_x/u1.elemSize();
-
-    int u2_offset_y = u2.offset/u2.step;
-    int u2_offset_x = u2.offset%u2.step;
-    u2_offset_x = u2_offset_x/u2.elemSize();
-
-    String kernelName = "estimateUKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1wx.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I1wx.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I1wx.rows));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I1wx_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1wy.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&grad.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&rho_c.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p11.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p12.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p21.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&p22.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u1.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u2.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&error.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void*)&l_t));
-    args.push_back( std::make_pair( sizeof(cl_float), (void*)&theta));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_y));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_y));
-    args.push_back( std::make_pair( sizeof(cl_char), (void*)&calc_error));
-
-    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
-}
-
-void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
-{
-    Context* clCxt = I0.clCxt;
-
-    int u1ElementSize = u1.elemSize();
-    int u1Step = u1.step/u1ElementSize;
-
-    int u2ElementSize = u2.elemSize();
-    int u2Step = u2.step/u2ElementSize;
-
-    int I0ElementSize = I0.elemSize();
-    int I0Step = I0.step/I0ElementSize;
-
-    int I1w_element_size = I1w.elemSize();
-    int I1w_step = I1w.step/I1w_element_size;
-
-    int u1_offset_y = u1.offset/u1.step;
-    int u1_offset_x = u1.offset%u1.step;
-    u1_offset_x = u1_offset_x/u1.elemSize();
-
-    int u2_offset_y = u2.offset/u2.step;
-    int u2_offset_x = u2.offset%u2.step;
-    u2_offset_x = u2_offset_x/u2.elemSize();
-
-    size_t localThread[] = {32, 8, 1};
-    size_t globalThread[] =
-    {
-        I0.cols,
-        I0.rows,
-        1
-    };
-
-    cl_mem I1_tex;
-    cl_mem I1x_tex;
-    cl_mem I1y_tex;
-    I1_tex = bindTexture(I1);
-    I1x_tex = bindTexture(I1x);
-    I1y_tex = bindTexture(I1y);
-
-    String kernelName = "warpBackwardKernel";
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I0.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I0Step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I0.cols));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I0.rows));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1_tex));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1x_tex));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1y_tex));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u1.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1Step));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&u2.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1w.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1wx.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&I1wy.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&grad.data));
-    args.push_back( std::make_pair( sizeof(cl_mem), (void*)&rho.data));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&I1w_step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2Step));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u1_offset_y));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_x));
-    args.push_back( std::make_pair( sizeof(cl_int), (void*)&u2_offset_y));
-
-    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
-
-    releaseTexture(I1_tex);
-    releaseTexture(I1x_tex);
-    releaseTexture(I1y_tex);
-}
diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp
deleted file mode 100644
index d284fcf..0000000
--- a/modules/ocl/test/main.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#define DUMP_PROPERTY_XML(propertyName, propertyValue) \
-    do { \
-        std::stringstream ssName, ssValue;\
-        ssName << propertyName;\
-        ssValue << propertyValue; \
-        ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
-    } while (false)
-
-#define DUMP_MESSAGE_STDOUT(msg) \
-    do { \
-        std::cout << msg << std::endl; \
-    } while (false)
-
-#include "opencv2/ocl/private/opencl_dumpinfo.hpp"
-
-int LOOP_TIMES = 1;
-
-void readLoopTimes(int argc, char ** argv)
-{
-    const char * const command_line_keys =
-            "{   test_loop_times             |1        |count of iterations per each test}"
-            "{h  help                        |false    |print help info}";
-
-    cv::CommandLineParser parser(argc, argv, command_line_keys);
-    if (parser.has("help"))
-    {
-        std::cout << "\nAvailable options besides google test option: \n";
-        parser.printMessage();
-    }
-
-    LOOP_TIMES = parser.get<int>("test_loop_times");
-    CV_Assert(LOOP_TIMES > 0);
-}
-
-CV_TEST_MAIN(".", ::dumpOpenCLDevice(),
-                  readLoopTimes(argc, argv))
diff --git a/modules/ocl/test/test_api.cpp b/modules/ocl/test/test_api.cpp
deleted file mode 100644
index 6ca4027..0000000
--- a/modules/ocl/test/test_api.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include "opencv2/core/opencl/runtime/opencl_core.hpp" // for OpenCL types & functions
-#include "opencv2/core/ocl.hpp"
-
-TEST(TestAPI, openCLExecuteKernelInterop)
-{
-    cv::RNG rng;
-    Size sz(10000, 1);
-    cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false);
-
-    cv::ocl::oclMat gpuMat(cpuMat);
-    cv::ocl::oclMat gpuMatDst(sz, CV_32FC4);
-
-    const char* kernelStr =
-"__kernel void test_kernel(__global float4* src, __global float4* dst) {\n"
-"    int x = get_global_id(0);\n"
-"    dst[x] = src[x];\n"
-"}\n";
-
-    cv::ocl::ProgramSource program("test_interop", kernelStr);
-
-    using namespace std;
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data ));
-    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data ));
-
-    size_t globalThreads[3] = { sz.width, 1, 1 };
-    cv::ocl::openCLExecuteKernelInterop(
-        gpuMat.clCxt,
-        program,
-        "test_kernel",
-        globalThreads, NULL, args,
-        -1, -1,
-        "");
-
-    cv::Mat dst;
-    gpuMatDst.download(dst);
-
-    EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
-}
-
-TEST(OCL_TestTAPI, performance)
-{
-    cv::RNG rng;
-    cv::Mat src(1280,768,CV_8UC4), dst;
-    rng.fill(src, RNG::UNIFORM, 0, 255);
-
-    cv::UMat usrc, udst;
-    src.copyTo(usrc);
-
-    cv::ocl::oclMat osrc(src);
-    cv::ocl::oclMat odst;
-
-    int cvtcode = cv::COLOR_BGR2GRAY;
-    int i, niters = 10;
-    double t;
-
-    cv::ocl::cvtColor(osrc, odst, cvtcode);
-    cv::ocl::finish();
-    t = (double)cv::getTickCount();
-    for(i = 0; i < niters; i++)
-    {
-        cv::ocl::cvtColor(osrc, odst, cvtcode);
-    }
-    cv::ocl::finish();
-    t = (double)cv::getTickCount() - t;
-    printf("ocl exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
-
-    cv::cvtColor(usrc, udst, cvtcode);
-    cv::ocl::finish2();
-    t = (double)cv::getTickCount();
-    for(i = 0; i < niters; i++)
-    {
-        cv::cvtColor(usrc, udst, cvtcode);
-    }
-    cv::ocl::finish2();
-    t = (double)cv::getTickCount() - t;
-    printf("t-api exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
-
-    cv::cvtColor(src, dst, cvtcode);
-    t = (double)cv::getTickCount();
-    for(i = 0; i < niters; i++)
-    {
-        cv::cvtColor(src, dst, cvtcode);
-    }
-    t = (double)cv::getTickCount() - t;
-    printf("cpu exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
-}
-
-// This test must be DISABLED by default!
-// (We can't restore original context for other tests)
-TEST(TestAPI, DISABLED_InitializationFromHandles)
-{
-#define MAX_PLATFORMS 16
-    cl_platform_id platforms[MAX_PLATFORMS] = { NULL };
-    cl_uint numPlatforms = 0;
-    cl_int status = ::clGetPlatformIDs(MAX_PLATFORMS, &platforms[0], &numPlatforms);
-    ASSERT_EQ(CL_SUCCESS, status) << "clGetPlatformIDs";
-    ASSERT_NE(0, (int)numPlatforms);
-
-    int selectedPlatform = 0;
-    cl_platform_id platform = platforms[selectedPlatform];
-
-    ASSERT_NE((void*)NULL, platform);
-
-    cl_device_id device = NULL;
-    status = ::clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
-    ASSERT_EQ(CL_SUCCESS, status) << "clGetDeviceIDs";
-    ASSERT_NE((void*)NULL, device);
-
-    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0 };
-    cl_context context = ::clCreateContext(cps, 1, &device, NULL, NULL, &status);
-    ASSERT_EQ(CL_SUCCESS, status) << "clCreateContext";
-    ASSERT_NE((void*)NULL, context);
-
-    ASSERT_NO_THROW(cv::ocl::initializeContext(&platform, &context, &device));
-
-    status = ::clReleaseContext(context);
-    ASSERT_EQ(CL_SUCCESS, status) << "clReleaseContext";
-
-#ifdef CL_VERSION_1_2
-#if 1
-    {
-        cv::ocl::Context* ctx = cv::ocl::Context::getContext();
-        ASSERT_NE((void*)NULL, ctx);
-        if (ctx->supportsFeature(cv::ocl::FEATURE_CL_VER_1_2)) // device supports OpenCL 1.2+
-        {
-            status = ::clReleaseDevice(device);
-            ASSERT_EQ(CL_SUCCESS, status) << "clReleaseDevice";
-        }
-    }
-#else // code below doesn't work on Linux (SEGFAULTs on 1.1- devices are not handled via exceptions)
-    try
-    {
-        status = ::clReleaseDevice(device); // NOTE This works only with !DEVICES! that supports OpenCL 1.2
-        (void)status; // no check
-    }
-    catch (...)
-    {
-        // nothing, there is no problem
-    }
-#endif
-#endif
-
-    // print the name of current device
-    cv::ocl::Context* ctx = cv::ocl::Context::getContext();
-    ASSERT_NE((void*)NULL, ctx);
-    const cv::ocl::DeviceInfo& deviceInfo = ctx->getDeviceInfo();
-    std::cout << "Device name: " << deviceInfo.deviceName << std::endl;
-    std::cout << "Platform name: " << deviceInfo.platform->platformName << std::endl;
-
-    ASSERT_EQ(context, *(cl_context*)ctx->getOpenCLContextPtr());
-    ASSERT_EQ(device, *(cl_device_id*)ctx->getOpenCLDeviceIDPtr());
-
-    // do some calculations and check results
-    cv::RNG rng;
-    Size sz(100, 100);
-    cv::Mat srcMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false);
-    cv::Mat dstMat;
-
-    cv::ocl::oclMat srcGpuMat(srcMat);
-    cv::ocl::oclMat dstGpuMat;
-
-    cv::Scalar v = cv::Scalar::all(1);
-    cv::add(srcMat, v, dstMat);
-    cv::ocl::add(srcGpuMat, v, dstGpuMat);
-
-    cv::Mat dstGpuMatMap;
-    dstGpuMat.download(dstGpuMatMap);
-
-    EXPECT_LE(checkNorm(dstMat, dstGpuMatMap), 1e-3);
-}
diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp
deleted file mode 100644
index bf37afd..0000000
--- a/modules/ocl/test/test_arithm.cpp
+++ /dev/null
@@ -1,1621 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Jiang Liyuan,jlyuan001.good@163.com
-//    Rock Li, Rock.Li@amd.com
-//    Zailong Wu, bullet@yeah.net
-//    Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <iomanip>
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-static bool relativeError(double actual, double expected, double eps)
-{
-    return std::abs(actual - expected) / actual < eps;
-}
-
-//////////////////////////////// LUT /////////////////////////////////////////////////
-
-PARAM_TEST_CASE(Lut, MatDepth, MatDepth, bool, bool)
-{
-    int lut_depth;
-    int cn;
-    bool use_roi, same_cn;
-
-    // src mat
-    cv::Mat src;
-    cv::Mat lut;
-    cv::Mat dst;
-
-    // src mat with roi
-    cv::Mat src_roi;
-    cv::Mat lut_roi;
-    cv::Mat dst_roi;
-
-    // ocl dst mat for testing
-    cv::ocl::oclMat gsrc_whole;
-    cv::ocl::oclMat glut_whole;
-    cv::ocl::oclMat gdst_whole;
-
-    // ocl mat with roi
-    cv::ocl::oclMat gsrc_roi;
-    cv::ocl::oclMat glut_roi;
-    cv::ocl::oclMat gdst_roi;
-
-    virtual void SetUp()
-    {
-        lut_depth = GET_PARAM(0);
-        cn = GET_PARAM(1);
-        same_cn = GET_PARAM(2);
-        use_roi = GET_PARAM(3);
-    }
-
-    void random_roi()
-    {
-        const int src_type = CV_MAKE_TYPE(CV_8U, cn);
-        const int lut_type = CV_MAKE_TYPE(lut_depth, same_cn ? cn : 1);
-        const int dst_type = CV_MAKE_TYPE(lut_depth, cn);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, src_type, 0, 256);
-
-        Size lutRoiSize = Size(256, 1);
-        Border lutBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(lut, lut_roi, lutRoiSize, lutBorder, lut_type, 5, 16);
-
-        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, dst_type, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(glut_whole, glut_roi, lut, lutRoiSize, lutBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-OCL_TEST_P(Lut, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::LUT(src_roi, lut_roi, dst_roi);
-        cv::ocl::LUT(gsrc_roi, glut_roi, gdst_roi);
-
-        Near();
-    }
-}
-
-///////////////////////// ArithmTestBase ///////////////////////////
-
-PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
-{
-    int depth;
-    int cn;
-    bool use_roi;
-    cv::Scalar val;
-
-    // src mat
-    cv::Mat src1;
-    cv::Mat src2;
-    cv::Mat mask;
-    cv::Mat dst1;
-    cv::Mat dst2;
-
-    // src mat with roi
-    cv::Mat src1_roi;
-    cv::Mat src2_roi;
-    cv::Mat mask_roi;
-    cv::Mat dst1_roi;
-    cv::Mat dst2_roi;
-
-    // ocl dst mat for testing
-    cv::ocl::oclMat gsrc1_whole;
-    cv::ocl::oclMat gsrc2_whole;
-    cv::ocl::oclMat gdst1_whole;
-    cv::ocl::oclMat gdst2_whole;
-    cv::ocl::oclMat gmask_whole;
-
-    // ocl mat with roi
-    cv::ocl::oclMat gsrc1_roi;
-    cv::ocl::oclMat gsrc2_roi;
-    cv::ocl::oclMat gdst1_roi;
-    cv::ocl::oclMat gdst2_roi;
-    cv::ocl::oclMat gmask_roi;
-
-    virtual void SetUp()
-    {
-        depth = GET_PARAM(0);
-        cn = GET_PARAM(1);
-        use_roi = GET_PARAM(2);
-    }
-
-    virtual void random_roi()
-    {
-        const int type = CV_MAKE_TYPE(depth, cn);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11);
-
-        Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src2, src2_roi, roiSize, src2Border, type, -1540, 1740);
-
-        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16);
-
-        Border dst2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst2, dst2_roi, roiSize, dst2Border, type, 5, 16);
-
-        Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, 0, 2);
-        cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
-
-
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, src1Border);
-        generateOclMat(gsrc2_whole, gsrc2_roi, src2, roiSize, src2Border);
-        generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
-        generateOclMat(gdst2_whole, gdst2_roi, dst2, roiSize, dst2Border);
-        generateOclMat(gmask_whole, gmask_roi, mask, roiSize, maskBorder);
-
-        val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
-                         rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
-    }
-
-    void Near(double threshold = 0.)
-    {
-        Mat whole, roi;
-        gdst1_whole.download(whole);
-        gdst1_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst1, whole, threshold);
-        EXPECT_MAT_NEAR(dst1_roi, roi, threshold);
-    }
-
-    void Near1(double threshold = 0.)
-    {
-        Mat whole, roi;
-        gdst2_whole.download(whole);
-        gdst2_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst2, whole, threshold);
-        EXPECT_MAT_NEAR(dst2_roi, roi, threshold);
-    }
-};
-
-//////////////////////////////// Exp /////////////////////////////////////////////////
-
-typedef ArithmTestBase Exp;
-
-OCL_TEST_P(Exp, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::exp(src1_roi, dst1_roi);
-        cv::ocl::exp(gsrc1_roi, gdst1_roi);
-
-        Near(2);
-    }
-}
-
-//////////////////////////////// Log /////////////////////////////////////////////////
-
-typedef ArithmTestBase Log;
-
-OCL_TEST_P(Log, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::log(src1_roi, dst1_roi);
-        cv::ocl::log(gsrc1_roi, gdst1_roi);
-        Near(1);
-    }
-}
-
-//////////////////////////////// Sqrt ////////////////////////////////////////////////
-
-typedef ArithmTestBase Sqrt;
-
-OCL_TEST_P(Sqrt, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::sqrt(src1_roi, dst1_roi);
-        cv::ocl::sqrt(gsrc1_roi, gdst1_roi);
-        Near(1);
-    }
-}
-
-//////////////////////////////// Add /////////////////////////////////////////////////
-
-typedef ArithmTestBase Add;
-
-OCL_TEST_P(Add, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::add(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::add(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Add, Mat_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::add(src1_roi, src2_roi, dst1_roi, mask_roi);
-        cv::ocl::add(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Add, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::add(src1_roi, val, dst1_roi);
-        cv::ocl::add(gsrc1_roi, val, gdst1_roi);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Add, Scalar_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::add(src1_roi, val, dst1_roi, mask_roi);
-        cv::ocl::add(gsrc1_roi, val, gdst1_roi, gmask_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// Sub /////////////////////////////////////////////////
-
-typedef ArithmTestBase Sub;
-
-OCL_TEST_P(Sub, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::subtract(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::subtract(gsrc1_roi, gsrc2_roi, gdst1_roi);
-
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Sub, Mat_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::subtract(src1_roi, src2_roi, dst1_roi, mask_roi);
-        cv::ocl::subtract(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Sub, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::subtract(src1_roi, val, dst1_roi);
-        cv::ocl::subtract(gsrc1_roi, val, gdst1_roi);
-
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Sub, Scalar_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::subtract(src1_roi, val, dst1_roi, mask_roi);
-        cv::ocl::subtract(gsrc1_roi, val, gdst1_roi, gmask_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// Mul /////////////////////////////////////////////////
-
-typedef ArithmTestBase Mul;
-
-OCL_TEST_P(Mul, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::multiply(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::multiply(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Mul, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::multiply(Scalar::all(val[0]), src1_roi, dst1_roi);
-        cv::ocl::multiply(val[0], gsrc1_roi, gdst1_roi);
-
-        Near(gdst1_roi.depth() >= CV_32F ? 1e-3 : 1);
-    }
-}
-
-OCL_TEST_P(Mul, Mat_Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::multiply(src1_roi, src2_roi, dst1_roi, val[0]);
-        cv::ocl::multiply(gsrc1_roi, gsrc2_roi, gdst1_roi, val[0]);
-
-        Near(gdst1_roi.depth() >= CV_32F ? 1e-3 : 1);
-    }
-}
-
-//////////////////////////////// Div /////////////////////////////////////////////////
-
-typedef ArithmTestBase Div;
-
-OCL_TEST_P(Div, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::divide(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::divide(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(1);
-    }
-}
-
-OCL_TEST_P(Div, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::divide(val[0], src1_roi, dst1_roi);
-        cv::ocl::divide(val[0], gsrc1_roi, gdst1_roi);
-
-        Near(gdst1_roi.depth() >= CV_32F ? 1e-3 : 1);
-    }
-}
-
-OCL_TEST_P(Div, Mat_Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::divide(src1_roi, src2_roi, dst1_roi, val[0]);
-        cv::ocl::divide(gsrc1_roi, gsrc2_roi, gdst1_roi, val[0]);
-
-        Near(gdst1_roi.depth() >= CV_32F ? 4e-3 : 1);
-    }
-}
-
-//////////////////////////////// Absdiff /////////////////////////////////////////////////
-
-typedef ArithmTestBase Min;
-
-OCL_TEST_P(Min, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        dst1_roi = cv::min(src1_roi, src2_roi);
-        cv::ocl::min(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-typedef ArithmTestBase Max;
-
-OCL_TEST_P(Max, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        dst1_roi = cv::min(src1_roi, src2_roi);
-        cv::ocl::min(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-//////////////////////////////// Abs /////////////////////////////////////////////////////
-
-typedef ArithmTestBase Abs;
-
-OCL_TEST_P(Abs, Abs)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        dst1_roi = cv::abs(src1_roi);
-        cv::ocl::abs(gsrc1_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-//////////////////////////////// Absdiff /////////////////////////////////////////////////
-
-typedef ArithmTestBase Absdiff;
-
-OCL_TEST_P(Absdiff, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::absdiff(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::absdiff(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Absdiff, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::absdiff(src1_roi, val, dst1_roi);
-        cv::ocl::absdiff(gsrc1_roi, val, gdst1_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// CartToPolar /////////////////////////////////////////////////
-
-typedef ArithmTestBase CartToPolar;
-
-OCL_TEST_P(CartToPolar, angleInDegree)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::cartToPolar(src1_roi, src2_roi, dst1_roi, dst2_roi, true);
-        cv::ocl::cartToPolar(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi, true);
-        Near(.5);
-        Near1(.5);
-    }
-}
-
-OCL_TEST_P(CartToPolar, angleInRadians)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::cartToPolar(src1_roi, src2_roi, dst1_roi, dst2_roi);
-        cv::ocl::cartToPolar(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi);
-        Near(.5);
-        Near1(.5);
-    }
-}
-
-//////////////////////////////// PolarToCart /////////////////////////////////////////////////
-
-typedef ArithmTestBase PolarToCart;
-
-OCL_TEST_P(PolarToCart, angleInDegree)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::polarToCart(src1_roi, src2_roi, dst1_roi, dst2_roi, true);
-        cv::ocl::polarToCart(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi, true);
-
-        Near(.5);
-        Near1(.5);
-    }
-}
-
-OCL_TEST_P(PolarToCart, angleInRadians)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::polarToCart(src1_roi, src2_roi, dst1_roi, dst2_roi);
-        cv::ocl::polarToCart(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi);
-
-        Near(.5);
-        Near1(.5);
-    }
-}
-
-//////////////////////////////// Magnitude /////////////////////////////////////////////////
-
-typedef ArithmTestBase Magnitude;
-
-OCL_TEST_P(Magnitude, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::magnitude(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::magnitude(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(depth == CV_64F ? 1e-5 : 1e-2);
-    }
-}
-
-//////////////////////////////// Transpose /////////////////////////////////////////////////
-
-typedef ArithmTestBase Transpose;
-
-OCL_TEST_P(Transpose, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::transpose(src1_roi, dst1_roi);
-        cv::ocl::transpose(gsrc1_roi, gdst1_roi);
-
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Transpose, SquareInplace)
-{
-    const int type = CV_MAKE_TYPE(depth, cn);
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        Size roiSize = randomSize(1, MAX_VALUE);
-        roiSize.height = roiSize.width; // make it square
-
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, srcBorder, type, 5, 16);
-
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
-
-        cv::transpose(src1_roi, src1_roi);
-        cv::ocl::transpose(gsrc1_roi, gsrc1_roi);
-
-        EXPECT_MAT_NEAR(src1, Mat(gsrc1_whole), 0.0);
-        EXPECT_MAT_NEAR(src1_roi, Mat(gsrc1_roi), 0.0);
-    }
-}
-
-//////////////////////////////// Flip /////////////////////////////////////////////////
-
-typedef ArithmTestBase Flip;
-
-OCL_TEST_P(Flip, X)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::flip(src1_roi, dst1_roi, 0);
-        cv::ocl::flip(gsrc1_roi, gdst1_roi, 0);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Flip, Y)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::flip(src1_roi, dst1_roi, 1);
-        cv::ocl::flip(gsrc1_roi, gdst1_roi, 1);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Flip, BOTH)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::flip(src1_roi, dst1_roi, -1);
-        cv::ocl::flip(gsrc1_roi, gdst1_roi, -1);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// MinMax /////////////////////////////////////////////////
-
-typedef ArithmTestBase MinMax;
-
-OCL_TEST_P(MinMax, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double minVal, maxVal;
-
-        if (src1.depth() != CV_8S)
-            cv::minMaxIdx(src1_roi, &minVal, &maxVal, NULL, NULL);
-        else
-        {
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src1_roi.rows; ++i)
-                for (int j = 0; j < src1_roi.cols; ++j)
-                {
-                    signed char val = src1_roi.at<signed char>(i, j);
-                    if (val < minVal) minVal = val;
-                    if (val > maxVal) maxVal = val;
-                }
-        }
-
-        double minVal_, maxVal_;
-        cv::ocl::minMax(gsrc1_roi, &minVal_, &maxVal_);
-
-        EXPECT_DOUBLE_EQ(minVal_, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_, maxVal);
-    }
-}
-
-OCL_TEST_P(MinMax, MASK)
-{
-    enum { MAX_IDX = 0, MIN_IDX };
-    static const double minMaxGolds[2][7] =
-    {
-        { std::numeric_limits<uchar>::min(), std::numeric_limits<char>::min(), std::numeric_limits<ushort>::min(),
-          std::numeric_limits<short>::min(), std::numeric_limits<int>::min(), -std::numeric_limits<float>::max(), -std::numeric_limits<double>::max() },
-        { std::numeric_limits<uchar>::max(), std::numeric_limits<char>::max(), std::numeric_limits<ushort>::max(),
-          std::numeric_limits<short>::max(), std::numeric_limits<int>::max(), std::numeric_limits<float>::max(), std::numeric_limits<double>::max() },
-    };
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-
-        if (src1.depth() != CV_8S)
-            cv::minMaxLoc(src1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
-        else
-        {
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src1_roi.rows; ++i)
-                for (int j = 0; j < src1_roi.cols; ++j)
-                {
-                    signed char val = src1_roi.at<signed char>(i, j);
-                    unsigned char m = mask_roi.at<unsigned char>(i, j);
-                    if (val < minVal && m) minVal = val;
-                    if (val > maxVal && m) maxVal = val;
-                }
-        }
-
-        double minVal_, maxVal_;
-        cv::ocl::minMax(gsrc1_roi, &minVal_, &maxVal_, gmask_roi);
-
-        if (cv::countNonZero(mask_roi) == 0)
-        {
-            EXPECT_DOUBLE_EQ(minMaxGolds[MIN_IDX][depth], minVal_);
-            EXPECT_DOUBLE_EQ(minMaxGolds[MAX_IDX][depth], maxVal_);
-        }
-        else
-        {
-            EXPECT_DOUBLE_EQ(minVal, minVal_);
-            EXPECT_DOUBLE_EQ(maxVal, maxVal_);
-        }
-    }
-}
-
-//////////////////////////////// MinMaxLoc /////////////////////////////////////////////////
-
-typedef ArithmTestBase MinMaxLoc;
-
-OCL_TEST_P(MinMaxLoc, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-        int depth = src1.depth();
-
-        if (depth != CV_8S)
-            cv::minMaxLoc(src1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
-        else
-        {
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src1_roi.rows; ++i)
-                for (int j = 0; j < src1_roi.cols; ++j)
-                {
-                    signed char val = src1_roi.at<signed char>(i, j);
-                    if (val < minVal)
-                    {
-                        minVal = val;
-                        minLoc.x = j;
-                        minLoc.y = i;
-                    }
-                    if (val > maxVal)
-                    {
-                        maxVal = val;
-                        maxLoc.x = j;
-                        maxLoc.y = i;
-                    }
-                }
-        }
-
-        double minVal_, maxVal_;
-        cv::Point minLoc_, maxLoc_;
-        cv::ocl::minMaxLoc(gsrc1_roi, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
-
-        double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
-        if (depth == 0)
-        {
-            minlocVal = src1_roi.at<unsigned char>(minLoc);
-            minlocVal_ = src1_roi.at<unsigned char>(minLoc_);
-            maxlocVal = src1_roi.at<unsigned char>(maxLoc);
-            maxlocVal_ = src1_roi.at<unsigned char>(maxLoc_);
-            error0 = ::abs(src1_roi.at<unsigned char>(minLoc_) - src1_roi.at<unsigned char>(minLoc));
-            error1 = ::abs(src1_roi.at<unsigned char>(maxLoc_) - src1_roi.at<unsigned char>(maxLoc));
-        }
-        if (depth == 1)
-        {
-            minlocVal = src1_roi.at<signed char>(minLoc);
-            minlocVal_ = src1_roi.at<signed char>(minLoc_);
-            maxlocVal = src1_roi.at<signed char>(maxLoc);
-            maxlocVal_ = src1_roi.at<signed char>(maxLoc_);
-            error0 = ::abs(src1_roi.at<signed char>(minLoc_) - src1_roi.at<signed char>(minLoc));
-            error1 = ::abs(src1_roi.at<signed char>(maxLoc_) - src1_roi.at<signed char>(maxLoc));
-        }
-        if (depth == 2)
-        {
-            minlocVal = src1_roi.at<unsigned short>(minLoc);
-            minlocVal_ = src1_roi.at<unsigned short>(minLoc_);
-            maxlocVal = src1_roi.at<unsigned short>(maxLoc);
-            maxlocVal_ = src1_roi.at<unsigned short>(maxLoc_);
-            error0 = ::abs(src1_roi.at<unsigned short>(minLoc_) - src1_roi.at<unsigned short>(minLoc));
-            error1 = ::abs(src1_roi.at<unsigned short>(maxLoc_) - src1_roi.at<unsigned short>(maxLoc));
-        }
-        if (depth == 3)
-        {
-            minlocVal = src1_roi.at<signed short>(minLoc);
-            minlocVal_ = src1_roi.at<signed short>(minLoc_);
-            maxlocVal = src1_roi.at<signed short>(maxLoc);
-            maxlocVal_ = src1_roi.at<signed short>(maxLoc_);
-            error0 = ::abs(src1_roi.at<signed short>(minLoc_) - src1_roi.at<signed short>(minLoc));
-            error1 = ::abs(src1_roi.at<signed short>(maxLoc_) - src1_roi.at<signed short>(maxLoc));
-        }
-        if (depth == 4)
-        {
-            minlocVal = src1_roi.at<int>(minLoc);
-            minlocVal_ = src1_roi.at<int>(minLoc_);
-            maxlocVal = src1_roi.at<int>(maxLoc);
-            maxlocVal_ = src1_roi.at<int>(maxLoc_);
-            error0 = ::abs(src1_roi.at<int>(minLoc_) - src1_roi.at<int>(minLoc));
-            error1 = ::abs(src1_roi.at<int>(maxLoc_) - src1_roi.at<int>(maxLoc));
-        }
-        if (depth == 5)
-        {
-            minlocVal = src1_roi.at<float>(minLoc);
-            minlocVal_ = src1_roi.at<float>(minLoc_);
-            maxlocVal = src1_roi.at<float>(maxLoc);
-            maxlocVal_ = src1_roi.at<float>(maxLoc_);
-            error0 = ::abs(src1_roi.at<float>(minLoc_) - src1_roi.at<float>(minLoc));
-            error1 = ::abs(src1_roi.at<float>(maxLoc_) - src1_roi.at<float>(maxLoc));
-        }
-        if (depth == 6)
-        {
-            minlocVal = src1_roi.at<double>(minLoc);
-            minlocVal_ = src1_roi.at<double>(minLoc_);
-            maxlocVal = src1_roi.at<double>(maxLoc);
-            maxlocVal_ = src1_roi.at<double>(maxLoc_);
-            error0 = ::abs(src1_roi.at<double>(minLoc_) - src1_roi.at<double>(minLoc));
-            error1 = ::abs(src1_roi.at<double>(maxLoc_) - src1_roi.at<double>(maxLoc));
-        }
-
-        EXPECT_DOUBLE_EQ(minVal_, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_, maxVal);
-        EXPECT_DOUBLE_EQ(minlocVal_, minlocVal);
-        EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal);
-
-        EXPECT_DOUBLE_EQ(error0, 0.0);
-        EXPECT_DOUBLE_EQ(error1, 0.0);
-    }
-}
-
-OCL_TEST_P(MinMaxLoc, MASK)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-        int depth = src1.depth();
-        if (depth != CV_8S)
-            cv::minMaxLoc(src1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
-        else
-        {
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src1_roi.rows; ++i)
-                for (int j = 0; j < src1_roi.cols; ++j)
-                {
-                    signed char val = src1_roi.at<signed char>(i, j);
-                    unsigned char m = mask_roi.at<unsigned char>(i , j);
-                    if (val < minVal && m)
-                    {
-                        minVal = val;
-                        minLoc.x = j;
-                        minLoc.y = i;
-                    }
-                    if (val > maxVal && m)
-                    {
-                        maxVal = val;
-                        maxLoc.x = j;
-                        maxLoc.y = i;
-                    }
-                }
-        }
-
-        double minVal_, maxVal_;
-        cv::Point minLoc_, maxLoc_;
-        cv::ocl::minMaxLoc(gsrc1_roi, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask_roi);
-
-        double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
-        if (minLoc_.x == -1 || minLoc_.y == -1 || maxLoc_.x == -1 || maxLoc_.y == -1) continue;
-        if (depth == 0)
-        {
-            minlocVal = src1_roi.at<unsigned char>(minLoc);
-            minlocVal_ = src1_roi.at<unsigned char>(minLoc_);
-            maxlocVal = src1_roi.at<unsigned char>(maxLoc);
-            maxlocVal_ = src1_roi.at<unsigned char>(maxLoc_);
-            error0 = ::abs(src1_roi.at<unsigned char>(minLoc_) - src1_roi.at<unsigned char>(minLoc));
-            error1 = ::abs(src1_roi.at<unsigned char>(maxLoc_) - src1_roi.at<unsigned char>(maxLoc));
-        }
-        if (depth == 1)
-        {
-            minlocVal = src1_roi.at<signed char>(minLoc);
-            minlocVal_ = src1_roi.at<signed char>(minLoc_);
-            maxlocVal = src1_roi.at<signed char>(maxLoc);
-            maxlocVal_ = src1_roi.at<signed char>(maxLoc_);
-            error0 = ::abs(src1_roi.at<signed char>(minLoc_) - src1_roi.at<signed char>(minLoc));
-            error1 = ::abs(src1_roi.at<signed char>(maxLoc_) - src1_roi.at<signed char>(maxLoc));
-        }
-        if (depth == 2)
-        {
-            minlocVal = src1_roi.at<unsigned short>(minLoc);
-            minlocVal_ = src1_roi.at<unsigned short>(minLoc_);
-            maxlocVal = src1_roi.at<unsigned short>(maxLoc);
-            maxlocVal_ = src1_roi.at<unsigned short>(maxLoc_);
-            error0 = ::abs(src1_roi.at<unsigned short>(minLoc_) - src1_roi.at<unsigned short>(minLoc));
-            error1 = ::abs(src1_roi.at<unsigned short>(maxLoc_) - src1_roi.at<unsigned short>(maxLoc));
-        }
-        if (depth == 3)
-        {
-            minlocVal = src1_roi.at<signed short>(minLoc);
-            minlocVal_ = src1_roi.at<signed short>(minLoc_);
-            maxlocVal = src1_roi.at<signed short>(maxLoc);
-            maxlocVal_ = src1_roi.at<signed short>(maxLoc_);
-            error0 = ::abs(src1_roi.at<signed short>(minLoc_) - src1_roi.at<signed short>(minLoc));
-            error1 = ::abs(src1_roi.at<signed short>(maxLoc_) - src1_roi.at<signed short>(maxLoc));
-        }
-        if (depth == 4)
-        {
-            minlocVal = src1_roi.at<int>(minLoc);
-            minlocVal_ = src1_roi.at<int>(minLoc_);
-            maxlocVal = src1_roi.at<int>(maxLoc);
-            maxlocVal_ = src1_roi.at<int>(maxLoc_);
-            error0 = ::abs(src1_roi.at<int>(minLoc_) - src1_roi.at<int>(minLoc));
-            error1 = ::abs(src1_roi.at<int>(maxLoc_) - src1_roi.at<int>(maxLoc));
-        }
-        if (depth == 5)
-        {
-            minlocVal = src1_roi.at<float>(minLoc);
-            minlocVal_ = src1_roi.at<float>(minLoc_);
-            maxlocVal = src1_roi.at<float>(maxLoc);
-            maxlocVal_ = src1_roi.at<float>(maxLoc_);
-            error0 = ::abs(src1_roi.at<float>(minLoc_) - src1_roi.at<float>(minLoc));
-            error1 = ::abs(src1_roi.at<float>(maxLoc_) - src1_roi.at<float>(maxLoc));
-        }
-        if (depth == 6)
-        {
-            minlocVal = src1_roi.at<double>(minLoc);
-            minlocVal_ = src1_roi.at<double>(minLoc_);
-            maxlocVal = src1_roi.at<double>(maxLoc);
-            maxlocVal_ = src1_roi.at<double>(maxLoc_);
-            error0 = ::abs(src1_roi.at<double>(minLoc_) - src1_roi.at<double>(minLoc));
-            error1 = ::abs(src1_roi.at<double>(maxLoc_) - src1_roi.at<double>(maxLoc));
-        }
-
-        EXPECT_DOUBLE_EQ(minVal_, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_, maxVal);
-        EXPECT_DOUBLE_EQ(minlocVal_, minlocVal);
-        EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal);
-
-        EXPECT_DOUBLE_EQ(error0, 0.0);
-        EXPECT_DOUBLE_EQ(error1, 0.0);
-    }
-}
-
-//////////////////////////////// Sum /////////////////////////////////////////////////
-
-typedef ArithmTestBase Sum;
-
-OCL_TEST_P(Sum, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Scalar cpures = cv::sum(src1_roi);
-        Scalar gpures = cv::ocl::sum(gsrc1_roi);
-
-        // check results
-        EXPECT_NEAR(cpures[0], gpures[0], 0.1);
-        EXPECT_NEAR(cpures[1], gpures[1], 0.1);
-        EXPECT_NEAR(cpures[2], gpures[2], 0.1);
-        EXPECT_NEAR(cpures[3], gpures[3], 0.1);
-    }
-}
-
-typedef ArithmTestBase SqrSum;
-
-template <typename T, typename WT>
-static Scalar sqrSum(const Mat & src)
-{
-    Scalar sum = Scalar::all(0);
-    int cn = src.channels();
-    WT data[4] = { 0, 0, 0, 0 };
-
-    int cols = src.cols * cn;
-    for (int y = 0; y < src.rows; ++y)
-    {
-        const T * const sdata = src.ptr<T>(y);
-        for (int x = 0; x < cols; )
-            for (int i = 0; i < cn; ++i, ++x)
-            {
-                WT t = static_cast<WT>(sdata[x]);
-                data[i] += t * t;
-            }
-    }
-
-    for (int i = 0; i < cn; ++i)
-        sum[i] = static_cast<double>(data[i]);
-
-    return sum;
-}
-
-typedef Scalar (*sumFunc)(const Mat &);
-
-OCL_TEST_P(SqrSum, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        static sumFunc funcs[] = { sqrSum<uchar, int>,
-                                 sqrSum<char, int>,
-                                 sqrSum<ushort, int>,
-                                 sqrSum<short, int>,
-                                 sqrSum<int, int>,
-                                 sqrSum<float, double>,
-                                 sqrSum<double, double>,
-                                 0 };
-
-        sumFunc func = funcs[src1_roi.depth()];
-        CV_Assert(func != 0);
-
-        Scalar cpures = func(src1_roi);
-        Scalar gpures = cv::ocl::sqrSum(gsrc1_roi);
-
-        // check results
-        EXPECT_NEAR(cpures[0], gpures[0], 1.0);
-        EXPECT_NEAR(cpures[1], gpures[1], 1.0);
-        EXPECT_NEAR(cpures[2], gpures[2], 1.0);
-        EXPECT_NEAR(cpures[3], gpures[3], 1.0);
-    }
-}
-
-typedef ArithmTestBase AbsSum;
-
-template <typename T, typename WT>
-static Scalar absSum(const Mat & src)
-{
-    Scalar sum = Scalar::all(0);
-    int cn = src.channels();
-    WT data[4] = { 0, 0, 0, 0 };
-
-    int cols = src.cols * cn;
-    for (int y = 0; y < src.rows; ++y)
-    {
-        const T * const sdata = src.ptr<T>(y);
-        for (int x = 0; x < cols; )
-            for (int i = 0; i < cn; ++i, ++x)
-            {
-                WT t = static_cast<WT>(sdata[x]);
-                data[i] += t >= 0 ? t : -t;
-            }
-    }
-
-    for (int i = 0; i < cn; ++i)
-        sum[i] = static_cast<double>(data[i]);
-
-    return sum;
-}
-
-OCL_TEST_P(AbsSum, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        static sumFunc funcs[] = { absSum<uchar, int>,
-                                 absSum<char, int>,
-                                 absSum<ushort, int>,
-                                 absSum<short, int>,
-                                 absSum<int, int>,
-                                 absSum<float, double>,
-                                 absSum<double, double>,
-                                 0 };
-
-        sumFunc func = funcs[src1_roi.depth()];
-        CV_Assert(func != 0);
-
-        Scalar cpures = func(src1_roi);
-        Scalar gpures = cv::ocl::absSum(gsrc1_roi);
-
-        // check results
-        EXPECT_NEAR(cpures[0], gpures[0], 0.1);
-        EXPECT_NEAR(cpures[1], gpures[1], 0.1);
-        EXPECT_NEAR(cpures[2], gpures[2], 0.1);
-        EXPECT_NEAR(cpures[3], gpures[3], 0.1);
-    }
-}
-
-//////////////////////////////// CountNonZero /////////////////////////////////////////////////
-
-typedef ArithmTestBase CountNonZero;
-
-OCL_TEST_P(CountNonZero, MAT)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        int cpures = cv::countNonZero(src1_roi);
-        int gpures = cv::ocl::countNonZero(gsrc1_roi);
-
-        EXPECT_DOUBLE_EQ((double)cpures, (double)gpures);
-    }
-}
-
-//////////////////////////////// Phase /////////////////////////////////////////////////
-
-typedef ArithmTestBase Phase;
-
-OCL_TEST_P(Phase, angleInDegrees)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        cv::phase(src1_roi, src2_roi, dst1_roi, true);
-        cv::ocl::phase(gsrc1_roi, gsrc2_roi, gdst1_roi, true);
-
-        Near(1e-2);
-    }
-}
-
-OCL_TEST_P(Phase, angleInRadians)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        cv::phase(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::phase(gsrc1_roi, gsrc2_roi, gdst1_roi);
-
-        Near(1e-2);
-    }
-}
-
-//////////////////////////////// Bitwise_and /////////////////////////////////////////////////
-
-typedef ArithmTestBase Bitwise_and;
-
-OCL_TEST_P(Bitwise_and, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_and(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::bitwise_and(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_and, Mat_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_and(src1_roi, src2_roi, dst1_roi, mask_roi);
-        cv::ocl::bitwise_and(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_and, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_and(src1_roi, val, dst1_roi);
-        cv::ocl::bitwise_and(gsrc1_roi, val, gdst1_roi);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Bitwise_and, Scalar_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_and(src1_roi, val, dst1_roi, mask_roi);
-        cv::ocl::bitwise_and(gsrc1_roi, val, gdst1_roi, gmask_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// Bitwise_or /////////////////////////////////////////////////
-
-typedef ArithmTestBase Bitwise_or;
-
-OCL_TEST_P(Bitwise_or, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_or(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::bitwise_or(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_or, Mat_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_or(src1_roi, src2_roi, dst1_roi, mask_roi);
-        cv::ocl::bitwise_or(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_or, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_or(src1_roi, val, dst1_roi);
-        cv::ocl::bitwise_or(gsrc1_roi, val, gdst1_roi);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Bitwise_or, Scalar_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_or(src1_roi, val, dst1_roi, mask_roi);
-        cv::ocl::bitwise_or(gsrc1_roi, val, gdst1_roi, gmask_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// Bitwise_xor /////////////////////////////////////////////////
-
-typedef ArithmTestBase Bitwise_xor;
-
-OCL_TEST_P(Bitwise_xor, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_xor(src1_roi, src2_roi, dst1_roi);
-        cv::ocl::bitwise_xor(gsrc1_roi, gsrc2_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_xor, Mat_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_xor(src1_roi, src2_roi, dst1_roi, mask_roi);
-        cv::ocl::bitwise_xor(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi);
-        Near(0);
-    }
-}
-
-OCL_TEST_P(Bitwise_xor, Scalar)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_xor(src1_roi, val, dst1_roi);
-        cv::ocl::bitwise_xor(gsrc1_roi, val, gdst1_roi);
-        Near(1e-5);
-    }
-}
-
-OCL_TEST_P(Bitwise_xor, Scalar_Mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_xor(src1_roi, val, dst1_roi, mask_roi);
-        cv::ocl::bitwise_xor(gsrc1_roi, val, gdst1_roi, gmask_roi);
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////// Bitwise_not /////////////////////////////////////////////////
-
-typedef ArithmTestBase Bitwise_not;
-
-OCL_TEST_P(Bitwise_not, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::bitwise_not(src1_roi, dst1_roi);
-        cv::ocl::bitwise_not(gsrc1_roi, gdst1_roi);
-        Near(0);
-    }
-}
-
-//////////////////////////////// Compare /////////////////////////////////////////////////
-
-typedef ArithmTestBase Compare;
-
-OCL_TEST_P(Compare, Mat)
-{
-    int cmp_codes[] = { CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE };
-    int cmp_num = sizeof(cmp_codes) / sizeof(int);
-
-    for (int i = 0; i < cmp_num; ++i)
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi();
-
-            cv::compare(src1_roi, src2_roi, dst1_roi, cmp_codes[i]);
-            cv::ocl::compare(gsrc1_roi, gsrc2_roi, gdst1_roi, cmp_codes[i]);
-
-            Near(0);
-        }
-}
-
-//////////////////////////////// Pow /////////////////////////////////////////////////
-
-typedef ArithmTestBase Pow;
-
-OCL_TEST_P(Pow, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        double p = 4.5;
-        cv::pow(src1_roi, p, dst1_roi);
-        cv::ocl::pow(gsrc1_roi, p, gdst1_roi);
-        Near(1);
-    }
-}
-
-//////////////////////////////// AddWeighted /////////////////////////////////////////////////
-
-typedef ArithmTestBase AddWeighted;
-
-OCL_TEST_P(AddWeighted, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        const double alpha = 2.0, beta = 1.0, gama = 3.0;
-
-        cv::addWeighted(src1_roi, alpha, src2_roi, beta, gama, dst1_roi);
-        cv::ocl::addWeighted(gsrc1_roi, alpha, gsrc2_roi, beta, gama, gdst1_roi);
-
-        Near(3e-4);
-    }
-}
-
-//////////////////////////////// setIdentity /////////////////////////////////////////////////
-
-typedef ArithmTestBase SetIdentity;
-
-OCL_TEST_P(SetIdentity, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::setIdentity(dst1_roi, val);
-        cv::ocl::setIdentity(gdst1_roi, val);
-
-        Near(0);
-    }
-}
-
-//////////////////////////////// meanStdDev /////////////////////////////////////////////////
-
-typedef ArithmTestBase MeanStdDev;
-
-OCL_TEST_P(MeanStdDev, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Scalar cpu_mean, cpu_stddev;
-        Scalar gpu_mean, gpu_stddev;
-
-        cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev);
-        cv::ocl::meanStdDev(gsrc1_roi, gpu_mean, gpu_stddev);
-
-        for (int i = 0; i < 4; ++i)
-        {
-            EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1);
-            EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1);
-        }
-    }
-}
-
-//////////////////////////////// Norm /////////////////////////////////////////////////
-
-typedef ArithmTestBase Norm;
-
-OCL_TEST_P(Norm, NORM_INF)
-{
-    for (int relative = 0; relative < 2; ++relative)
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi();
-
-            int type = NORM_INF;
-            if (relative == 1)
-                type |= NORM_RELATIVE;
-
-            const double cpuRes = cv::norm(src1_roi, src2_roi, type);
-            const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type);
-
-            EXPECT_NEAR(cpuRes, gpuRes, 0.1);
-        }
-}
-
-OCL_TEST_P(Norm, NORM_L1)
-{
-    for (int relative = 0; relative < 2; ++relative)
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi();
-
-            int type = NORM_L1;
-            if (relative == 1)
-                type |= NORM_RELATIVE;
-
-            const double cpuRes = cv::norm(src1_roi, src2_roi, type);
-            const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type);
-
-            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
-        }
-}
-
-OCL_TEST_P(Norm, NORM_L2)
-{
-    for (int relative = 0; relative < 2; ++relative)
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi();
-
-            int type = NORM_L2;
-            if (relative == 1)
-                type |= NORM_RELATIVE;
-
-            const double cpuRes = cv::norm(src1_roi, src2_roi, type);
-            const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type);
-
-            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
-        }
-}
-
-//// Repeat
-
-struct RepeatTestCase :
-        public ArithmTestBase
-{
-    int nx, ny;
-
-    virtual void random_roi()
-    {
-        const int type = CV_MAKE_TYPE(depth, cn);
-
-        nx = randomInt(1, 4);
-        ny = randomInt(1, 4);
-
-        Size srcRoiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, srcRoiSize, srcBorder, type, 2, 11);
-
-        Size dstRoiSize(srcRoiSize.width * nx, srcRoiSize.height * ny);
-        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst1, dst1_roi, dstRoiSize, dst1Border, type, 5, 16);
-
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, srcRoiSize, srcBorder);
-        generateOclMat(gdst1_whole, gdst1_roi, dst1, dstRoiSize, dst1Border);
-    }
-};
-
-typedef RepeatTestCase Repeat;
-
-OCL_TEST_P(Repeat, Mat)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        cv::repeat(src1_roi, ny, nx, dst1_roi);
-        cv::ocl::repeat(gsrc1_roi, ny, nx, gdst1_roi);
-
-        Near();
-    }
-}
-
-//////////////////////////////////////// Instantiation /////////////////////////////////////////
-
-INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool(), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Min, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Max, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Abs, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(Channels(1)), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(Channels(1)), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(Channels(1)), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(Channels(1)), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-INSTANTIATE_TEST_CASE_P(Arithm, Repeat, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
deleted file mode 100644
index 8b4c865..0000000
--- a/modules/ocl/test/test_bgfg.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-#if defined(HAVE_XINE)         || \
-    defined(HAVE_GSTREAMER)    || \
-    defined(HAVE_QUICKTIME)    || \
-    defined(HAVE_AVFOUNDATION) || \
-    defined(HAVE_FFMPEG)       || \
-    defined(WIN32)
-
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
-#else
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
-#endif
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-//////////////////////////////////////////////////////
-// MOG
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(UseGray, bool)
-    IMPLEMENT_PARAM_CLASS(LearningRate, double)
-}
-
-PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
-{
-    bool useGray;
-    double learningRate;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        useGray = GET_PARAM(0);
-        learningRate = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-    }
-};
-
-OCL_TEST_P(mog, Update)
-{
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    cv::ocl::MOG mog;
-    cv::ocl::oclMat foreground = createMat_ocl(rng, frame.size(), CV_8UC1, useRoi);
-
-    Ptr<cv::BackgroundSubtractorMOG> mog_gold = createBackgroundSubtractorMOG();
-    cv::Mat foreground_gold;
-
-    for (int i = 0; i < 10; ++i)
-    {
-        cap >> frame;
-        ASSERT_FALSE(frame.empty());
-
-        if (useGray)
-        {
-            cv::Mat temp;
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            cv::swap(temp, frame);
-        }
-
-        mog(loadMat_ocl(rng, frame, useRoi), foreground, (float)learningRate);
-
-        mog_gold->apply(frame, foreground_gold, learningRate);
-
-        EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0);
-    }
-}
-INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine(
-    testing::Values(UseGray(false), UseGray(true)),
-    testing::Values(LearningRate(0.0), LearningRate(0.01)),
-    Values(true, false)));
-
-//////////////////////////////////////////////////////
-// MOG2
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
-}
-
-PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool)
-{
-    bool useGray;
-    bool detectShadow;
-    bool useRoi;
-    virtual void SetUp()
-    {
-        useGray = GET_PARAM(0);
-        detectShadow = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-    }
-};
-
-OCL_TEST_P(mog2, Update)
-{
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    cv::ocl::MOG2 mog2;
-    mog2.bShadowDetection = detectShadow;
-    cv::ocl::oclMat foreground = createMat_ocl(rng, frame.size(), CV_8UC1, useRoi);
-
-    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
-    mog2_gold->setDetectShadows(detectShadow);
-    cv::Mat foreground_gold;
-
-    for (int i = 0; i < 10; ++i)
-    {
-        cap >> frame;
-        ASSERT_FALSE(frame.empty());
-
-        if (useGray)
-        {
-            cv::Mat temp;
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            cv::swap(temp, frame);
-        }
-
-        mog2(loadMat_ocl(rng, frame, useRoi), foreground);
-
-        mog2_gold->apply(frame, foreground_gold);
-
-        if (detectShadow)
-            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 15e-3)
-        else
-            EXPECT_MAT_NEAR(foreground_gold, foreground, 0)
-    }
-}
-
-OCL_TEST_P(mog2, getBackgroundImage)
-{
-    if (useGray)
-        return;
-
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-
-    cv::ocl::MOG2 mog2;
-    mog2.bShadowDetection = detectShadow;
-    cv::ocl::oclMat foreground;
-
-    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
-    mog2_gold->setDetectShadows(detectShadow);
-    cv::Mat foreground_gold;
-
-    for (int i = 0; i < 10; ++i)
-    {
-        cap >> frame;
-        ASSERT_FALSE(frame.empty());
-
-        mog2(loadMat_ocl(rng, frame, useRoi), foreground);
-
-        mog2_gold->apply(frame, foreground_gold);
-    }
-
-    cv::ocl::oclMat background = createMat_ocl(rng, frame.size(), frame.type(), useRoi);
-    mog2.getBackgroundImage(background);
-
-    cv::Mat background_gold;
-    mog2_gold->getBackgroundImage(background_gold);
-
-    EXPECT_MAT_NEAR(background_gold, background, 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
-    testing::Values(UseGray(true), UseGray(false)),
-    testing::Values(DetectShadow(true), DetectShadow(false)),
-    Values(true, false)));
-
-#endif
-
-#endif
diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp
deleted file mode 100644
index 1576891..0000000
--- a/modules/ocl/test/test_blend.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Nathan, liujun@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "test_precomp.hpp"
-#include <iomanip>
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace testing;
-using namespace std;
-
-template <typename T>
-static void blendLinearGold(const Mat &img1, const Mat &img2,
-                            const Mat &weights1, const Mat &weights2,
-                            Mat &result_gold)
-{
-    CV_Assert(img1.size() == img2.size() && img1.type() == img2.type());
-    CV_Assert(weights1.size() == weights2.size() && weights1.size() == img1.size() &&
-              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
-
-    result_gold.create(img1.size(), img1.type());
-
-    int cn = img1.channels();
-    int step1 = img1.cols * img1.channels();
-
-    for (int y = 0; y < img1.rows; ++y)
-    {
-        const float * const weights1_row = weights1.ptr<float>(y);
-        const float * const weights2_row = weights2.ptr<float>(y);
-        const T * const img1_row = img1.ptr<T>(y);
-        const T * const img2_row = img2.ptr<T>(y);
-        T * const result_gold_row = result_gold.ptr<T>(y);
-
-        for (int x = 0; x < step1; ++x)
-        {
-            int x1 = x / cn;
-            float w1 = weights1_row[x1], w2 = weights2_row[x1];
-            result_gold_row[x] = saturate_cast<T>(((float)img1_row[x] * w1
-                                                 + (float)img2_row[x] * w2) / (w1 + w2 + 1e-5f));
-        }
-    }
-}
-
-PARAM_TEST_CASE(Blend, MatDepth, int, bool)
-{
-    int depth, channels;
-    bool useRoi;
-
-    Mat src1, src2, weights1, weights2, dst;
-    Mat src1_roi, src2_roi, weights1_roi, weights2_roi, dst_roi;
-    oclMat gsrc1, gsrc2, gweights1, gweights2, gdst, gst;
-    oclMat gsrc1_roi, gsrc2_roi, gweights1_roi, gweights2_roi, gdst_roi;
-
-    virtual void SetUp()
-    {
-        depth = GET_PARAM(0);
-        channels = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-    }
-
-    void random_roi()
-    {
-        const int type = CV_MAKE_TYPE(depth, channels);
-
-        const double upValue = 256;
-        const double sumMinValue = 0.01; // we don't want to divide by "zero"
-
-        Size roiSize = randomSize(1, 20);
-        Border src1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, src1Border, type, -upValue, upValue);
-
-        Border src2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src2, src2_roi, roiSize, src2Border, type, -upValue, upValue);
-
-        Border weights1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(weights1, weights1_roi, roiSize, weights1Border, CV_32FC1, -upValue, upValue);
-
-        Border weights2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(weights2, weights2_roi, roiSize, weights2Border, CV_32FC1, sumMinValue, upValue); // fill it as a (w1 + w12)
-
-        weights2_roi = weights2_roi - weights1_roi;
-        // check that weights2_roi is still a part of weights2 (not a new matrix)
-        CV_Assert(checkNorm(weights2_roi,
-            weights2(Rect(weights2Border.lef, weights2Border.top, roiSize.width, roiSize.height))) < 1e-6);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 16);
-
-        generateOclMat(gsrc1, gsrc1_roi, src1, roiSize, src1Border);
-        generateOclMat(gsrc2, gsrc2_roi, src2, roiSize, src2Border);
-        generateOclMat(gweights1, gweights1_roi, weights1, roiSize, weights1Border);
-        generateOclMat(gweights2, gweights2_roi, weights2, roiSize, weights2Border);
-        generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
-    }
-
-    void Near(double eps = 0.0)
-    {
-        Mat whole, roi;
-        gdst.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst, whole, eps);
-        EXPECT_MAT_NEAR(dst_roi, roi, eps);
-    }
-};
-
-typedef void (*blendLinearFunc)(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold);
-
-OCL_TEST_P(Blend, Accuracy)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        cv::ocl::blendLinear(gsrc1_roi, gsrc2_roi, gweights1_roi, gweights2_roi, gdst_roi);
-
-        static blendLinearFunc funcs[] = {
-            blendLinearGold<uchar>,
-            blendLinearGold<schar>,
-            blendLinearGold<ushort>,
-            blendLinearGold<short>,
-            blendLinearGold<int>,
-            blendLinearGold<float>,
-        };
-
-        blendLinearFunc func = funcs[depth];
-        func(src1_roi, src2_roi, weights1_roi, weights2_roi, dst_roi);
-
-        Near(depth <= CV_32S ? 1.0 : 0.2);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend,
-                        Combine(testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F),
-                                testing::Range(1, 5), Bool()));
diff --git a/modules/ocl/test/test_brief.cpp b/modules/ocl/test/test_brief.cpp
deleted file mode 100644
index 8ca2c90..0000000
--- a/modules/ocl/test/test_brief.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Matthias Bady aegirxx ==> gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace ocl;
-
-#ifdef HAVE_OPENCL
-
-namespace
-{
-IMPLEMENT_PARAM_CLASS( BRIEF_Bytes, int )
-}
-
-PARAM_TEST_CASE( BRIEF, BRIEF_Bytes )
-{
-    int bytes;
-
-    virtual void SetUp( )
-    {
-        bytes = GET_PARAM( 0 );
-    }
-};
-
-OCL_TEST_P( BRIEF, Accuracy )
-{
-    Mat img = readImage( "gpu/opticalflow/rubberwhale1.png", IMREAD_GRAYSCALE );
-    ASSERT_TRUE( !img.empty( ) ) << "no input image";
-
-    FastFeatureDetector fast( 20 );
-    std::vector<KeyPoint> keypoints;
-    fast.detect( img, keypoints, Mat( ) );
-
-    Mat descriptorsGold;
-    BriefDescriptorExtractor brief( bytes );
-    brief.compute( img, keypoints, descriptorsGold );
-
-    Mat kpMat( 2, int( keypoints.size() ), CV_32FC1 );
-    for ( int i = 0, size = (int)keypoints.size( ); i < size; ++i )
-    {
-        kpMat.col( i ).row( 0 ) = int( keypoints[i].pt.x );
-        kpMat.col( i ).row( 1 ) = int( keypoints[i].pt.y );
-    }
-    oclMat imgOcl( img ), keypointsOcl( kpMat ), descriptorsOcl, maskOcl;
-
-    BRIEF_OCL briefOcl( bytes );
-    briefOcl.compute( imgOcl, keypointsOcl, maskOcl, descriptorsOcl );
-    Mat mask, descriptors;
-    maskOcl.download( mask );
-    descriptorsOcl.download( descriptors );
-
-    const int numDesc = cv::countNonZero( mask );
-    if ( numDesc != descriptors.cols )
-    {
-        int idx = 0;
-        Mat tmp( numDesc, bytes, CV_8UC1 );
-        for ( int i = 0; i < descriptors.rows; ++i )
-        {
-            if ( mask.at<uchar>(i) )
-            {
-                descriptors.row( i ).copyTo( tmp.row( idx++ ) );
-            }
-        }
-        descriptors = tmp;
-    }
-    ASSERT_TRUE( descriptors.size( ) == descriptorsGold.size( ) ) << "Different number of descriptors";
-    ASSERT_TRUE( 0 == norm( descriptors, descriptorsGold, NORM_HAMMING ) ) << "Descriptors different";
-}
-
-INSTANTIATE_TEST_CASE_P( OCL_Features2D, BRIEF, testing::Values( 16, 32, 64 ) );
-#endif
diff --git a/modules/ocl/test/test_brute_force_matcher.cpp b/modules/ocl/test/test_brute_force_matcher.cpp
deleted file mode 100644
index 04ca9e2..0000000
--- a/modules/ocl/test/test_brute_force_matcher.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Nathan, liujun@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#ifdef HAVE_OPENCL
-namespace
-{
-    /////////////////////////////////////////////////////////////////////////////////////////////////
-    // BruteForceMatcher
-    CV_ENUM(DistType, BruteForceMatcher_OCL_base::L1Dist,
-                      BruteForceMatcher_OCL_base::L2Dist,
-                      BruteForceMatcher_OCL_base::HammingDist)
-    IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
-    PARAM_TEST_CASE(BruteForceMatcher, DistType, DescriptorSize)
-    {
-        cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
-        int normCode;
-        int dim;
-
-        int queryDescCount;
-        int countFactor;
-
-        cv::Mat query, train;
-
-        virtual void SetUp()
-        {
-            distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
-            dim = GET_PARAM(1);
-
-            queryDescCount = 300; // must be even number because we split train data in some cases in two
-            countFactor = 4; // do not change it
-
-            cv::Mat queryBuf, trainBuf;
-
-            // Generate query descriptors randomly.
-            // Descriptor vector elements are integer values.
-            queryBuf.create(queryDescCount, dim, CV_32SC1);
-            rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
-            queryBuf.convertTo(queryBuf, CV_32FC1);
-
-            // Generate train decriptors as follows:
-            // copy each query descriptor to train set countFactor times
-            // and perturb some one element of the copied descriptors in
-            // in ascending order. General boundaries of the perturbation
-            // are (0.f, 1.f).
-            trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
-            float step = 1.f / countFactor;
-            for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
-            {
-                cv::Mat queryDescriptor = queryBuf.row(qIdx);
-                for (int c = 0; c < countFactor; c++)
-                {
-                    int tIdx = qIdx * countFactor + c;
-                    cv::Mat trainDescriptor = trainBuf.row(tIdx);
-                    queryDescriptor.copyTo(trainDescriptor);
-                    int elem = rng(dim);
-                    float diff = rng.uniform(step * c, step * (c + 1));
-                    trainDescriptor.at<float>(0, elem) += diff;
-                }
-            }
-
-            queryBuf.convertTo(query, CV_32F);
-            trainBuf.convertTo(train, CV_32F);
-        }
-    };
-
-#ifdef ANDROID
-    OCL_TEST_P(BruteForceMatcher, DISABLED_Match_Single)
-#else
-    OCL_TEST_P(BruteForceMatcher, Match_Single)
-#endif
-    {
-        cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
-
-        std::vector<cv::DMatch> matches;
-        matcher.match(cv::ocl::oclMat(query),  cv::ocl::oclMat(train),  matches);
-
-        ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
-
-        int badCount = 0;
-        for (size_t i = 0; i < matches.size(); i++)
-        {
-            cv::DMatch match = matches[i];
-            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
-                badCount++;
-        }
-
-        ASSERT_EQ(0, badCount);
-    }
-
-#ifdef ANDROID
-    OCL_TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single)
-#else
-    OCL_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
-#endif
-    {
-        const int knn = 2;
-
-        cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
-
-        std::vector< std::vector<cv::DMatch> > matches;
-        matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
-
-        ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
-
-        int badCount = 0;
-        for (size_t i = 0; i < matches.size(); i++)
-        {
-            if ((int)matches[i].size() != knn)
-                badCount++;
-            else
-            {
-                int localBadCount = 0;
-                for (int k = 0; k < knn; k++)
-                {
-                    cv::DMatch match = matches[i][k];
-                    if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
-                        localBadCount++;
-                }
-                badCount += localBadCount > 0 ? 1 : 0;
-            }
-        }
-
-        ASSERT_EQ(0, badCount);
-    }
-
-#ifdef ANDROID
-    OCL_TEST_P(BruteForceMatcher, DISABLED_RadiusMatch_Single)
-#else
-    OCL_TEST_P(BruteForceMatcher, RadiusMatch_Single)
-#endif
-    {
-        float radius = 1.f / countFactor;
-
-        cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
-
-        std::vector< std::vector<cv::DMatch> > matches;
-        matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
-
-        ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
-
-        int badCount = 0;
-        for (size_t i = 0; i < matches.size(); i++)
-        {
-            if ((int)matches[i].size() != 1)
-            {
-                badCount++;
-            }
-            else
-            {
-                cv::DMatch match = matches[i][0];
-                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
-                    badCount++;
-            }
-        }
-
-        ASSERT_EQ(0, badCount);
-    }
-
-    INSTANTIATE_TEST_CASE_P(OCL_Features2D, BruteForceMatcher,
-        testing::Combine(
-        testing::Values(
-            DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist),
-            DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)/*,
-            DistType(cv::ocl::BruteForceMatcher_OCL_base::HammingDist)*/
-        ),
-        testing::Values(
-            DescriptorSize(57),
-            DescriptorSize(64),
-            DescriptorSize(83),
-            DescriptorSize(128),
-            DescriptorSize(179),
-            DescriptorSize(256),
-            DescriptorSize(304))
-        )
-    );
-} // namespace
-#endif
diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp
deleted file mode 100644
index 9fd0b23..0000000
--- a/modules/ocl/test/test_calib3d.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//     Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <iomanip>
-
-using namespace cv;
-
-#ifdef HAVE_OPENCL
-
-PARAM_TEST_CASE(StereoMatchBM, int, int)
-{
-    int n_disp;
-    int winSize;
-
-    virtual void SetUp()
-    {
-        n_disp  = GET_PARAM(0);
-        winSize = GET_PARAM(1);
-    }
-};
-
-OCL_TEST_P(StereoMatchBM, Regression)
-{
-
-    Mat left_image  = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE);
-    Mat right_image = readImage("gpu/stereobm/aloe-R.png", IMREAD_GRAYSCALE);
-    Mat disp_gold   = readImage("gpu/stereobm/aloe-disp.png", IMREAD_GRAYSCALE);
-    ocl::oclMat d_left, d_right;
-    ocl::oclMat d_disp(left_image.size(), CV_8U);
-    Mat  disp;
-
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-    d_left.upload(left_image);
-    d_right.upload(right_image);
-
-    ocl::StereoBM_OCL bm(0, n_disp, winSize);
-
-
-    bm(d_left, d_right, d_disp);
-    d_disp.download(disp);
-
-    EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128),
-                                       testing::Values(19)));
-
-PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float)
-{
-    int ndisp_;
-    int iters_;
-    int levels_;
-    float max_data_term_;
-    float data_weight_;
-    float max_disc_term_;
-    float disc_single_jump_;
-    virtual void SetUp()
-    {
-        ndisp_          = GET_PARAM(0);
-        iters_          = GET_PARAM(1);
-        levels_         = GET_PARAM(2);
-        max_data_term_  = GET_PARAM(3);
-        data_weight_    = GET_PARAM(4);
-        max_disc_term_     = GET_PARAM(5);
-        disc_single_jump_  = GET_PARAM(6);
-    }
-};
-OCL_TEST_P(StereoMatchBP, Regression)
-{
-    Mat left_image  = readImage("gpu/stereobp/aloe-L.png");
-    Mat right_image = readImage("gpu/stereobp/aloe-R.png");
-    Mat disp_gold   = readImage("gpu/stereobp/aloe-disp.png", IMREAD_GRAYSCALE);
-    ocl::oclMat d_left, d_right;
-    ocl::oclMat d_disp;
-    Mat  disp;
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-    d_left.upload(left_image);
-    d_right.upload(right_image);
-    ocl::StereoBeliefPropagation bp(ndisp_, iters_, levels_, max_data_term_, data_weight_,
-        max_disc_term_, disc_single_jump_, CV_16S);
-    bp(d_left, d_right, d_disp);
-    d_disp.download(disp);
-    disp.convertTo(disp, disp_gold.depth());
-    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
-}
-INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBP, testing::Combine(testing::Values(64),
-    testing::Values(8),testing::Values(2),testing::Values(25.0f),
-    testing::Values(0.1f),testing::Values(15.0f),testing::Values(1.0f)));
-
-//////////////////////////////////////////////////////////////////////////
-//  ConstSpaceBeliefPropagation
-PARAM_TEST_CASE(StereoMatchConstSpaceBP, int, int, int, int, float, float, float, float, int, int)
-{
-    int ndisp_;
-    int iters_;
-    int levels_;
-    int nr_plane_;
-    float max_data_term_;
-    float data_weight_;
-    float max_disc_term_;
-    float disc_single_jump_;
-    int min_disp_th_;
-    int msg_type_;
-
-    virtual void SetUp()
-    {
-        ndisp_          = GET_PARAM(0);
-        iters_          = GET_PARAM(1);
-        levels_         = GET_PARAM(2);
-        nr_plane_ = GET_PARAM(3);
-        max_data_term_  = GET_PARAM(4);
-        data_weight_    = GET_PARAM(5);
-        max_disc_term_     = GET_PARAM(6);
-        disc_single_jump_  = GET_PARAM(7);
-        min_disp_th_ = GET_PARAM(8);
-        msg_type_  = GET_PARAM(9);
-    }
-};
-OCL_TEST_P(StereoMatchConstSpaceBP, Regression)
-{
-    Mat left_image  = readImage("gpu/csstereobp/aloe-L.png");
-    Mat right_image = readImage("gpu/csstereobp/aloe-R.png");
-    Mat disp_gold   = readImage("gpu/csstereobp/aloe-disp.png", IMREAD_GRAYSCALE);
-
-    ocl::oclMat d_left, d_right;
-    ocl::oclMat d_disp;
-
-    Mat  disp;
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-
-    d_left.upload(left_image);
-    d_right.upload(right_image);
-
-    ocl::StereoConstantSpaceBP bp(ndisp_, iters_, levels_, nr_plane_, max_data_term_, data_weight_,
-        max_disc_term_, disc_single_jump_, 0, CV_32F);
-    bp(d_left, d_right, d_disp);
-    d_disp.download(disp);
-    disp.convertTo(disp, disp_gold.depth());
-
-    EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-4);
-    //EXPECT_MAT_NEAR(disp_gold, disp, 1.0, "");
-}
-INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchConstSpaceBP, testing::Combine(testing::Values(128),
-    testing::Values(16),testing::Values(4), testing::Values(4), testing::Values(30.0f),
-    testing::Values(1.0f),testing::Values(160.0f),
-    testing::Values(10.0f), testing::Values(0), testing::Values(CV_32F)));
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp
deleted file mode 100644
index 6bd7f26..0000000
--- a/modules/ocl/test/test_canny.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#ifdef HAVE_OPENCL
-
-////////////////////////////////////////////////////////
-// Canny
-IMPLEMENT_PARAM_CLASS(AppertureSize, int)
-IMPLEMENT_PARAM_CLASS(L2gradient, bool)
-
-PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
-{
-    int apperture_size;
-    bool useL2gradient;
-
-    cv::Mat edges_gold;
-    virtual void SetUp()
-    {
-        apperture_size = GET_PARAM(0);
-        useL2gradient = GET_PARAM(1);
-    }
-};
-
-OCL_TEST_P(Canny, Accuracy)
-{
-    cv::Mat img = readImage("cv/shared/fruits.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    double low_thresh = 50.0;
-    double high_thresh = 100.0;
-
-    cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
-
-    cv::ocl::oclMat edges;
-    cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-    cv::Mat edges_gold;
-    cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-    EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
-                            testing::Values(AppertureSize(3), AppertureSize(5)),
-                            testing::Values(L2gradient(false), L2gradient(true))));
-#endif
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
deleted file mode 100644
index c4641d4..0000000
--- a/modules/ocl/test/test_color.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-
-#ifdef HAVE_OPENCL
-
-using namespace testing;
-using namespace cv;
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// cvtColor
-
-PARAM_TEST_CASE(CvtColor, MatDepth, bool)
-{
-    int depth;
-    bool use_roi;
-
-    // src mat
-    Mat src;
-    Mat dst;
-
-    // src mat with roi
-    Mat src_roi;
-    Mat dst_roi;
-
-    // ocl dst mat for testing
-    ocl::oclMat gsrc_whole;
-    ocl::oclMat gdst_whole;
-
-    // ocl mat with roi
-    ocl::oclMat gsrc_roi;
-    ocl::oclMat gdst_roi;
-
-    virtual void SetUp()
-    {
-        depth = GET_PARAM(0);
-        use_roi = GET_PARAM(1);
-    }
-
-    virtual void random_roi(int channelsIn, int channelsOut)
-    {
-        const int srcType = CV_MAKE_TYPE(depth, channelsIn);
-        const int dstType = CV_MAKE_TYPE(depth, channelsOut);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
-
-        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
-    }
-
-    void Near(double threshold)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-
-    void doTest(int channelsIn, int channelsOut, int code, double threshold = 1e-3)
-    {
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi(channelsIn, channelsOut);
-
-            cvtColor(src_roi, dst_roi, code, channelsOut);
-            ocl::cvtColor(gsrc_roi, gdst_roi, code, channelsOut);
-
-            Near(threshold);
-        }
-    }
-};
-
-#define CVTCODE(name) COLOR_ ## name
-
-// RGB[A] <-> BGR[A]
-
-OCL_TEST_P(CvtColor, BGR2BGRA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
-OCL_TEST_P(CvtColor, RGB2RGBA) { doTest(3, 4, CVTCODE(RGB2RGBA)); }
-OCL_TEST_P(CvtColor, BGRA2BGR) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
-OCL_TEST_P(CvtColor, RGBA2RGB) { doTest(4, 3, CVTCODE(RGBA2RGB)); }
-OCL_TEST_P(CvtColor, BGR2RGBA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
-OCL_TEST_P(CvtColor, RGB2BGRA) { doTest(3, 4, CVTCODE(RGB2BGRA)); }
-OCL_TEST_P(CvtColor, RGBA2BGR) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
-OCL_TEST_P(CvtColor, BGRA2RGB) { doTest(4, 3, CVTCODE(BGRA2RGB)); }
-OCL_TEST_P(CvtColor, BGR2RGB) { doTest(3, 3, CVTCODE(BGR2RGB)); }
-OCL_TEST_P(CvtColor, RGB2BGR) { doTest(3, 3, CVTCODE(RGB2BGR)); }
-OCL_TEST_P(CvtColor, BGRA2RGBA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
-OCL_TEST_P(CvtColor, RGBA2BGRA) { doTest(4, 4, CVTCODE(RGBA2BGRA)); }
-
-// RGB <-> Gray
-
-OCL_TEST_P(CvtColor, RGB2GRAY) { doTest(3, 1, CVTCODE(RGB2GRAY)); }
-OCL_TEST_P(CvtColor, GRAY2RGB) { doTest(1, 3, CVTCODE(GRAY2RGB)); }
-OCL_TEST_P(CvtColor, BGR2GRAY) { doTest(3, 1, CVTCODE(BGR2GRAY)); }
-OCL_TEST_P(CvtColor, GRAY2BGR) { doTest(1, 3, CVTCODE(GRAY2BGR)); }
-OCL_TEST_P(CvtColor, RGBA2GRAY) { doTest(4, 1, CVTCODE(RGBA2GRAY)); }
-OCL_TEST_P(CvtColor, GRAY2RGBA) { doTest(1, 4, CVTCODE(GRAY2RGBA)); }
-OCL_TEST_P(CvtColor, BGRA2GRAY) { doTest(4, 1, CVTCODE(BGRA2GRAY)); }
-OCL_TEST_P(CvtColor, GRAY2BGRA) { doTest(1, 4, CVTCODE(GRAY2BGRA)); }
-
-// RGB <-> YUV
-
-OCL_TEST_P(CvtColor, RGB2YUV) { doTest(3, 3, CVTCODE(RGB2YUV)); }
-OCL_TEST_P(CvtColor, BGR2YUV) { doTest(3, 3, CVTCODE(BGR2YUV)); }
-OCL_TEST_P(CvtColor, RGBA2YUV) { doTest(4, 3, CVTCODE(RGB2YUV)); }
-OCL_TEST_P(CvtColor, BGRA2YUV) { doTest(4, 3, CVTCODE(BGR2YUV)); }
-OCL_TEST_P(CvtColor, YUV2RGB) { doTest(3, 3, CVTCODE(YUV2RGB)); }
-OCL_TEST_P(CvtColor, YUV2BGR) { doTest(3, 3, CVTCODE(YUV2BGR)); }
-OCL_TEST_P(CvtColor, YUV2RGBA) { doTest(3, 4, CVTCODE(YUV2RGB)); }
-OCL_TEST_P(CvtColor, YUV2BGRA) { doTest(3, 4, CVTCODE(YUV2BGR)); }
-
-// RGB <-> YCrCb
-
-OCL_TEST_P(CvtColor, RGB2YCrCb) { doTest(3, 3, CVTCODE(RGB2YCrCb)); }
-OCL_TEST_P(CvtColor, BGR2YCrCb) { doTest(3, 3, CVTCODE(BGR2YCrCb)); }
-OCL_TEST_P(CvtColor, RGBA2YCrCb) { doTest(4, 3, CVTCODE(RGB2YCrCb)); }
-OCL_TEST_P(CvtColor, BGRA2YCrCb) { doTest(4, 3, CVTCODE(BGR2YCrCb)); }
-OCL_TEST_P(CvtColor, YCrCb2RGB) { doTest(3, 3, CVTCODE(YCrCb2RGB)); }
-OCL_TEST_P(CvtColor, YCrCb2BGR) { doTest(3, 3, CVTCODE(YCrCb2BGR)); }
-OCL_TEST_P(CvtColor, YCrCb2RGBA) { doTest(3, 4, CVTCODE(YCrCb2RGB)); }
-OCL_TEST_P(CvtColor, YCrCb2BGRA) { doTest(3, 4, CVTCODE(YCrCb2BGR)); }
-
-// RGB <-> XYZ
-
-OCL_TEST_P(CvtColor, RGB2XYZ) { doTest(3, 3, CVTCODE(RGB2XYZ)); }
-OCL_TEST_P(CvtColor, BGR2XYZ) { doTest(3, 3, CVTCODE(BGR2XYZ)); }
-OCL_TEST_P(CvtColor, RGBA2XYZ) { doTest(4, 3, CVTCODE(RGB2XYZ)); }
-OCL_TEST_P(CvtColor, BGRA2XYZ) { doTest(4, 3, CVTCODE(BGR2XYZ)); }
-
-OCL_TEST_P(CvtColor, XYZ2RGB) { doTest(3, 3, CVTCODE(XYZ2RGB)); }
-OCL_TEST_P(CvtColor, XYZ2BGR) { doTest(3, 3, CVTCODE(XYZ2BGR)); }
-OCL_TEST_P(CvtColor, XYZ2RGBA) { doTest(3, 4, CVTCODE(XYZ2RGB)); }
-OCL_TEST_P(CvtColor, XYZ2BGRA) { doTest(3, 4, CVTCODE(XYZ2BGR)); }
-
-// RGB <-> HSV
-
-typedef CvtColor CvtColor8u32f;
-
-OCL_TEST_P(CvtColor8u32f, RGB2HSV) { doTest(3, 3, CVTCODE(RGB2HSV)); }
-OCL_TEST_P(CvtColor8u32f, BGR2HSV) { doTest(3, 3, CVTCODE(BGR2HSV)); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HSV) { doTest(4, 3, CVTCODE(RGB2HSV)); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HSV) { doTest(4, 3, CVTCODE(BGR2HSV)); }
-
-OCL_TEST_P(CvtColor8u32f, RGB2HSV_FULL) { doTest(3, 3, CVTCODE(RGB2HSV_FULL)); }
-OCL_TEST_P(CvtColor8u32f, BGR2HSV_FULL) { doTest(3, 3, CVTCODE(BGR2HSV_FULL)); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HSV_FULL) { doTest(4, 3, CVTCODE(RGB2HSV_FULL)); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HSV_FULL) { doTest(4, 3, CVTCODE(BGR2HSV_FULL)); }
-
-OCL_TEST_P(CvtColor8u32f, HSV2RGB) { doTest(3, 3, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2BGR) { doTest(3, 3, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2RGBA) { doTest(3, 4, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2BGRA) { doTest(3, 4, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
-
-OCL_TEST_P(CvtColor8u32f, HSV2RGB_FULL) { doTest(3, 3, CVTCODE(HSV2RGB_FULL), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2BGR_FULL) { doTest(3, 3, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2RGBA_FULL) { doTest(3, 4, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
-OCL_TEST_P(CvtColor8u32f, HSV2BGRA_FULL) { doTest(3, 4, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
-
-// RGB <-> HLS
-
-OCL_TEST_P(CvtColor8u32f, RGB2HLS) { doTest(3, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGR2HLS) { doTest(3, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { doTest(4, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { doTest(4, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
-
-OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { doTest(3, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-
-OCL_TEST_P(CvtColor8u32f, HLS2RGB) { doTest(3, 3, CVTCODE(HLS2RGB), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2BGR) { doTest(3, 3, CVTCODE(HLS2BGR), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2RGBA) { doTest(3, 4, CVTCODE(HLS2RGB), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2BGRA) { doTest(3, 4, CVTCODE(HLS2BGR), 1); }
-
-OCL_TEST_P(CvtColor8u32f, HLS2RGB_FULL) { doTest(3, 3, CVTCODE(HLS2RGB_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2BGR_FULL) { doTest(3, 3, CVTCODE(HLS2BGR_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2RGBA_FULL) { doTest(3, 4, CVTCODE(HLS2RGB_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, HLS2BGRA_FULL) { doTest(3, 4, CVTCODE(HLS2BGR_FULL), 1); }
-
-// RGB5x5 <-> RGB
-
-typedef CvtColor CvtColor8u;
-
-OCL_TEST_P(CvtColor8u, BGR5652BGR) { doTest(2, 3, CVTCODE(BGR5652BGR)); }
-OCL_TEST_P(CvtColor8u, BGR5652RGB) { doTest(2, 3, CVTCODE(BGR5652RGB)); }
-OCL_TEST_P(CvtColor8u, BGR5652BGRA) { doTest(2, 4, CVTCODE(BGR5652BGRA)); }
-OCL_TEST_P(CvtColor8u, BGR5652RGBA) { doTest(2, 4, CVTCODE(BGR5652RGBA)); }
-
-OCL_TEST_P(CvtColor8u, BGR5552BGR) { doTest(2, 3, CVTCODE(BGR5552BGR)); }
-OCL_TEST_P(CvtColor8u, BGR5552RGB) { doTest(2, 3, CVTCODE(BGR5552RGB)); }
-OCL_TEST_P(CvtColor8u, BGR5552BGRA) { doTest(2, 4, CVTCODE(BGR5552BGRA)); }
-OCL_TEST_P(CvtColor8u, BGR5552RGBA) { doTest(2, 4, CVTCODE(BGR5552RGBA)); }
-
-OCL_TEST_P(CvtColor8u, BGR2BGR565) { doTest(3, 2, CVTCODE(BGR2BGR565)); }
-OCL_TEST_P(CvtColor8u, RGB2BGR565) { doTest(3, 2, CVTCODE(RGB2BGR565)); }
-OCL_TEST_P(CvtColor8u, BGRA2BGR565) { doTest(4, 2, CVTCODE(BGRA2BGR565)); }
-OCL_TEST_P(CvtColor8u, RGBA2BGR565) { doTest(4, 2, CVTCODE(RGBA2BGR565)); }
-
-OCL_TEST_P(CvtColor8u, BGR2BGR555) { doTest(3, 2, CVTCODE(BGR2BGR555)); }
-OCL_TEST_P(CvtColor8u, RGB2BGR555) { doTest(3, 2, CVTCODE(RGB2BGR555)); }
-OCL_TEST_P(CvtColor8u, BGRA2BGR555) { doTest(4, 2, CVTCODE(BGRA2BGR555)); }
-OCL_TEST_P(CvtColor8u, RGBA2BGR555) { doTest(4, 2, CVTCODE(RGBA2BGR555)); }
-
-// RGB5x5 <-> Gray
-
-OCL_TEST_P(CvtColor8u, BGR5652GRAY) { doTest(2, 1, CVTCODE(BGR5652GRAY)); }
-OCL_TEST_P(CvtColor8u, BGR5552GRAY) { doTest(2, 1, CVTCODE(BGR5552GRAY)); }
-
-OCL_TEST_P(CvtColor8u, GRAY2BGR565) { doTest(1, 2, CVTCODE(GRAY2BGR565)); }
-OCL_TEST_P(CvtColor8u, GRAY2BGR555) { doTest(1, 2, CVTCODE(GRAY2BGR555)); }
-
-// RGBA <-> mRGBA
-
-OCL_TEST_P(CvtColor8u, RGBA2mRGBA) { doTest(4, 4, CVTCODE(RGBA2mRGBA)); }
-OCL_TEST_P(CvtColor8u, mRGBA2RGBA) { doTest(4, 4, CVTCODE(mRGBA2RGBA)); }
-
-// YUV -> RGBA_NV12
-
-struct CvtColor_YUV420 :
-        public CvtColor
-{
-    void random_roi(int channelsIn, int channelsOut)
-    {
-        const int srcType = CV_MAKE_TYPE(depth, channelsIn);
-        const int dstType = CV_MAKE_TYPE(depth, channelsOut);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        roiSize.width *= 2;
-        roiSize.height *= 3;
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
-
-        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
-    }
-};
-
-OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { doTest(1, 4, COLOR_YUV2RGBA_NV12); }
-OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, COLOR_YUV2BGRA_NV12); }
-OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, COLOR_YUV2RGB_NV12); }
-OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, COLOR_YUV2BGR_NV12); }
-
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u,
-                            testing::Combine(testing::Values(MatDepth(CV_8U)), Bool()));
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u32f,
-                            testing::Combine(testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), Bool()));
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
-                            testing::Combine(
-                                testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
-                                Bool()));
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_YUV420,
-                            testing::Combine(
-                                testing::Values(MatDepth(CV_8U)),
-                                Bool()));
-
-#endif
diff --git a/modules/ocl/test/test_fast.cpp b/modules/ocl/test/test_fast.cpp
deleted file mode 100644
index 19ff68e..0000000
--- a/modules/ocl/test/test_fast.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-////////////////////////////////////////////////////////
-// FAST
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(FAST_Threshold, int)
-    IMPLEMENT_PARAM_CLASS(FAST_NonmaxSupression, bool)
-}
-
-PARAM_TEST_CASE(FAST, FAST_Threshold, FAST_NonmaxSupression)
-{
-    int threshold;
-    bool nonmaxSupression;
-
-    virtual void SetUp()
-    {
-        threshold = GET_PARAM(0);
-        nonmaxSupression = GET_PARAM(1);
-    }
-};
-
-OCL_TEST_P(FAST, Accuracy)
-{
-    cv::Mat image = readImage("gpu/perf/aloe.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    cv::ocl::FAST_OCL fast(threshold);
-    fast.nonmaxSupression = nonmaxSupression;
-
-    cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);
-
-    std::vector<cv::KeyPoint> keypoints;
-    fast(ocl_image, cv::ocl::oclMat(), keypoints);
-
-    std::vector<cv::KeyPoint> keypoints_gold;
-    cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
-
-    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Features2D, FAST, testing::Combine(
-                        testing::Values(FAST_Threshold(25), FAST_Threshold(50)),
-                        testing::Values(FAST_NonmaxSupression(false), FAST_NonmaxSupression(true))));
-
-#endif
diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp
deleted file mode 100644
index ddc26e3..0000000
--- a/modules/ocl/test/test_fft.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-
-////////////////////////////////////////////////////////////////////////////
-// Dft
-
-PARAM_TEST_CASE(Dft, cv::Size, int, bool)
-{
-    cv::Size dft_size;
-    int	 dft_flags;
-    bool doubleFP;
-
-    virtual void SetUp()
-    {
-        dft_size  = GET_PARAM(0);
-        dft_flags = GET_PARAM(1);
-        doubleFP = GET_PARAM(2);
-    }
-};
-
-OCL_TEST_P(Dft, C2C)
-{
-    cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC2 : CV_32FC2, 0.0, 100.0);
-    cv::Mat b_gold;
-
-    cv::ocl::oclMat d_b;
-
-    cv::dft(a, b_gold, dft_flags);
-    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
-
-    EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4);
-}
-
-OCL_TEST_P(Dft, R2C)
-{
-    cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 100.0);
-    cv::Mat b_gold, b_gold_roi;
-
-    cv::ocl::oclMat d_b, d_c;
-    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
-    cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags);
-
-    b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows));
-    EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4);
-
-    cv::Mat c_gold;
-    cv::dft(b_gold, c_gold, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
-    EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4);
-}
-
-OCL_TEST_P(Dft, R2CthenC2R)
-{
-    cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 10.0);
-
-    cv::ocl::oclMat d_b, d_c;
-    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
-    cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
-    EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
-                            testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
-                            testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE), testing::Bool()));
-
-////////////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-PARAM_TEST_CASE(MulSpectrums, cv::Size, DftFlags, bool)
-{
-    cv::Size size;
-    int flag;
-    bool ccorr;
-    cv::Mat a, b;
-
-    virtual void SetUp()
-    {
-        size  = GET_PARAM(0);
-        flag  = GET_PARAM(1);
-        ccorr = GET_PARAM(2);
-
-        a = randomMat(size, CV_32FC2, -100, 100, false);
-        b = randomMat(size, CV_32FC2, -100, 100, false);
-    }
-};
-
-OCL_TEST_P(MulSpectrums, Simple)
-{
-    cv::ocl::oclMat c;
-    cv::ocl::mulSpectrums(cv::ocl::oclMat(a), cv::ocl::oclMat(b), c, flag, 1.0, ccorr);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, ccorr);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-OCL_TEST_P(MulSpectrums, Scaled)
-{
-    float scale = 1.f / size.area();
-
-    cv::ocl::oclMat c;
-    cv::ocl::mulSpectrums(cv::ocl::oclMat(a), cv::ocl::oclMat(b), c, flag, scale, ccorr);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, ccorr);
-    c_gold.convertTo(c_gold, c_gold.type(), scale);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(
-    DIFFERENT_SIZES,
-    testing::Values(DftFlags(0)),
-    testing::Values(false, true)));
-
-
-////////////////////////////////////////////////////////
-// Convolve
-
-void static convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
-{
-    // reallocate the output array if needed
-    C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
-    cv::Size dftSize;
-
-    // compute the size of DFT transform
-    dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
-    dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
-
-    // allocate temporary buffers and initialize them with 0s
-    cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
-    cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
-
-    // copy A and B to the top-left corners of tempA and tempB, respectively
-    cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
-    A.copyTo(roiA);
-    cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
-    B.copyTo(roiB);
-
-    // now transform the padded A & B in-place;
-    // use "nonzeroRows" hint for faster processing
-    cv::dft(tempA, tempA, 0, A.rows);
-    cv::dft(tempB, tempB, 0, B.rows);
-
-    // multiply the spectrums;
-    // the function handles packed spectrum representations well
-    cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
-
-    // transform the product back from the frequency domain.
-    // Even though all the result rows will be non-zero,
-    // you need only the first C.rows of them, and thus you
-    // pass nonzeroRows == C.rows
-    cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
-
-    // now copy the result back to C.
-    tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
-}
-
-IMPLEMENT_PARAM_CLASS(KSize, int)
-IMPLEMENT_PARAM_CLASS(Ccorr, bool)
-
-PARAM_TEST_CASE(Convolve_DFT, cv::Size, KSize, Ccorr)
-{
-    cv::Size size;
-    int ksize;
-    bool ccorr;
-
-    cv::Mat src;
-    cv::Mat kernel;
-
-    cv::Mat dst_gold;
-
-    virtual void SetUp()
-    {
-        size  = GET_PARAM(0);
-        ksize = GET_PARAM(1);
-        ccorr = GET_PARAM(2);
-    }
-};
-
-OCL_TEST_P(Convolve_DFT, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
-    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
-
-    cv::ocl::oclMat dst;
-    cv::ocl::convolve(cv::ocl::oclMat(src), cv::ocl::oclMat(kernel), dst, ccorr);
-
-    cv::Mat dst_gold;
-    convolveDFT(src, kernel, dst_gold, ccorr);
-
-    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
-}
-#define DIFFERENT_CONVOLVE_SIZES testing::Values(cv::Size(251, 257), cv::Size(113, 113), cv::Size(200, 480), cv::Size(1300, 1300))
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Convolve_DFT, testing::Combine(
-    DIFFERENT_CONVOLVE_SIZES,
-    testing::Values(KSize(19), KSize(23), KSize(45)),
-    testing::Values(Ccorr(true)/*, Ccorr(false)*/))); // TODO false ccorr cannot pass for some instances
diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp
deleted file mode 100644
index b2caeaf..0000000
--- a/modules/ocl/test/test_filters.cpp
+++ /dev/null
@@ -1,476 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Zero Lin, Zero.Lin@amd.com
-//    Zhang Ying, zhangying913@gmail.com
-//    Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace testing;
-using namespace std;
-using namespace cv;
-
-PARAM_TEST_CASE(FilterTestBase, MatType,
-                int, // kernel size
-                Size, // dx, dy
-                int, // border type
-                double, // optional parameter
-                bool) // roi or not
-{
-    bool isFP;
-
-    int type, borderType, ksize;
-    Size size;
-    double param;
-    bool useRoi;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        ksize = GET_PARAM(1);
-        size = GET_PARAM(2);
-        borderType = GET_PARAM(3);
-        param = GET_PARAM(4);
-        useRoi = GET_PARAM(5);
-
-        isFP = (CV_MAT_DEPTH(type) == CV_32F || CV_MAT_DEPTH(type) == CV_64F);
-    }
-
-    void random_roi(int minSize = 1)
-    {
-        if (minSize == 0)
-            minSize = ksize;
-        Size roiSize = randomSize(minSize, MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, isFP ? 0 : 5, isFP ? 1 : 256);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, isFP ? 0.20 : 60, isFP ? 0.25 : 70);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-    }
-
-    void Near()
-    {
-        if (isFP)
-            Near(1e-6, true);
-        else
-            Near(1, false);
-    }
-
-    void Near(double threshold, bool relative)
-    {
-        Mat roi, whole;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        if (relative)
-        {
-            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
-        }
-        else
-        {
-            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-        }
-    }
-};
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// blur
-
-typedef FilterTestBase Blur;
-
-#ifdef ANDROID
-OCL_TEST_P(Blur, DISABLED_Mat)
-#else
-OCL_TEST_P(Blur, Mat)
-#endif
-{
-    Size kernelSize(ksize, ksize);
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi(0); // TODO NOTE: min value for size is kernel size (temporary bypass border issues in CPU implementation)
-
-        blur(src_roi, dst_roi, kernelSize, Point(-1, -1), borderType);
-        ocl::blur(gsrc_roi, gdst_roi, kernelSize, Point(-1, -1), borderType); // TODO anchor
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// Laplacian
-
-typedef FilterTestBase LaplacianTest;
-
-OCL_TEST_P(LaplacianTest, Accuracy)
-{
-    double scale = param;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Laplacian(src_roi, dst_roi, -1, ksize, scale, 0, borderType);
-        ocl::Laplacian(gsrc_roi, gdst_roi, -1, ksize, scale, 0, borderType);
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// erode & dilate
-
-typedef FilterTestBase Erode;
-
-OCL_TEST_P(Erode, Mat)
-{
-    // erode or dilate kernel
-    Size kernelSize(ksize, ksize);
-    Mat kernel;
-    int iterations = (int)param;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
-
-        cv::erode(src_roi, dst_roi, kernel, Point(-1, -1), iterations);//, borderType);
-        ocl::erode(gsrc_roi, gdst_roi, kernel, Point(-1, -1), iterations);//, borderType);
-
-        Near();
-    }
-}
-
-typedef FilterTestBase Dilate;
-
-OCL_TEST_P(Dilate, Mat)
-{
-    // erode or dilate kernel
-    Mat kernel;
-    int iterations = (int)param;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        kernel = randomMat(Size(3, 3), CV_8UC1, 0, 3);
-
-        random_roi();
-
-        cv::dilate(src_roi, dst_roi, kernel, Point(-1, -1), iterations);
-        ocl::dilate(gsrc_roi, gdst_roi, kernel, Point(-1, -1), iterations); // TODO iterations, borderType
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// Sobel
-
-typedef FilterTestBase SobelTest;
-
-OCL_TEST_P(SobelTest, Mat)
-{
-    int dx = size.width, dy = size.height;
-    double scale = param;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Sobel(src_roi, dst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType);
-        ocl::Sobel(gsrc_roi, gdst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType);
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// Scharr
-
-typedef FilterTestBase ScharrTest;
-
-OCL_TEST_P(ScharrTest, Mat)
-{
-    int dx = size.width, dy = size.height;
-    double scale = param;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Scharr(src_roi, dst_roi, -1, dx, dy, scale, /* delta */ 0, borderType);
-        ocl::Scharr(gsrc_roi, gdst_roi, -1, dx, dy, scale, /* delta */ 0, borderType);
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// GaussianBlur
-
-typedef FilterTestBase GaussianBlurTest;
-
-OCL_TEST_P(GaussianBlurTest, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double sigma1 = rng.uniform(0.1, 1.0);
-        double sigma2 = rng.uniform(0.1, 1.0);
-
-        GaussianBlur(src_roi, dst_roi, Size(ksize, ksize), sigma1, sigma2, borderType);
-        ocl::GaussianBlur(gsrc_roi, gdst_roi, Size(ksize, ksize), sigma1, sigma2, borderType);
-
-        Near(CV_MAT_DEPTH(type) == CV_8U ? 3 : 5e-5, false);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Filter2D
-
-typedef FilterTestBase Filter2D;
-
-OCL_TEST_P(Filter2D, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Point anchor(-1, -1);
-        if (size.width >= 0)
-            anchor.x = size.width % ksize;
-        if (size.height >= 0)
-            anchor.y = size.height % ksize;
-
-        const Size kernelSize(ksize, ksize);
-        Mat kernel = randomMat(kernelSize, CV_32FC1, 0, 1.0);
-        kernel *= 1.0 / (double)(ksize * ksize);
-
-        cv::filter2D(src_roi, dst_roi, -1, kernel, anchor, 0.0, borderType);
-        ocl::filter2D(gsrc_roi, gdst_roi, -1, kernel, anchor, 0.0, borderType);
-
-        Near();
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Bilateral
-
-typedef FilterTestBase Bilateral;
-
-OCL_TEST_P(Bilateral, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double sigmacolor = rng.uniform(20, 100);
-        double sigmaspace = rng.uniform(10, 40);
-
-        cv::bilateralFilter(src_roi, dst_roi, ksize, sigmacolor, sigmaspace, borderType);
-        ocl::bilateralFilter(gsrc_roi, gdst_roi, ksize, sigmacolor, sigmaspace, borderType);
-
-        Near();
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// AdaptiveBilateral
-
-typedef FilterTestBase AdaptiveBilateral;
-
-OCL_TEST_P(AdaptiveBilateral, Mat)
-{
-    const Size kernelSize(ksize, ksize);
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        adaptiveBilateralFilter(src_roi, dst_roi, kernelSize, 5, 1, Point(-1, -1), borderType); // TODO anchor
-        ocl::adaptiveBilateralFilter(gsrc_roi, gdst_roi, kernelSize, 5, 1, Point(-1, -1), borderType);
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-// MedianFilter
-
-typedef FilterTestBase MedianFilter;
-
-OCL_TEST_P(MedianFilter, Mat)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        medianBlur(src_roi, dst_roi, ksize);
-        ocl::medianFilter(gsrc_roi, gdst_roi, ksize);
-
-        Near();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define FILTER_BORDER_SET_NO_ISOLATED \
-    Values((int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_WRAP, (int)BORDER_REFLECT_101/*, \
-            (int)BORDER_CONSTANT|BORDER_ISOLATED, (int)BORDER_REPLICATE|BORDER_ISOLATED, \
-            (int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
-            (int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
-
-#define FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED \
-    Values((int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, /*(int)BORDER_WRAP,*/ (int)BORDER_REFLECT_101/*, \
-            (int)BORDER_CONSTANT|BORDER_ISOLATED, (int)BORDER_REPLICATE|BORDER_ISOLATED, \
-            (int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
-            (int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
-
-#define FILTER_DATATYPES Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, \
-                                CV_32FC1, CV_32FC3, CV_32FC4, \
-                                CV_64FC1, CV_64FC3, CV_64FC4)
-
-INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
-                            FILTER_DATATYPES,
-                            Values(3, 5, 7),
-                            Values(Size(0, 0)), // not used
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
-                            FILTER_DATATYPES,
-                            Values(1, 3),
-                            Values(Size(0, 0)), // not used
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(1.0, 0.2, 3.0), // scalar
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(3, 5, 7),
-                            Values(Size(0, 0)), // not used
-                            Values(0), // not used
-                            Values(1.0, 2.0, 3.0),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(3, 5, 7),
-                            Values(Size(0, 0)), // not used
-                            Values(0), // not used
-                            Values(1.0, 2.0, 3.0),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(3, 5),
-                            Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)), // dx, dy
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(1),
-                            Values(Size(0, 1), Size(1, 0)), // dx, dy
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(1.0, 0.2), // scalar
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
-                            Values(3, 5),
-                            Values(Size(0, 0)), // not used
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
-                            FILTER_DATATYPES,
-                            Values(3, 15), // TODO 25: CPU implementation has some issues
-                            Values(Size(-1, -1), Size(0, 0), Size(2, 1)), // anchor
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            Values(5, 9),
-                            Values(Size(0, 0)), // not used
-                            FILTER_BORDER_SET_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            Values(5, 9),
-                            Values(Size(0, 0)), // not used
-                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
-                            Values(0.0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Filter, MedianFilter, Combine(
-                            Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
-                            Values(3, 5),
-                            Values(Size(0, 0)), // not used
-                            Values(0), // not used
-                            Values(0.0), // not used
-                            Bool()));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp
deleted file mode 100644
index c2a4484..0000000
--- a/modules/ocl/test/test_gemm.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-
-////////////////////////////////////////////////////////////////////////////
-// GEMM
-
-PARAM_TEST_CASE(Gemm, int, cv::Size, int)
-{
-    int      type;
-    cv::Size mat_size;
-    int		 flags;
-
-    virtual void SetUp()
-    {
-        type     = GET_PARAM(0);
-        mat_size = GET_PARAM(1);
-        flags    = GET_PARAM(2);
-    }
-};
-
-OCL_TEST_P(Gemm, Accuracy)
-{
-    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
-    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
-    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
-
-    cv::Mat dst;
-    cv::ocl::oclMat ocl_dst;
-
-    cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
-    cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
-
-    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4);
-}
-
-INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
-                            testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
-                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
-                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
diff --git a/modules/ocl/test/test_hough.cpp b/modules/ocl/test/test_hough.cpp
deleted file mode 100644
index f5d2578..0000000
--- a/modules/ocl/test/test_hough.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// HoughCircles
-
-PARAM_TEST_CASE(HoughCircles, cv::Size)
-{
-    static void drawCircles(cv::Mat& dst, const std::vector<cv::Vec3f>& circles, bool fill)
-    {
-        dst.setTo(cv::Scalar::all(0));
-
-        for (size_t i = 0; i < circles.size(); ++i)
-            cv::circle(dst, cv::Point2f(circles[i][0], circles[i][1]), (int)circles[i][2], cv::Scalar::all(255), fill ? -1 : 1);
-    }
-};
-
-OCL_TEST_P(HoughCircles, Accuracy)
-{
-    const cv::Size size = GET_PARAM(0);
-
-    const float dp = 2.0f;
-    const float minDist = 10.0f;
-    const int minRadius = 10;
-    const int maxRadius = 20;
-    const int cannyThreshold = 100;
-    const int votesThreshold = 15;
-
-    std::vector<cv::Vec3f> circles_gold(4);
-    circles_gold[0] = cv::Vec3i(20, 20, minRadius);
-    circles_gold[1] = cv::Vec3i(90, 87, minRadius + 3);
-    circles_gold[2] = cv::Vec3i(30, 70, minRadius + 8);
-    circles_gold[3] = cv::Vec3i(80, 10, maxRadius);
-
-    cv::Mat src(size, CV_8UC1);
-    drawCircles(src, circles_gold, true);
-    cv::ocl::oclMat d_src(src);
-
-    cv::ocl::oclMat d_circles;
-    cv::ocl::HoughCircles(d_src, d_circles, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-    ASSERT_TRUE(d_circles.rows > 0);
-
-    cv::Mat circles;
-    d_circles.download(circles);
-
-    for (int i = 0; i < circles.cols; ++i)
-    {
-        cv::Vec3f cur = circles.at<cv::Vec3f>(i);
-
-        bool found = false;
-
-        for (size_t j = 0; j < circles_gold.size(); ++j)
-        {
-            cv::Vec3f gold = circles_gold[j];
-
-            if (std::fabs(cur[0] - gold[0]) < minDist && std::fabs(cur[1] - gold[1]) < minDist && std::fabs(cur[2] - gold[2]) < minDist)
-            {
-                found = true;
-                break;
-            }
-        }
-
-        ASSERT_TRUE(found);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Hough, HoughCircles, DIFFERENT_SIZES);
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
deleted file mode 100644
index 9b25d9f..0000000
--- a/modules/ocl/test/test_imgproc.cpp
+++ /dev/null
@@ -1,622 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Jiang Liyuan, lyuan001.good@163.com
-//    Rock Li, Rock.Li@amd.com
-//    Wu Zailong, bullet@yeah.net
-//    Xu Pang, pangxu010@163.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace testing;
-using namespace std;
-using namespace cv;
-
-///////////////////////////////////////////////////////////////////////////////
-
-PARAM_TEST_CASE(ImgprocTestBase, MatType,
-                int, // blockSize
-                int, // border type
-                bool) // roi or not
-{
-    int type, borderType, blockSize;
-    bool useRoi;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        blockSize = GET_PARAM(1);
-        borderType = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-    }
-
-    virtual void random_roi()
-    {
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0, bool relative = false)
-    {
-        Mat roi, whole;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        if (relative)
-        {
-            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
-        }
-        else
-        {
-            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-        }
-    }
-};
-
-////////////////////////////////copyMakeBorder////////////////////////////////////////////
-
-PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth
-                Channels, // channels
-                bool, // isolated or not
-                Border, // border type
-                bool) // roi or not
-{
-    int type, borderType;
-    bool useRoi;
-
-    Border border;
-    Scalar val;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
-        borderType = GET_PARAM(3);
-
-        if (GET_PARAM(2))
-            borderType |= BORDER_ISOLATED;
-
-        useRoi = GET_PARAM(4);
-    }
-
-    void random_roi()
-    {
-        border = randomBorder(0, MAX_VALUE << 2);
-        val = randomScalar(-MAX_VALUE, MAX_VALUE);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        dstBorder.top += border.top;
-        dstBorder.lef += border.lef;
-        dstBorder.rig += border.rig;
-        dstBorder.bot += border.bot;
-
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-OCL_TEST_P(CopyMakeBorder, Mat)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        cv::copyMakeBorder(src_roi, dst_roi, border.top, border.bot, border.lef, border.rig, borderType, val);
-        ocl::copyMakeBorder(gsrc_roi, gdst_roi, border.top, border.bot, border.lef, border.rig, borderType, val);
-
-        Near();
-    }
-}
-
-////////////////////////////////equalizeHist//////////////////////////////////////////////
-
-typedef ImgprocTestBase EqualizeHist;
-
-OCL_TEST_P(EqualizeHist, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        equalizeHist(src_roi, dst_roi);
-        ocl::equalizeHist(gsrc_roi, gdst_roi);
-
-        Near(1.1);
-    }
-}
-
-////////////////////////////////cornerMinEigenVal//////////////////////////////////////////
-
-struct CornerTestBase :
-        public ImgprocTestBase
-{
-    virtual void random_roi()
-    {
-        Mat image = readImageType("gpu/stereobm/aloe-L.png", type);
-        ASSERT_FALSE(image.empty());
-
-        bool isFP = CV_MAT_DEPTH(type) >= CV_32F;
-        float val = 255.0f;
-        if (isFP)
-        {
-            image.convertTo(image, -1, 1.0 / 255);
-            val /= 255.0f;
-        }
-
-        Size roiSize = image.size();
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-
-        Size wholeSize = Size(roiSize.width + srcBorder.lef + srcBorder.rig, roiSize.height + srcBorder.top + srcBorder.bot);
-        src = randomMat(wholeSize, type, -val, val, false);
-        src_roi = src(Rect(srcBorder.lef, srcBorder.top, roiSize.width, roiSize.height));
-        image.copyTo(src_roi);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, CV_32FC1, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-    }
-};
-
-typedef CornerTestBase CornerMinEigenVal;
-
-OCL_TEST_P(CornerMinEigenVal, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        int apertureSize = 3;
-
-        cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
-        ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
-
-        Near(1e-5, true);
-    }
-}
-
-////////////////////////////////cornerHarris//////////////////////////////////////////
-struct CornerHarris :
-    public ImgprocTestBase
-{
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        absdiff(whole, dst_whole, whole);
-        absdiff(roi, dst_roi, roi);
-
-        divide(whole, dst_whole, whole);
-        divide(roi, dst_roi, roi);
-
-        absdiff(dst_whole, dst_whole, dst_whole);
-        absdiff(dst_roi, dst_roi, dst_roi);
-
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-OCL_TEST_P(CornerHarris, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        int apertureSize = 3;
-        double k = randomDouble(0.01, 0.9);
-
-        cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
-        ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
-
-        Near(1e-5);
-    }
-}
-
-//////////////////////////////////integral/////////////////////////////////////////////////
-
-struct Integral :
-        public ImgprocTestBase
-{
-    int sdepth;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        blockSize = GET_PARAM(1);
-        sdepth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-    }
-};
-OCL_TEST_P(Integral, Mat1)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        ocl::integral(gsrc_roi, gdst_roi, sdepth);
-        integral(src_roi, dst_roi, sdepth);
-
-        Near();
-    }
-}
-
-OCL_TEST_P(Integral, Mat2)
-{
-    Mat dst1;
-    ocl::oclMat gdst1;
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        integral(src_roi, dst_roi, dst1, sdepth);
-        ocl::integral(gsrc_roi, gdst_roi, gdst1, sdepth);
-
-        Near();
-        if(gdst1.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
-            EXPECT_MAT_NEAR(dst1, Mat(gdst1), 0.);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-//// threshold
-
-struct Threshold :
-        public ImgprocTestBase
-{
-    int thresholdType;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        blockSize = GET_PARAM(1);
-        thresholdType = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-    }
-};
-
-OCL_TEST_P(Threshold, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        double maxVal = randomDouble(20.0, 127.0);
-        double thresh = randomDouble(0.0, maxVal);
-
-        threshold(src_roi, dst_roi, thresh, maxVal, thresholdType);
-        ocl::threshold(gsrc_roi, gdst_roi, thresh, maxVal, thresholdType);
-
-        Near(1);
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////
-// calcHist
-
-static void calcHistGold(const Mat &src, Mat &hist)
-{
-    hist = Mat(1, 256, CV_32SC1, Scalar::all(0));
-
-    int * const hist_row = hist.ptr<int>();
-    for (int y = 0; y < src.rows; ++y)
-    {
-        const uchar * const src_row = src.ptr(y);
-
-        for (int x = 0; x < src.cols; ++x)
-            ++hist_row[src_row[x]];
-    }
-}
-
-typedef ImgprocTestBase CalcHist;
-
-OCL_TEST_P(CalcHist, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        calcHistGold(src_roi, dst_roi);
-        ocl::calcHist(gsrc_roi, gdst_roi);
-
-        Near();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////
-//// CLAHE
-
-PARAM_TEST_CASE(CLAHETest, Size, double, bool)
-{
-    Size gridSize;
-    double clipLimit;
-    bool useRoi;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        gridSize = GET_PARAM(0);
-        clipLimit = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-    }
-
-    void random_roi()
-    {
-        Size roiSize = randomSize(std::max(gridSize.height, gridSize.width), MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, CV_8UC1, 5, 256);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, CV_8UC1, 5, 16);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-OCL_TEST_P(CLAHETest, Accuracy)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        Ptr<CLAHE> clahe = ocl::createCLAHE(clipLimit, gridSize);
-        clahe->apply(gsrc_roi, gdst_roi);
-
-        Ptr<CLAHE> clahe_gold = createCLAHE(clipLimit, gridSize);
-        clahe_gold->apply(src_roi, dst_roi);
-
-        Near(1.0);
-    }
-}
-
-/////////////////////////////Convolve//////////////////////////////////
-
-static void convolve_gold(const Mat & src, const Mat & kernel, Mat & dst)
-{
-    for (int i = 0; i < src.rows; i++)
-    {
-        float * const dstptr = dst.ptr<float>(i);
-
-        for (int j = 0; j < src.cols; j++)
-        {
-            float temp = 0;
-
-            for (int m = 0; m < kernel.rows; m++)
-            {
-                const float * const kptr = kernel.ptr<float>(m);
-                for (int n = 0; n < kernel.cols; n++)
-                {
-                    int r = clipInt(i - kernel.rows / 2 + m, 0, src.rows - 1);
-                    int c = clipInt(j - kernel.cols / 2 + n, 0, src.cols - 1);
-
-                    temp += src.ptr<float>(r)[c] * kptr[n];
-                }
-            }
-
-            dstptr[j] = temp;
-        }
-    }
-}
-
-typedef ImgprocTestBase Convolve;
-
-OCL_TEST_P(Convolve, Mat)
-{
-    Mat kernel, kernel_roi;
-    ocl::oclMat gkernel, gkernel_roi;
-    const Size roiSize(7, 7);
-
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Border kernelBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(kernel, kernel_roi, roiSize, kernelBorder, type, 5, 16);
-        generateOclMat(gkernel, gkernel_roi, kernel, roiSize, kernelBorder);
-
-        convolve_gold(src_roi, kernel_roi, dst_roi);
-        ocl::convolve(gsrc_roi, gkernel_roi, gdst_roi);
-
-        Near(1);
-    }
-}
-
-////////////////////////////////// ColumnSum //////////////////////////////////////
-
-static void columnSum_gold(const Mat & src, Mat & dst)
-{
-    float * prevdptr = dst.ptr<float>(0);
-    const float * sptr = src.ptr<float>(0);
-
-    for (int x = 0; x < src.cols; ++x)
-        prevdptr[x] = sptr[x];
-
-    for (int y = 1; y < src.rows; ++y)
-    {
-        sptr = src.ptr<float>(y);
-        float * const dptr = dst.ptr<float>(y);
-
-        for (int x = 0; x < src.cols; ++x)
-            dptr[x] = prevdptr[x] + sptr[x];
-
-        prevdptr = dptr;
-    }
-}
-
-typedef ImgprocTestBase ColumnSum;
-
-OCL_TEST_P(ColumnSum, Accuracy)
-{
-    for (int i = 0; i < LOOP_TIMES; ++i)
-    {
-        random_roi();
-
-        columnSum_gold(src_roi, dst_roi);
-        ocl::columnSum(gsrc_roi, gdst_roi);
-
-        Near(1e-5);
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-
-INSTANTIATE_TEST_CASE_P(Imgproc, EqualizeHist, Combine(
-                            Values((MatType)CV_8UC1),
-                            Values(0), // not used
-                            Values(0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
-                            Values((MatType)CV_8UC1, (MatType)CV_32FC1),
-                            Values(3, 5),
-                            Values((int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT101),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
-                            Values((MatType)CV_8UC1, CV_32FC1),
-                            Values(3, 5),
-                            Values( (int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
-                            Values((MatType)CV_8UC1), // TODO does not work with CV_32F, CV_64F
-                            Values(0), // not used
-                            Values((MatType)CV_32SC1, (MatType)CV_32FC1),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
-                            Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
-                                   CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
-                                   CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
-                            Values(0),
-                            Values(ThreshOp(THRESH_BINARY),
-                                   ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC),
-                                   ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, CalcHist, Combine(
-                            Values((MatType)CV_8UC1),
-                            Values(0), // not used
-                            Values(0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, CLAHETest, Combine(
-                            Values(Size(4, 4), Size(32, 8), Size(8, 64)),
-                            Values(0.0, 10.0, 62.0, 300.0),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, Convolve, Combine(
-                            Values((MatType)CV_32FC1),
-                            Values(0), // not used
-                            Values(0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, Combine(
-                            Values(MatType(CV_32FC1)),
-                            Values(0), // not used
-                            Values(0), // not used
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
-                            testing::Values((MatDepth)CV_8U, (MatDepth)CV_16S, (MatDepth)CV_32S, (MatDepth)CV_32F),
-                            testing::Values(Channels(1), Channels(3), (Channels)4),
-                            Bool(), // border isolated or not
-                            Values((Border)BORDER_REPLICATE, (Border)BORDER_REFLECT,
-                                   (Border)BORDER_WRAP, (Border)BORDER_REFLECT_101),
-                            Bool()));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_kalman.cpp b/modules/ocl/test/test_kalman.cpp
deleted file mode 100644
index 045cd98..0000000
--- a/modules/ocl/test/test_kalman.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-//////////////////////////////////////////////////////////////////////////
-
-PARAM_TEST_CASE(Kalman, int, int)
-{
-    int size_;
-    int iteration;
-    virtual void SetUp()
-    {
-        size_ = GET_PARAM(0);
-        iteration = GET_PARAM(1);
-    }
-};
-
-OCL_TEST_P(Kalman, Accuracy)
-{
-    const int Dim = size_;
-    const int Steps = iteration;
-    const double max_init = 1;
-    const double max_noise = 0.1;
-
-    Mat sample_mat(Dim, 1, CV_32F), temp_mat;
-    oclMat Sample(Dim, 1, CV_32F);
-    oclMat Temp(Dim, 1, CV_32F);
-    Mat Temp_cpu(Dim, 1, CV_32F);
-
-    Size size(Sample.cols, Sample.rows);
-
-    sample_mat =  randomMat(size, Sample.type(), -max_init, max_init, false);
-    Sample.upload(sample_mat);
-
-    //ocl start
-    cv::ocl::KalmanFilter kalman_filter_ocl;
-    kalman_filter_ocl.init(Dim, Dim);
-
-    cv::ocl::setIdentity(kalman_filter_ocl.errorCovPre, 1);
-    cv::ocl::setIdentity(kalman_filter_ocl.measurementMatrix, 1);
-    cv::ocl::setIdentity(kalman_filter_ocl.errorCovPost, 1);
-
-    kalman_filter_ocl.measurementNoiseCov.setTo(Scalar::all(0));
-    kalman_filter_ocl.statePre.setTo(Scalar::all(0));
-    kalman_filter_ocl.statePost.setTo(Scalar::all(0));
-
-    kalman_filter_ocl.correct(Sample);
-    //ocl end
-
-    //cpu start
-    cv::KalmanFilter kalman_filter_cpu;
-
-    kalman_filter_cpu.init(Dim, Dim);
-
-    cv::setIdentity(kalman_filter_cpu.errorCovPre, 1);
-    cv::setIdentity(kalman_filter_cpu.measurementMatrix, 1);
-    cv::setIdentity(kalman_filter_cpu.errorCovPost, 1);
-
-    kalman_filter_cpu.measurementNoiseCov.setTo(Scalar::all(0));
-    kalman_filter_cpu.statePre.setTo(Scalar::all(0));
-    kalman_filter_cpu.statePost.setTo(Scalar::all(0));
-
-    kalman_filter_cpu.correct(sample_mat);
-    //cpu end
-    //test begin
-    for(int i = 0; i<Steps; i++)
-    {
-        kalman_filter_ocl.predict();
-        kalman_filter_cpu.predict();
-
-        cv::gemm(kalman_filter_cpu.transitionMatrix, sample_mat, 1, cv::Mat(), 0, Temp_cpu);
-
-        Size size1(Temp.cols, Temp.rows);
-        Mat temp = randomMat(size1, Temp.type(), 0, 0xffff, false);
-
-
-        cv::multiply(2, temp, temp);
-
-        cv::subtract(temp, 1, temp);
-
-        cv::multiply(max_noise, temp, temp);
-
-        cv::add(temp, Temp_cpu, Temp_cpu);
-
-        Temp.upload(Temp_cpu);
-        Temp.copyTo(Sample);
-        Temp_cpu.copyTo(sample_mat);
-
-        kalman_filter_ocl.correct(Temp);
-        kalman_filter_cpu.correct(Temp_cpu);
-    }
-    //test end
-    EXPECT_MAT_NEAR(kalman_filter_cpu.statePost, kalman_filter_ocl.statePost, 0);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30)));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp
deleted file mode 100644
index eb36274..0000000
--- a/modules/ocl/test/test_kmeans.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Erping Pang,   pang_er_ping@163.com
-//    Xiaopeng Fu,   fuxiaopeng2222@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-using namespace cv;
-
-#define OCL_KMEANS_USE_INITIAL_LABELS 1
-#define OCL_KMEANS_PP_CENTERS         2
-
-PARAM_TEST_CASE(Kmeans, int, int, int)
-{
-    int type;
-    int K;
-    int flags;
-    Mat src ;
-    ocl::oclMat d_src, d_dists;
-
-    Mat labels, centers;
-    ocl::oclMat d_labels, d_centers;
-    virtual void SetUp()
-    {
-        K = GET_PARAM(0);
-        type = GET_PARAM(1);
-        flags = GET_PARAM(2);
-
-        // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
-        Size size = Size(MWIDTH, MHEIGHT);
-        src.create(size, type);
-        int row_idx = 0;
-        const int max_neighbour = MHEIGHT / K - 1;
-        CV_Assert(K <= MWIDTH);
-        for(int i = 0; i < K; i++ )
-        {
-            Mat center_row_header = src.row(row_idx);
-            center_row_header.setTo(0);
-            int nchannel = center_row_header.channels();
-            for(int j = 0; j < nchannel; j++)
-                center_row_header.at<float>(0, i*nchannel+j) = 50000.0;
-
-            for(int j = 0; (j < max_neighbour) ||
-                           (i == K-1 && j < max_neighbour + MHEIGHT%K); j ++)
-            {
-                Mat cur_row_header = src.row(row_idx + 1 + j);
-                center_row_header.copyTo(cur_row_header);
-                Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), -200, 200, false);
-                cur_row_header += tmpmat;
-            }
-            row_idx += 1 + max_neighbour;
-        }
-    }
-};
-OCL_TEST_P(Kmeans, Mat){
-    if(flags & KMEANS_USE_INITIAL_LABELS)
-    {
-        // inital a given labels
-        labels.create(src.rows, 1, CV_32S);
-        int *label = labels.ptr<int>();
-        for(int i = 0; i < src.rows; i++)
-            label[i] = rng.uniform(0, K);
-        d_labels.upload(labels);
-    }
-    d_src.upload(src);
-
-    for(int j = 0; j < LOOP_TIMES; j++)
-    {
-        kmeans(src, K, labels,
-            TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
-            1, flags, centers);
-        ocl::kmeans(d_src, K, d_labels,
-            TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
-            1, flags, d_centers);
-        Mat dd_labels(d_labels);
-        Mat dd_centers(d_centers);
-        if(flags & KMEANS_USE_INITIAL_LABELS)
-        {
-            EXPECT_MAT_NEAR(labels, dd_labels, 0);
-            EXPECT_MAT_NEAR(centers, dd_centers, 1e-3);
-        }
-        else
-        {
-            int row_idx = 0;
-            for(int i = 0; i < K; i++)
-            {
-                // verify lables with ground truth resutls
-                int label = labels.at<int>(row_idx);
-                int header_label = dd_labels.at<int>(row_idx);
-                for(int j = 0; (j < MHEIGHT/K)||(i == K-1 && j < MHEIGHT/K+MHEIGHT%K); j++)
-                {
-                    ASSERT_NEAR(labels.at<int>(row_idx+j), label, 0);
-                    ASSERT_NEAR(dd_labels.at<int>(row_idx+j), header_label, 0);
-                }
-
-                // verify centers
-                float *center = centers.ptr<float>(label);
-                float *header_center = dd_centers.ptr<float>(header_label);
-                for(int t = 0; t < centers.cols; t++)
-                    ASSERT_NEAR(center[t], header_center[t], 1e-3);
-
-                row_idx += MHEIGHT/K;
-            }
-        }
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
-    Values(3, 5, 8),
-    Values(CV_32FC1, CV_32FC2, CV_32FC4),
-    Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
-
-
-/////////////////////////////// DistanceToCenters //////////////////////////////////////////
-
-CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
-
-PARAM_TEST_CASE(distanceToCenters, DistType, bool)
-{
-    int distType;
-    bool useRoi;
-
-    Mat src, centers, src_roi, centers_roi;
-    ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi;
-
-    virtual void SetUp()
-    {
-        distType = GET_PARAM(0);
-        useRoi = GET_PARAM(1);
-    }
-
-    void random_roi()
-    {
-        Size roiSizeSrc = randomSize(1, MAX_VALUE);
-        Size roiSizeCenters = randomSize(1, MAX_VALUE);
-        roiSizeSrc.width = roiSizeCenters.width;
-
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSizeSrc, srcBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
-
-        Border centersBorder = randomBorder(0, useRoi ? 500 : 0);
-        randomSubMat(centers, centers_roi, roiSizeCenters, centersBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
-
-        for (int i = 0; i < centers.rows; i++)
-            centers.at<float>(i, randomInt(0, centers.cols)) = (float)randomDouble(SHRT_MAX, INT_MAX);
-
-        generateOclMat(ocl_src, ocl_src_roi, src, roiSizeSrc, srcBorder);
-        generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSizeCenters, centersBorder);
-    }
-};
-
-OCL_TEST_P(distanceToCenters, Accuracy)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Mat labels, dists;
-        ocl::distanceToCenters(ocl_src_roi, ocl_centers_roi, dists, labels, distType);
-
-        EXPECT_EQ(dists.size(), labels.size());
-
-        Mat batch_dists;
-        cv::batchDistance(src_roi, centers_roi, batch_dists, CV_32FC1, noArray(), distType);
-
-        std::vector<float> gold_dists_v;
-        gold_dists_v.reserve(batch_dists.rows);
-
-        for (int i = 0; i < batch_dists.rows; i++)
-        {
-            Mat r = batch_dists.row(i);
-            double mVal;
-            Point mLoc;
-            minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
-
-            int ocl_label = labels.at<int>(i, 0);
-            EXPECT_EQ(mLoc.x, ocl_label);
-
-            gold_dists_v.push_back(static_cast<float>(mVal));
-        }
-
-        double relative_error = cv::norm(Mat(gold_dists_v), dists, NORM_INF | NORM_RELATIVE);
-        ASSERT_LE(relative_error, 1e-5);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P (OCL_ML, distanceToCenters, Combine(DistType::all(), Bool()));
-
-#endif
diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp
deleted file mode 100644
index aa63f3d..0000000
--- a/modules/ocl/test/test_match_template.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate
-#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
-
-IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)
-
-#define MTEMP_SIZES testing::Values(cv::Size(128, 256), cv::Size(1024, 768))
-
-PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMethod)
-{
-    cv::Size size;
-    cv::Size templ_size;
-    int cn;
-    int method;
-
-    virtual void SetUp()
-    {
-        size = GET_PARAM(0);
-        templ_size = GET_PARAM(1);
-        cn = GET_PARAM(2);
-        method = GET_PARAM(3);
-    }
-};
-
-OCL_TEST_P(MatchTemplate8U, Accuracy)
-{
-    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn), 0, 255);
-    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn), 0, 255);
-
-    cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
-    cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
-
-    cv::Mat dst_gold;
-    cv::matchTemplate(image, templ, dst_gold, method);
-
-    cv::Mat mat_dst;
-    dst.download(mat_dst);
-
-    EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1);
-}
-
-PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod)
-{
-    cv::Size size;
-    cv::Size templ_size;
-    int cn;
-    int method;
-
-    virtual void SetUp()
-    {
-        size = GET_PARAM(0);
-        templ_size = GET_PARAM(1);
-        cn = GET_PARAM(2);
-        method = GET_PARAM(3);
-    }
-};
-
-OCL_TEST_P(MatchTemplate32F, Accuracy)
-{
-    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn), 0, 255);
-    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn), 0, 255);
-
-    cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
-    cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
-
-    cv::Mat dst_gold;
-    cv::matchTemplate(image, templ, dst_gold, method);
-
-    cv::Mat mat_dst;
-    dst.download(mat_dst);
-
-    EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
-                        testing::Combine(
-                            MTEMP_SIZES,
-                            testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
-                            testing::Values(Channels(1), Channels(3), Channels(4)),
-                            ALL_TEMPLATE_METHODS
-                        )
-                       );
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate32F, testing::Combine(
-                            MTEMP_SIZES,
-                            testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
-                            testing::Values(Channels(1), Channels(3), Channels(4)),
-                            testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
-#endif
diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp
deleted file mode 100644
index c7ceef4..0000000
--- a/modules/ocl/test/test_matrix_operation.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace testing;
-using namespace std;
-
-////////////////////////////////converto/////////////////////////////////////////////////
-
-PARAM_TEST_CASE(MatrixTestBase, MatDepth, MatDepth, int, bool)
-{
-    int src_depth, cn, dstType;
-    bool use_roi;
-
-    Mat src, dst, src_roi, dst_roi;
-    ocl::oclMat gdst, gsrc, gdst_roi, gsrc_roi;
-
-    virtual void SetUp()
-    {
-        src_depth = GET_PARAM(0);
-        cn = GET_PARAM(2);
-        dstType = CV_MAKE_TYPE(GET_PARAM(1), cn);
-
-        use_roi = GET_PARAM(3);
-    }
-
-    virtual void random_roi()
-    {
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKE_TYPE(src_depth, cn), -MAX_VALUE, MAX_VALUE);
-
-        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
-
-        generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
-    }
-};
-
-typedef MatrixTestBase ConvertTo;
-
-OCL_TEST_P(ConvertTo, Accuracy)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        src_roi.convertTo(dst_roi, dstType);
-        gsrc_roi.convertTo(gdst_roi, dstType);
-
-        EXPECT_MAT_NEAR(dst, Mat(gdst), src_depth == CV_64F ? 1.0 : 0.0);
-        EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), src_depth == CV_64F ? 1.0 : 0.0);
-    }
-}
-
-///////////////////////////////////////////copyto/////////////////////////////////////////////////////////////
-
-struct CopyTo :
-        public MatrixTestBase
-{
-    Mat mask, mask_roi;
-    ocl::oclMat gmask, gmask_roi;
-
-    virtual void random_roi()
-    {
-        int type = CV_MAKE_TYPE(src_depth, cn);
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 16);
-
-        Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, 5, 16);
-
-        generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
-        generateOclMat(gmask, gmask_roi, mask, roiSize, maskBorder);
-    }
-};
-
-OCL_TEST_P(CopyTo, Without_mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        src_roi.copyTo(dst_roi);
-        gsrc_roi.copyTo(gdst_roi);
-
-        EXPECT_MAT_NEAR(dst, Mat(gdst), 0.0);
-        EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), 0.0);
-    }
-}
-
-OCL_TEST_P(CopyTo, With_mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        src_roi.copyTo(dst_roi, mask_roi);
-        gsrc_roi.copyTo(gdst_roi, gmask_roi);
-
-        EXPECT_MAT_NEAR(dst, Mat(gdst), 0.0);
-        EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), 0.0);
-    }
-}
-
-/////////////////////////////////////////// setTo /////////////////////////////////////////////////////////////
-
-typedef CopyTo SetTo;
-
-OCL_TEST_P(SetTo, Without_mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        Scalar scalar = randomScalar(-MAX_VALUE, MAX_VALUE);
-
-        src_roi.setTo(scalar);
-        gsrc_roi.setTo(scalar);
-
-        EXPECT_MAT_NEAR(dst, Mat(gdst), 0.0);
-        EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), 0.0);;
-    }
-}
-
-OCL_TEST_P(SetTo, With_mask)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-        Scalar scalar = randomScalar(-MAX_VALUE, MAX_VALUE);
-
-        src_roi.setTo(scalar, mask_roi);
-        gsrc_roi.setTo(scalar, gmask_roi);
-
-        EXPECT_MAT_NEAR(src, Mat(gsrc), 1.);
-        EXPECT_MAT_NEAR(src_roi, Mat(gsrc_roi), 1.);
-    }
-}
-
-// convertC3C4
-
-PARAM_TEST_CASE(convertC3C4, MatDepth, bool)
-{
-    int depth;
-    bool use_roi;
-
-    Mat src, src_roi;
-    ocl::oclMat gsrc, gsrc_roi;
-
-    virtual void SetUp()
-    {
-        depth = GET_PARAM(0);
-        use_roi = GET_PARAM(1);
-    }
-
-    void random_roi()
-    {
-        int type = CV_MAKE_TYPE(depth, 3);
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
-        generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
-    }
-};
-
-OCL_TEST_P(convertC3C4, Accuracy)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        gsrc_roi = src_roi;
-
-        EXPECT_MAT_NEAR(src_roi, Mat(gsrc_roi), 0.0);
-        EXPECT_MAT_NEAR(src, Mat(gsrc), 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
-                            testing::Range(1, 5), Bool()));
-
-INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
-                            Values(MatDepth(0)), // not used
-                            testing::Range(1, 5), Bool()));
-
-INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
-                            Values((MatDepth)0), // not used
-                            testing::Range(1, 5), Bool()));
-
-INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
-                            Bool()));
-#endif
diff --git a/modules/ocl/test/test_mean_shift.cpp b/modules/ocl/test/test_mean_shift.cpp
deleted file mode 100644
index 6ee3e35..0000000
--- a/modules/ocl/test/test_mean_shift.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Jiang Liyuan, lyuan001.good@163.com
-//    Rock Li, Rock.Li@amd.com
-//    Wu Zailong, bullet@yeah.net
-//    Xu Pang, pangxu010@163.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace testing;
-using namespace std;
-using namespace cv;
-
-typedef struct
-{
-    short x;
-    short y;
-} COOR;
-
-COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab)
-{
-
-    int isr2 = sr * sr;
-    int c0, c1, c2, c3;
-    int iter;
-    uchar *ptr = NULL;
-    uchar *pstart = NULL;
-    int revx = 0, revy = 0;
-    c0 = sptr[0];
-    c1 = sptr[1];
-    c2 = sptr[2];
-    c3 = sptr[3];
-    // iterate meanshift procedure
-    for(iter = 0; iter < maxIter; iter++ )
-    {
-        int count = 0;
-        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
-
-        //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
-        int minx = x0 - sp;
-        int miny = y0 - sp;
-        int maxx = x0 + sp;
-        int maxy = y0 + sp;
-
-        //deal with the image boundary
-        if(minx < 0) minx = 0;
-        if(miny < 0) miny = 0;
-        if(maxx >= size.width) maxx = size.width - 1;
-        if(maxy >= size.height) maxy = size.height - 1;
-        if(iter == 0)
-        {
-            pstart = sptr;
-        }
-        else
-        {
-            pstart = pstart + revy * sstep + (revx << 2); //point to the new position
-        }
-        ptr = pstart;
-        ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
-
-        for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
-        {
-            int rowCount = 0;
-            int x = minx;
-#if CV_ENABLE_UNROLLED
-            for( ; x + 4 <= maxx; x += 4, ptr += 16)
-            {
-                int t0, t1, t2;
-                t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x;
-                    rowCount++;
-                }
-                t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 1;
-                    rowCount++;
-                }
-                t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 2;
-                    rowCount++;
-                }
-                t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x + 3;
-                    rowCount++;
-                }
-            }
-#endif
-            for(; x <= maxx; x++, ptr += 4)
-            {
-                int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
-                {
-                    s0 += t0;
-                    s1 += t1;
-                    s2 += t2;
-                    sx += x;
-                    rowCount++;
-                }
-            }
-            if(rowCount == 0)
-                continue;
-            count += rowCount;
-            sy += y * rowCount;
-        }
-
-        if( count == 0 )
-            break;
-
-        int x1 = sx / count;
-        int y1 = sy / count;
-        s0 = s0 / count;
-        s1 = s1 / count;
-        s2 = s2 / count;
-
-        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
-                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
-
-        //revise the pointer corresponding to the new (y0,x0)
-        revx = x1 - x0;
-        revy = y1 - y0;
-
-        x0 = x1;
-        y0 = y1;
-        c0 = s0;
-        c1 = s1;
-        c2 = s2;
-
-        if( stopFlag )
-            break;
-    } //for iter
-
-    dptr[0] = (uchar)c0;
-    dptr[1] = (uchar)c1;
-    dptr[2] = (uchar)c2;
-    dptr[3] = (uchar)c3;
-
-    COOR coor;
-    coor.x = (short)x0;
-    coor.y = (short)y0;
-    return coor;
-}
-
-void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit)
-{
-    if( src_roi.empty() )
-        CV_Error( CV_StsBadArg, "The input image is empty" );
-
-    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
-        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
-
-    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
-    CV_Assert( !(dst_roi.step & 0x3) );
-
-    if( !(crit.type & TermCriteria::MAX_ITER) )
-        crit.maxCount = 5;
-    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
-    float eps;
-    if( !(crit.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(crit.epsilon, 0.0);
-
-    int tab[512];
-    for(int i = 0; i < 512; i++)
-        tab[i] = (i - 255) * (i - 255);
-    uchar *sptr = src_roi.data;
-    uchar *dptr = dst_roi.data;
-    int sstep = (int)src_roi.step;
-    int dstep = (int)dst_roi.step;
-    Size size = src_roi.size();
-
-    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
-            dptr += dstep - (size.width << 2))
-    {
-        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
-        {
-            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
-        }
-    }
-}
-
-void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit)
-{
-    if( src_roi.empty() )
-        CV_Error( CV_StsBadArg, "The input image is empty" );
-    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
-        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
-    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
-               (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
-    CV_Assert( !(dstCoor_roi.step & 0x3) );
-
-    if( !(crit.type & TermCriteria::MAX_ITER) )
-        crit.maxCount = 5;
-    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
-    float eps;
-    if( !(crit.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(crit.epsilon, 0.0);
-
-    int tab[512];
-    for(int i = 0; i < 512; i++)
-        tab[i] = (i - 255) * (i - 255);
-    uchar *sptr = src_roi.data;
-    uchar *dptr = dst_roi.data;
-    short *dCoorptr = (short *)dstCoor_roi.data;
-    int sstep = (int)src_roi.step;
-    int dstep = (int)dst_roi.step;
-    int dCoorstep = (int)dstCoor_roi.step >> 1;
-    Size size = src_roi.size();
-
-    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
-            dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
-    {
-        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
-        {
-            *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
-        }
-    }
-
-}
-
-//////////////////////////////// meanShift //////////////////////////////////////////
-
-PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, TermCriteria, bool)
-{
-    int type, typeCoor;
-    int sp, sr;
-    TermCriteria crit;
-    bool useRoi;
-
-    // src mat
-    Mat src, src_roi;
-    Mat dst, dst_roi;
-    Mat dstCoor, dstCoor_roi;
-
-    // ocl dst mat
-    ocl::oclMat gsrc, gsrc_roi;
-    ocl::oclMat gdst, gdst_roi;
-    ocl::oclMat gdstCoor, gdstCoor_roi;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        typeCoor = GET_PARAM(1);
-        sp = GET_PARAM(2);
-        sr = GET_PARAM(3);
-        crit = GET_PARAM(4);
-        useRoi = GET_PARAM(5);
-    }
-
-    void random_roi()
-    {
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
-        generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 256);
-        generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
-
-        randomSubMat(dstCoor, dstCoor_roi, roiSize, dstBorder, typeCoor, 5, 256);
-        generateOclMat(gdstCoor, gdstCoor_roi, dstCoor, roiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-
-    void Near1(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdstCoor.download(whole);
-        gdstCoor_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dstCoor, whole, threshold);
-        EXPECT_MAT_NEAR(dstCoor_roi, roi, threshold);
-    }
-};
-
-/////////////////////////meanShiftFiltering/////////////////////////////
-
-typedef meanShiftTestBase meanShiftFiltering;
-
-OCL_TEST_P(meanShiftFiltering, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit);
-        ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
-
-        Near();
-    }
-}
-
-///////////////////////////meanShiftProc//////////////////////////////////
-
-typedef meanShiftTestBase meanShiftProc;
-
-OCL_TEST_P(meanShiftProc, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit);
-        ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
-
-        Near();
-        Near1();
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-
-INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
-                            Values((MatType)CV_8UC4),
-                            Values((MatType)CV_16SC2),
-                            Values(5),
-                            Values(6),
-                            Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)),
-                            Bool()
-                        ));
-
-INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
-                            Values((MatType)CV_8UC4),
-                            Values((MatType)CV_16SC2),
-                            Values(5),
-                            Values(6),
-                            Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)),
-                            Bool()
-                        ));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_ml.cpp b/modules/ocl/test/test_ml.cpp
deleted file mode 100644
index 00f9fa9..0000000
--- a/modules/ocl/test/test_ml.cpp
+++ /dev/null
@@ -1,309 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma,        jin@multicorewareinc.com
-//    Xiaopeng Fu,   fuxiaopeng2222@163.com
-//    Erping Pang,   pang_er_ping@163.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-
-///////K-NEAREST NEIGHBOR//////////////////////////
-
-static void genTrainData(cv::RNG& rng, Mat& trainData, int trainDataRow, int trainDataCol,
-                         Mat& trainLabel = Mat().setTo(Scalar::all(0)), int nClasses = 0)
-{
-    cv::Size size(trainDataCol, trainDataRow);
-    trainData = randomMat(rng, size, CV_32FC1, 1.0, 1000.0, false);
-    if(nClasses != 0)
-    {
-        cv::Size size1(trainDataRow, 1);
-        trainLabel = randomMat(rng, size1, CV_8UC1, 0, nClasses - 1, false);
-        trainLabel.convertTo(trainLabel, CV_32FC1);
-    }
-}
-
-PARAM_TEST_CASE(KNN, int, Size, int, bool)
-{
-    int k;
-    int trainDataCol;
-    int testDataRow;
-    int nClass;
-    bool regression;
-    virtual void SetUp()
-    {
-        k = GET_PARAM(0);
-        nClass = GET_PARAM(2);
-        trainDataCol = GET_PARAM(1).width;
-        testDataRow = GET_PARAM(1).height;
-        regression = GET_PARAM(3);
-    }
-};
-
-OCL_TEST_P(KNN, Accuracy)
-{
-    Mat trainData, trainLabels;
-    const int trainDataRow = 500;
-    genTrainData(rng, trainData, trainDataRow, trainDataCol, trainLabels, nClass);
-
-    Mat testData, testLabels;
-    genTrainData(rng, testData, testDataRow, trainDataCol);
-
-    KNearestNeighbour knn_ocl;
-    CvKNearest knn_cpu;
-    Mat best_label_cpu;
-    oclMat best_label_ocl;
-
-    /*ocl k-Nearest_Neighbor start*/
-    oclMat trainData_ocl;
-    trainData_ocl.upload(trainData);
-    Mat simpleIdx;
-    knn_ocl.train(trainData, trainLabels, simpleIdx, regression);
-
-    oclMat testdata;
-    testdata.upload(testData);
-    knn_ocl.find_nearest(testdata, k, best_label_ocl);
-    /*ocl k-Nearest_Neighbor end*/
-
-    /*cpu k-Nearest_Neighbor start*/
-    knn_cpu.train(trainData, trainLabels, simpleIdx, regression);
-    knn_cpu.find_nearest(testData, k, &best_label_cpu);
-    /*cpu k-Nearest_Neighbor end*/
-    if(regression)
-    {
-        EXPECT_MAT_SIMILAR(Mat(best_label_ocl), best_label_cpu, 1e-5);
-    }
-    else
-    {
-        EXPECT_MAT_NEAR(Mat(best_label_ocl), best_label_cpu, 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400), Size(300, 600)),
-    Values(4, 3), Values(false, true)));
-
-////////////////////////////////SVM/////////////////////////////////////////////////
-
-PARAM_TEST_CASE(SVM_OCL, int, int, int)
-{
-    cv::Size size;
-    int kernel_type;
-    int svm_type;
-    Mat src, labels, samples, labels_predict;
-    int K;
-
-    virtual void SetUp()
-    {
-
-        kernel_type = GET_PARAM(0);
-        svm_type = GET_PARAM(1);
-        K = GET_PARAM(2);
-        cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-        src.create(size, CV_32FC1);
-        labels.create(1, size.height, CV_32SC1);
-        int row_idx = 0;
-        const int max_number = size.height / K - 1;
-        CV_Assert(K <= size.height);
-        for(int i = 0; i < K; i++ )
-        {
-            Mat center_row_header = src.row(row_idx);
-            center_row_header.setTo(0);
-            int nchannel = center_row_header.channels();
-            for(int j = 0; j < nchannel; j++)
-            {
-                center_row_header.at<float>(0, i * nchannel + j) = 500.0;
-            }
-            labels.at<int>(0, row_idx) = i;
-            for(int j = 0; (j < max_number) ||
-                    (i == K - 1 && j < max_number + size.height % K); j ++)
-            {
-                Mat cur_row_header = src.row(row_idx + 1 + j);
-                center_row_header.copyTo(cur_row_header);
-                Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), 1, 100, false);
-                cur_row_header += tmpmat;
-                labels.at<int>(0, row_idx + 1 + j) = i;
-            }
-            row_idx += 1 + max_number;
-        }
-        labels.convertTo(labels, CV_32FC1);
-        cv::Size test_size = cv::Size(MWIDTH, 100);
-        samples.create(test_size, CV_32FC1);
-        labels_predict.create(1, test_size.height, CV_32SC1);
-        const int max_number_test = test_size.height / K - 1;
-        row_idx = 0;
-        for(int i = 0; i < K; i++ )
-        {
-            Mat center_row_header = samples.row(row_idx);
-            center_row_header.setTo(0);
-            int nchannel = center_row_header.channels();
-            for(int j = 0; j < nchannel; j++)
-            {
-                center_row_header.at<float>(0, i * nchannel + j) = 500.0;
-            }
-            labels_predict.at<int>(0, row_idx) = i;
-            for(int j = 0; (j < max_number_test) ||
-                    (i == K - 1 && j < max_number_test + test_size.height % K); j ++)
-            {
-                Mat cur_row_header = samples.row(row_idx + 1 + j);
-                center_row_header.copyTo(cur_row_header);
-                Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), 1, 100, false);
-                cur_row_header += tmpmat;
-                labels_predict.at<int>(0, row_idx + 1 + j) = i;
-            }
-            row_idx += 1 + max_number_test;
-        }
-        labels_predict.convertTo(labels_predict, CV_32FC1);
-    }
-};
-
-OCL_TEST_P(SVM_OCL, Accuracy)
-{
-    CvSVMParams params;
-    params.degree = 0.4;
-    params.gamma = 1;
-    params.coef0 = 1;
-    params.C = 1;
-    params.nu = 0.5;
-    params.p = 1;
-    params.svm_type = svm_type;
-    params.kernel_type = kernel_type;
-
-    params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 1000, 0.001);
-
-    CvSVM SVM;
-    SVM.train(src, labels, Mat(), Mat(), params);
-
-    cv::ocl::CvSVM_OCL SVM_OCL;
-    SVM_OCL.train(src, labels, Mat(), Mat(), params);
-
-    int c = SVM.get_support_vector_count();
-    int c1 = SVM_OCL.get_support_vector_count();
-
-    Mat sv(c, MHEIGHT, CV_32FC1);
-    Mat sv_ocl(c1, MHEIGHT, CV_32FC1);
-    for(int i = 0; i < c; i++)
-    {
-        const float* v = SVM.get_support_vector(i);
-
-        for(int j = 0; j < MHEIGHT; j++)
-        {
-            sv.at<float>(i, j) = v[j];
-        }
-    }
-    for(int i = 0; i < c1; i++)
-    {
-        const float* v_ocl = SVM_OCL.get_support_vector(i);
-
-        for(int j = 0; j < MHEIGHT; j++)
-        {
-            sv_ocl.at<float>(i, j) = v_ocl[j];
-        }
-    }
-    cv::BFMatcher matcher(cv::NORM_L2);
-    std::vector<cv::DMatch> matches;
-    matcher.match(sv, sv_ocl, matches);
-    int count = 0;
-
-    for(std::vector<cv::DMatch>::iterator itr = matches.begin(); itr != matches.end(); itr++)
-    {
-        if((*itr).distance < 0.1)
-        {
-            count ++;
-        }
-    }
-    if(c != 0)
-    {
-        float matchedRatio = (float)count / c;
-        EXPECT_GT(matchedRatio, 0.95);
-    }
-    if(c != 0)
-    {
-        CvMat *result = cvCreateMat(1, samples.rows, CV_32FC1);
-        CvMat test_samples = samples;
-
-        CvMat *result_ocl = cvCreateMat(1, samples.rows, CV_32FC1);
-
-        SVM.predict(&test_samples, result);
-
-        SVM_OCL.predict(&test_samples, result_ocl);
-
-        int true_resp = 0, true_resp_ocl = 0;
-        for (int i = 0; i < samples.rows; i++)
-        {
-            if (result->data.fl[i] == labels_predict.at<float>(0, i))
-            {
-                true_resp++;
-            }
-        }
-        float matchedRatio = (float)true_resp / samples.rows;
-
-        for (int i = 0; i < samples.rows; i++)
-        {
-            if (result_ocl->data.fl[i] == labels_predict.at<float>(0, i))
-            {
-                true_resp_ocl++;
-            }
-        }
-        float matchedRatio_ocl = (float)true_resp_ocl / samples.rows;
-
-        if(matchedRatio != 0 && true_resp_ocl < true_resp)
-        {
-            EXPECT_NEAR(matchedRatio_ocl, matchedRatio, 0.03);
-        }
-    }
-}
-
-// TODO FIXIT: CvSVM::EPS_SVR case is crashed inside CPU implementation
-// Anonymous enums are not supported well so cast them to 'int'
-
-INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine(
-                            Values((int)CvSVM::LINEAR, (int)CvSVM::POLY, (int)CvSVM::RBF, (int)CvSVM::SIGMOID),
-                            Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR),
-                            Values(2, 3, 4)
-                        ));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp
deleted file mode 100644
index e978bb2..0000000
--- a/modules/ocl/test/test_moments.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "test_precomp.hpp"
-#include <iomanip>
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-PARAM_TEST_CASE(MomentsTest, MatType, bool, bool)
-{
-    int type;
-    cv::Mat mat;
-    bool test_contours;
-    bool binaryImage;
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        test_contours = GET_PARAM(1);
-        cv::Size size(10 * MWIDTH, 10 * MHEIGHT);
-        mat = randomMat(size, type, 0, 256, false);
-        binaryImage = GET_PARAM(2);
-    }
-
-    void Compare(Moments& cpu_moments, Moments& gpu_moments)
-    {
-        Mat gpu_dst, cpu_dst;
-        HuMoments(cpu_moments, cpu_dst);
-        HuMoments(gpu_moments, gpu_dst);
-        EXPECT_MAT_NEAR(gpu_dst, cpu_dst, 1e-3);
-    }
-};
-
-OCL_TEST_P(MomentsTest, Mat)
-{
-    oclMat src_d(mat);
-    for(int j = 0; j < LOOP_TIMES; j++)
-    {
-        if(test_contours)
-        {
-            Mat src = readImage( "cv/shared/pic3.png", IMREAD_GRAYSCALE );
-            ASSERT_FALSE(src.empty());
-            Mat canny_output;
-            vector<vector<Point> > contours;
-            vector<Vec4i> hierarchy;
-            Canny( src, canny_output, 100, 200, 3 );
-            findContours( canny_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) );
-            for( size_t i = 0; i < contours.size(); i++ )
-            {
-                Moments m = moments( contours[i], false );
-                Moments dm = ocl::ocl_moments( contours[i]);
-                Compare(m, dm);
-            }
-        }
-        cv::Moments CvMom = cv::moments(mat, binaryImage);
-        cv::Moments oclMom = cv::ocl::ocl_moments(src_d, binaryImage);
-
-        Compare(CvMom, oclMom);
-    }
-}
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MomentsTest, Combine(
-    Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1, CV_64FC1), Values(false, true), Values(false, true)));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_objdetect.cpp b/modules/ocl/test/test_objdetect.cpp
deleted file mode 100644
index f179e9e..0000000
--- a/modules/ocl/test/test_objdetect.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//		Yao Wang, bitwangyaoyao@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include "opencv2/objdetect.hpp"
-
-using namespace cv;
-using namespace testing;
-
-///////////////////// HOG /////////////////////////////
-PARAM_TEST_CASE(HOG, Size, int)
-{
-    Size winSize;
-    int type;
-    Mat img_rgb;
-    virtual void SetUp()
-    {
-        winSize = GET_PARAM(0);
-        type = GET_PARAM(1);
-        img_rgb = readImage("gpu/hog/road.png");
-        ASSERT_FALSE(img_rgb.empty());
-    }
-};
-
-OCL_TEST_P(HOG, GetDescriptors)
-{
-    // Convert image
-    Mat img;
-    switch (type)
-    {
-    case CV_8UC1:
-        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
-        break;
-    case CV_8UC4:
-    default:
-        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
-        break;
-    }
-    ocl::oclMat d_img(img);
-
-    // HOGs
-    ocl::HOGDescriptor ocl_hog;
-    ocl_hog.gamma_correction = true;
-    HOGDescriptor hog;
-    hog.gammaCorrection = true;
-
-    // Compute descriptor
-    ocl::oclMat d_descriptors;
-    ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
-    Mat down_descriptors;
-    d_descriptors.download(down_descriptors);
-    down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
-
-    hog.setSVMDetector(hog.getDefaultPeopleDetector());
-    std::vector<float> descriptors;
-    switch (type)
-    {
-    case CV_8UC1:
-        hog.compute(img, descriptors, ocl_hog.win_size);
-        break;
-    case CV_8UC4:
-    default:
-        hog.compute(img_rgb, descriptors, ocl_hog.win_size);
-        break;
-    }
-    Mat cpu_descriptors(descriptors);
-
-    EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2);
-}
-
-OCL_TEST_P(HOG, Detect)
-{
-    // Convert image
-    Mat img;
-    switch (type)
-    {
-    case CV_8UC1:
-        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
-        break;
-    case CV_8UC4:
-    default:
-        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
-        break;
-    }
-    ocl::oclMat d_img(img);
-
-    // HOGs
-    if ((winSize != Size(48, 96)) && (winSize != Size(64, 128)))
-        winSize = Size(64, 128);
-    ocl::HOGDescriptor ocl_hog(winSize);
-    ocl_hog.gamma_correction = true;
-
-    HOGDescriptor hog;
-    hog.winSize = winSize;
-    hog.gammaCorrection = true;
-
-    if (winSize.width == 48 && winSize.height == 96)
-    {
-        // daimler's base
-        ocl_hog.setSVMDetector(hog.getDaimlerPeopleDetector());
-        hog.setSVMDetector(hog.getDaimlerPeopleDetector());
-    }
-    else if (winSize.width == 64 && winSize.height == 128)
-    {
-        ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
-        hog.setSVMDetector(hog.getDefaultPeopleDetector());
-    }
-    else
-    {
-        ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
-        hog.setSVMDetector(hog.getDefaultPeopleDetector());
-    }
-
-    // OpenCL detection
-    std::vector<Rect> d_found;
-    ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
-
-    // CPU detection
-    std::vector<Rect> found;
-    switch (type)
-    {
-    case CV_8UC1:
-        hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
-        break;
-    case CV_8UC4:
-    default:
-        hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
-        break;
-    }
-
-    EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0);
-}
-
-
-INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
-                            testing::Values(Size(64, 128), Size(48, 96)),
-                            testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
-
-
-///////////////////////////// Haar //////////////////////////////
-IMPLEMENT_PARAM_CLASS(CascadeName, std::string)
-CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
-CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
-
-PARAM_TEST_CASE(Haar, int, CascadeName)
-{
-    ocl::OclCascadeClassifier cascade, nestedCascade;
-    CascadeClassifier cpucascade, cpunestedCascade;
-
-    int flags;
-    std::string cascadeName;
-    std::vector<Rect> faces, oclfaces;
-    Mat img;
-    ocl::oclMat d_img;
-
-    virtual void SetUp()
-    {
-        flags = GET_PARAM(0);
-        cascadeName = (std::string(cvtest::TS::ptr()->get_data_path()) + "cv/cascadeandhog/cascades/").append(GET_PARAM(1));
-        ASSERT_TRUE(cascade.load( cascadeName ));
-        ASSERT_TRUE(cpucascade.load(cascadeName));
-        img = readImage("cv/shared/lena.png", IMREAD_GRAYSCALE);
-        ASSERT_FALSE(img.empty());
-        equalizeHist(img, img);
-        d_img.upload(img);
-    }
-};
-
-OCL_TEST_P(Haar, FaceDetect)
-{
-    cascade.detectMultiScale(d_img, oclfaces,  1.1, 3,
-                                flags,
-                                Size(30, 30), Size(0, 0));
-
-    cpucascade.detectMultiScale(img, faces,  1.1, 3,
-                                flags,
-                                Size(30, 30), Size(0, 0));
-
-    EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar,
-    Combine(Values((int)CASCADE_SCALE_IMAGE, 0),
-            Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp
deleted file mode 100644
index 7296a6b..0000000
--- a/modules/ocl/test/test_optflow.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <iomanip>
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-//////////////////////////////////////////////////////
-// GoodFeaturesToTrack
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(MinDistance, double)
-}
-PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
-{
-    double minDistance;
-
-    virtual void SetUp()
-    {
-        minDistance = GET_PARAM(0);
-    }
-};
-
-OCL_TEST_P(GoodFeaturesToTrack, Accuracy)
-{
-    cv::Mat frame = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame.empty());
-
-    int maxCorners = 1000;
-    double qualityLevel = 0.01;
-
-    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
-
-    cv::ocl::oclMat d_pts;
-    detector(oclMat(frame), d_pts);
-
-    ASSERT_FALSE(d_pts.empty());
-
-    std::vector<cv::Point2f> pts(d_pts.cols);
-
-    detector.downloadPoints(d_pts, pts);
-
-    std::vector<cv::Point2f> pts_gold;
-    cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
-
-    ASSERT_EQ(pts_gold.size(), pts.size());
-
-    size_t mistmatch = 0;
-    for (size_t i = 0; i < pts.size(); ++i)
-    {
-        cv::Point2i a = pts_gold[i];
-        cv::Point2i b = pts[i];
-
-        bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
-
-        if (!eq)
-            ++mistmatch;
-    }
-
-    double bad_ratio = static_cast<double>(mistmatch) / pts.size();
-
-    ASSERT_LE(bad_ratio, 0.01);
-}
-
-OCL_TEST_P(GoodFeaturesToTrack, EmptyCorners)
-{
-    int maxCorners = 1000;
-    double qualityLevel = 0.01;
-
-    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
-
-    cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
-    cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
-
-    detector(src, corners);
-
-    ASSERT_TRUE(corners.empty());
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
-    testing::Values(MinDistance(0.0), MinDistance(3.0)));
-
-//////////////////////////////////////////////////////////////////////////
-PARAM_TEST_CASE(TVL1, bool)
-{
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        useRoi = GET_PARAM(0);
-    }
-
-};
-
-OCL_TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module are different in 2.4 and master branches
-{
-    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame0.empty());
-
-    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame1.empty());
-
-    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
-    cv::Mat flowx = randomMat(frame0.size(), CV_32FC1, 0, 0, useRoi);
-    cv::Mat flowy = randomMat(frame0.size(), CV_32FC1, 0, 0, useRoi);
-    cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy);
-    d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
-
-    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
-    cv::Mat flow;
-    alg->calc(frame0, frame1, flow);
-    cv::Mat gold[2];
-    cv::split(flow, gold);
-
-    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
-    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
-}
-INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true));
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// PyrLKOpticalFlow
-
-PARAM_TEST_CASE(Sparse, bool, bool)
-{
-    bool useGray;
-    bool UseSmart;
-
-    virtual void SetUp()
-    {
-        UseSmart = GET_PARAM(0);
-        useGray = GET_PARAM(1);
-    }
-};
-
-OCL_TEST_P(Sparse, Mat)
-{
-    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    ASSERT_FALSE(frame0.empty());
-
-    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    ASSERT_FALSE(frame1.empty());
-
-    cv::Mat gray_frame;
-    if (useGray)
-        gray_frame = frame0;
-    else
-        cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
-
-    std::vector<cv::Point2f> pts;
-    cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
-
-    cv::ocl::oclMat d_pts;
-    cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
-    d_pts.upload(pts_mat);
-
-    cv::ocl::PyrLKOpticalFlow pyrLK;
-
-    cv::ocl::oclMat oclFrame0;
-    cv::ocl::oclMat oclFrame1;
-    cv::ocl::oclMat d_nextPts;
-    cv::ocl::oclMat d_status;
-    cv::ocl::oclMat d_err;
-
-    oclFrame0 = frame0;
-    oclFrame1 = frame1;
-
-    pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err);
-
-    std::vector<cv::Point2f> nextPts(d_nextPts.cols);
-    cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void *)&nextPts[0]);
-    d_nextPts.download(nextPts_mat);
-
-    std::vector<unsigned char> status(d_status.cols);
-    cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void *)&status[0]);
-    d_status.download(status_mat);
-
-    std::vector<float> err(d_err.cols);
-    cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]);
-    d_err.download(err_mat);
-
-    std::vector<cv::Point2f> nextPts_gold;
-    std::vector<unsigned char> status_gold;
-    std::vector<float> err_gold;
-    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, err_gold);
-
-    ASSERT_EQ(nextPts_gold.size(), nextPts.size());
-    ASSERT_EQ(status_gold.size(), status.size());
-
-    size_t mistmatch = 0;
-    for (size_t i = 0; i < nextPts.size(); ++i)
-    {
-        if (status[i] != status_gold[i])
-        {
-            ++mistmatch;
-            continue;
-        }
-
-        if (status[i])
-        {
-            cv::Point2i a = nextPts[i];
-            cv::Point2i b = nextPts_gold[i];
-
-            bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
-            float errdiff = 0.0f;
-
-            if (!eq || errdiff > 1e-1)
-                ++mistmatch;
-        }
-    }
-
-    double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size());
-
-    ASSERT_LE(bad_ratio, 0.02f);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine(Bool(), Bool()));
-
-//////////////////////////////////////////////////////
-// FarnebackOpticalFlow
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(PyrScale, double)
-        IMPLEMENT_PARAM_CLASS(PolyN, int)
-        CV_FLAGS(FarnebackOptFlowFlags, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
-        IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
-}
-
-PARAM_TEST_CASE(Farneback, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
-{
-    double pyrScale;
-    int polyN;
-    int flags;
-    bool useInitFlow;
-
-    virtual void SetUp()
-    {
-        pyrScale = GET_PARAM(0);
-        polyN = GET_PARAM(1);
-        flags = GET_PARAM(2);
-        useInitFlow = GET_PARAM(3);
-    }
-};
-
-OCL_TEST_P(Farneback, Accuracy)
-{
-    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame0.empty());
-
-    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame1.empty());
-
-    double polySigma = polyN <= 5 ? 1.1 : 1.5;
-
-    cv::ocl::FarnebackOpticalFlow farn;
-    farn.pyrScale = pyrScale;
-    farn.polyN = polyN;
-    farn.polySigma = polySigma;
-    farn.flags = flags;
-
-    cv::ocl::oclMat d_flowx, d_flowy;
-    farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
-
-    cv::Mat flow;
-    if (useInitFlow)
-    {
-        cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
-        cv::merge(flowxy, 2, flow);
-
-        farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
-        farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
-    }
-
-    cv::calcOpticalFlowFarneback(
-        frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
-        farn.numIters, farn.polyN, farn.polySigma, farn.flags);
-
-    std::vector<cv::Mat> flowxy;
-    cv::split(flow, flowxy);
-
-    EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1);
-    EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
-    testing::Values(PyrScale(0.3), PyrScale(0.5), PyrScale(0.8)),
-    testing::Values(PolyN(5), PolyN(7)),
-    testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
-    testing::Values(UseInitFlow(false), UseInitFlow(true))));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_orb.cpp b/modules/ocl/test/test_orb.cpp
deleted file mode 100644
index 8df7e48..0000000
--- a/modules/ocl/test/test_orb.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-// Authors:
-//  * Peter Andreas Entschev, peter@entschev.com
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-////////////////////////////////////////////////////////
-// ORB
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ORB_FeaturesCount, int)
-    IMPLEMENT_PARAM_CLASS(ORB_ScaleFactor, float)
-    IMPLEMENT_PARAM_CLASS(ORB_LevelsCount, int)
-    IMPLEMENT_PARAM_CLASS(ORB_EdgeThreshold, int)
-    IMPLEMENT_PARAM_CLASS(ORB_firstLevel, int)
-    IMPLEMENT_PARAM_CLASS(ORB_WTA_K, int)
-    IMPLEMENT_PARAM_CLASS(ORB_PatchSize, int)
-    IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
-}
-
-CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
-
-PARAM_TEST_CASE(ORB, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold,
-                ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
-{
-    int nFeatures;
-    float scaleFactor;
-    int nLevels;
-    int edgeThreshold;
-    int firstLevel;
-    int WTA_K;
-    int scoreType;
-    int patchSize;
-    bool blurForDescriptor;
-
-    virtual void SetUp()
-    {
-        nFeatures = GET_PARAM(0);
-        scaleFactor = GET_PARAM(1);
-        nLevels = GET_PARAM(2);
-        edgeThreshold = GET_PARAM(3);
-        firstLevel = GET_PARAM(4);
-        WTA_K = GET_PARAM(5);
-        scoreType = GET_PARAM(6);
-        patchSize = GET_PARAM(7);
-        blurForDescriptor = GET_PARAM(8);
-    }
-};
-
-OCL_TEST_P(ORB, Accuracy)
-{
-    cv::Mat image = readImage("gpu/perf/aloe.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
-    mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
-
-    cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);
-    cv::ocl::oclMat ocl_mask = cv::ocl::oclMat(mask);
-
-    cv::ocl::ORB_OCL orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
-    orb.blurForDescriptor = blurForDescriptor;
-
-    std::vector<cv::KeyPoint> keypoints;
-    cv::ocl::oclMat descriptors;
-    orb(ocl_image, ocl_mask, keypoints, descriptors);
-
-    cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
-
-    std::vector<cv::KeyPoint> keypoints_gold;
-    cv::Mat descriptors_gold;
-    orb_gold(image, mask, keypoints_gold, descriptors_gold);
-
-    cv::BFMatcher matcher(cv::NORM_HAMMING);
-    std::vector<cv::DMatch> matches;
-    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
-
-    int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
-    double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
-
-    EXPECT_GT(matchedRatio, 0.35);
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_Features2D, ORB,  testing::Combine(
-                        testing::Values(ORB_FeaturesCount(1000)),
-                        testing::Values(ORB_ScaleFactor(1.2f)),
-                        testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
-                        testing::Values(ORB_EdgeThreshold(31)),
-                        testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
-                        testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
-                        testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
-                        testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
-                        testing::Values(ORB_BlurForDescriptor(false), ORB_BlurForDescriptor(true))));
-
-#endif
diff --git a/modules/ocl/test/test_precomp.hpp b/modules/ocl/test/test_precomp.hpp
deleted file mode 100644
index f1887db..0000000
--- a/modules/ocl/test/test_precomp.hpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#ifndef __OPENCV_TEST_PRECOMP_HPP__
-#define __OPENCV_TEST_PRECOMP_HPP__
-
-#define CV_BUILD_OCL_MODULE
-
-#include <cmath>
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <limits>
-#include <algorithm>
-#include <iterator>
-#include <string>
-#include <cstdarg>
-#include "opencv2/ts.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/features2d.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/ocl.hpp"
-
-#include "utility.hpp"
-
-#include "opencv2/core/private.hpp"
-
-using namespace cvtest;
-
-#endif
diff --git a/modules/ocl/test/test_pyramids.cpp b/modules/ocl/test/test_pyramids.cpp
deleted file mode 100644
index 2d861b6..0000000
--- a/modules/ocl/test/test_pyramids.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Yao Wang yao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#include "test_precomp.hpp"
-#include <iomanip>
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace testing;
-using namespace std;
-
-PARAM_TEST_CASE(PyrBase, MatDepth, Channels)
-{
-    int depth;
-    int channels;
-
-    Mat dst_cpu;
-    ocl::oclMat gdst;
-
-    virtual void SetUp()
-    {
-        depth = GET_PARAM(0);
-        channels = GET_PARAM(1);
-    }
-};
-
-/////////////////////// PyrDown //////////////////////////
-
-typedef PyrBase PyrDown;
-
-OCL_TEST_P(PyrDown, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        Size size(MWIDTH, MHEIGHT);
-        Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255);
-        ocl::oclMat gsrc(src);
-
-        pyrDown(src, dst_cpu);
-        ocl::pyrDown(gsrc, gdst);
-
-        EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
-                            Values(CV_8U, CV_16U, CV_16S, CV_32F),
-                            Values(1, 3, 4)));
-
-/////////////////////// PyrUp //////////////////////////
-
-typedef PyrBase PyrUp;
-
-OCL_TEST_P(PyrUp, Accuracy)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        Size size(MWIDTH, MHEIGHT);
-        Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255);
-        ocl::oclMat gsrc(src);
-
-        pyrUp(src, dst_cpu);
-        ocl::pyrUp(gsrc, gdst);
-
-        EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0));
-    }
-}
-
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine(
-                            Values(CV_8U, CV_16U, CV_16S, CV_32F),
-                            Values(1, 3, 4)));
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_sort.cpp b/modules/ocl/test/test_sort.cpp
deleted file mode 100644
index b259149..0000000
--- a/modules/ocl/test/test_sort.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include <map>
-#include <functional>
-#include "test_precomp.hpp"
-
-using namespace std;
-using namespace cvtest;
-using namespace testing;
-using namespace cv;
-
-
-namespace
-{
-IMPLEMENT_PARAM_CLASS(IsGreaterThan, bool)
-IMPLEMENT_PARAM_CLASS(InputSize, int)
-IMPLEMENT_PARAM_CLASS(SortMethod, int)
-
-
-template<class T>
-struct KV_CVTYPE{ static int toType() {return 0;} };
-
-template<> struct KV_CVTYPE<int>  { static int toType() {return CV_32SC1;} };
-template<> struct KV_CVTYPE<float>{ static int toType() {return CV_32FC1;} };
-template<> struct KV_CVTYPE<Vec2i>{ static int toType() {return CV_32SC2;} };
-template<> struct KV_CVTYPE<Vec2f>{ static int toType() {return CV_32FC2;} };
-
-template<class key_type, class val_type>
-bool kvgreater(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
-{
-    return p1.first > p2.first;
-}
-
-template<class key_type, class val_type>
-bool kvless(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
-{
-    return p1.first < p2.first;
-}
-
-template<class key_type, class val_type>
-void toKVPair(
-    MatConstIterator_<key_type> kit,
-    MatConstIterator_<val_type> vit,
-    int vecSize,
-    vector<pair<key_type, val_type> >& kvres
-    )
-{
-    kvres.clear();
-    for(int i = 0; i < vecSize; i ++)
-    {
-        kvres.push_back(make_pair(*kit, *vit));
-        ++kit;
-        ++vit;
-    }
-}
-
-template<class key_type, class val_type>
-void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
-{
-    vector<pair<key_type, val_type> > kvres;
-    toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
-
-    if(isGreater)
-    {
-        std::sort(kvres.begin(), kvres.end(), kvgreater<key_type, val_type>);
-    }
-    else
-    {
-        std::sort(kvres.begin(), kvres.end(), kvless<key_type, val_type>);
-    }
-    key_type * kptr = keys.ptr<key_type>();
-    val_type * vptr = vals.ptr<val_type>();
-    for(int i = 0; i < keys.cols; i ++)
-    {
-        kptr[i] = kvres[i].first;
-        vptr[i] = kvres[i].second;
-    }
-}
-
-class SortByKey_STL
-{
-public:
-    static void sort(cv::Mat&, cv::Mat&, bool is_gt);
-private:
-    typedef void (*quick_sorter)(cv::Mat&, cv::Mat&, bool);
-    SortByKey_STL();
-    quick_sorter quick_sorters[CV_64FC4][CV_64FC4];
-    static SortByKey_STL instance;
-};
-
-SortByKey_STL SortByKey_STL::instance = SortByKey_STL();
-
-SortByKey_STL::SortByKey_STL()
-{
-    memset(instance.quick_sorters, 0, sizeof(quick_sorters));
-#define NEW_SORTER(KT, VT) \
-    instance.quick_sorters[KV_CVTYPE<KT>::toType()][KV_CVTYPE<VT>::toType()] = kvquicksort<KT, VT>;
-
-    NEW_SORTER(int, int);
-    NEW_SORTER(int, Vec2i);
-    NEW_SORTER(int, float);
-    NEW_SORTER(int, Vec2f);
-
-    NEW_SORTER(float, int);
-    NEW_SORTER(float, Vec2i);
-    NEW_SORTER(float, float);
-    NEW_SORTER(float, Vec2f);
-#undef NEW_SORTER
-}
-
-void SortByKey_STL::sort(cv::Mat& keys, cv::Mat& vals, bool is_gt)
-{
-    instance.quick_sorters[keys.type()][vals.type()](keys, vals, is_gt);
-}
-
-bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
-                               const Mat& /*dkeys_*/, const Mat& dvals_)
-{
-    int cn_val = gvals_.channels();
-    int count  = gkeys_.cols;
-
-    //for convenience we convert depth to float and channels to 1
-    Mat gkeys, gvals, dkeys, dvals;
-    gkeys_.reshape(1).convertTo(gkeys, CV_32F);
-    gvals_.reshape(1).convertTo(gvals, CV_32F);
-    //dkeys_.reshape(1).convertTo(dkeys, CV_32F);
-    dvals_.reshape(1).convertTo(dvals, CV_32F);
-    float * gkptr = gkeys.ptr<float>();
-    float * gvptr = gvals.ptr<float>();
-    //float * dkptr = dkeys.ptr<float>();
-    float * dvptr = dvals.ptr<float>();
-
-    for(int i = 0; i < count - 1; ++i)
-    {
-        int iden_count = 0;
-        // firstly calculate the number of identical keys
-        while(gkptr[i + iden_count] == gkptr[i + 1 + iden_count])
-        {
-            ++ iden_count;
-        }
-
-        // sort dv and gv
-        int num_of_val = (iden_count + 1) * cn_val;
-        std::sort(gvptr + i * cn_val, gvptr + i * cn_val + num_of_val);
-        std::sort(dvptr + i * cn_val, dvptr + i * cn_val + num_of_val);
-
-        // then check if [i, i + iden_count) is the same
-        for(int j = 0; j < num_of_val; ++j)
-        {
-            if(gvptr[i + j] != dvptr[i + j])
-            {
-                return false;
-            }
-        }
-        i += iden_count;
-    }
-    return true;
-}
-}
-
-#define INPUT_SIZES  Values(InputSize(0x10), InputSize(0x100), InputSize(0x10000)) //2^4, 2^8, 2^16
-#define KEY_TYPES    Values(MatType(CV_32SC1), MatType(CV_32FC1))
-#define VAL_TYPES    Values(MatType(CV_32SC1), MatType(CV_32SC2), MatType(CV_32FC1), MatType(CV_32FC2))
-#define SORT_METHODS Values(SortMethod(cv::ocl::SORT_BITONIC),SortMethod(cv::ocl::SORT_MERGE),SortMethod(cv::ocl::SORT_RADIX)/*,SortMethod(cv::ocl::SORT_SELECTION)*/)
-#define F_OR_T       Values(IsGreaterThan(false), IsGreaterThan(true))
-
-PARAM_TEST_CASE(SortByKey, InputSize, MatType, MatType, SortMethod, IsGreaterThan)
-{
-    InputSize input_size;
-    MatType key_type, val_type;
-    SortMethod method;
-    IsGreaterThan is_gt;
-
-    Mat mat_key, mat_val;
-    virtual void SetUp()
-    {
-        input_size = GET_PARAM(0);
-        key_type   = GET_PARAM(1);
-        val_type   = GET_PARAM(2);
-        method     = GET_PARAM(3);
-        is_gt      = GET_PARAM(4);
-
-        using namespace cv;
-        // fill key and val
-        mat_key = randomMat(Size(input_size, 1), key_type, INT_MIN, INT_MAX);
-        mat_val = randomMat(Size(input_size, 1), val_type, INT_MIN, INT_MAX);
-    }
-};
-
-OCL_TEST_P(SortByKey, Accuracy)
-{
-    using namespace cv;
-    ocl::oclMat oclmat_key(mat_key);
-    ocl::oclMat oclmat_val(mat_val);
-
-    ocl::sortByKey(oclmat_key, oclmat_val, method, is_gt);
-    SortByKey_STL::sort(mat_key, mat_val, is_gt);
-
-    EXPECT_MAT_NEAR(mat_key, oclmat_key, 0.0);
-    EXPECT_TRUE(checkUnstableSorterResult(mat_key, mat_val, oclmat_key, oclmat_val));
-}
-INSTANTIATE_TEST_CASE_P(OCL_SORT, SortByKey, Combine(INPUT_SIZES, KEY_TYPES, VAL_TYPES, SORT_METHODS, F_OR_T));
diff --git a/modules/ocl/test/test_split_merge.cpp b/modules/ocl/test/test_split_merge.cpp
deleted file mode 100644
index b21fedd..0000000
--- a/modules/ocl/test/test_split_merge.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-#define MAX_CHANNELS 4
-
-PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
-{
-    int type;
-    int channels;
-    bool use_roi;
-
-    //src mat
-    cv::Mat mat[MAX_CHANNELS];
-    //dst mat
-    cv::Mat dst;
-
-    // set up roi
-    int roicols, roirows;
-    int srcx[MAX_CHANNELS];
-    int srcy[MAX_CHANNELS];
-    int dstx, dsty;
-
-    //src mat with roi
-    cv::Mat mat_roi[MAX_CHANNELS];
-
-    //dst mat with roi
-    cv::Mat dst_roi;
-
-    //ocl dst mat for testing
-    cv::ocl::oclMat gdst_whole;
-
-    //ocl mat with roi
-    cv::ocl::oclMat gmat[MAX_CHANNELS];
-    cv::ocl::oclMat gdst;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        channels = GET_PARAM(1);
-        use_roi = GET_PARAM(2);
-
-        cv::Size size(MWIDTH, MHEIGHT);
-
-        for (int i = 0; i < channels; ++i)
-            mat[i] = randomMat(size, CV_MAKETYPE(type, 1), 5, 16, false);
-        dst = randomMat(size, CV_MAKETYPE(type, channels), 5, 16, false);
-    }
-
-    void random_roi()
-    {
-        if (use_roi)
-        {
-            //randomize ROI
-            roicols = rng.uniform(1, mat[0].cols);
-            roirows = rng.uniform(1, mat[0].rows);
-
-            for (int i = 0; i < channels; ++i)
-            {
-                srcx[i] = rng.uniform(0, mat[i].cols - roicols);
-                srcy[i] = rng.uniform(0, mat[i].rows - roirows);
-            }
-
-            dstx = rng.uniform(0, dst.cols  - roicols);
-            dsty = rng.uniform(0, dst.rows  - roirows);
-        }
-        else
-        {
-            roicols = mat[0].cols;
-            roirows = mat[0].rows;
-            for (int i = 0; i < channels; ++i)
-                srcx[i] = srcy[i] = 0;
-
-            dstx = dsty = 0;
-        }
-
-        for (int i = 0; i < channels; ++i)
-            mat_roi[i] = mat[i](Rect(srcx[i], srcy[i], roicols, roirows));
-
-        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
-
-        gdst_whole = dst;
-        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
-
-        for (int i = 0; i < channels; ++i)
-            gmat[i] = mat_roi[i];
-    }
-};
-
-struct Merge : MergeTestBase {};
-
-OCL_TEST_P(Merge, Accuracy)
-{
-    for(int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::merge(mat_roi, channels, dst_roi);
-        cv::ocl::merge(gmat, channels, gdst);
-
-        EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
-    }
-}
-
-PARAM_TEST_CASE(SplitTestBase, MatType, int, bool)
-{
-    int type;
-    int channels;
-    bool use_roi;
-
-    cv::Mat src, src_roi;
-    cv::Mat dst[MAX_CHANNELS], dst_roi[MAX_CHANNELS];
-
-    cv::ocl::oclMat gsrc_whole, gsrc_roi;
-    cv::ocl::oclMat gdst_whole[MAX_CHANNELS], gdst_roi[MAX_CHANNELS];
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        channels = GET_PARAM(1);
-        use_roi = GET_PARAM(2);
-    }
-
-    void random_roi()
-    {
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKETYPE(type, channels), 0, 256);
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-
-        for (int i = 0; i < channels; ++i)
-        {
-            Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-            randomSubMat(dst[i], dst_roi[i], roiSize, dstBorder, CV_MAKETYPE(type, 1), 5, 16);
-            generateOclMat(gdst_whole[i], gdst_roi[i], dst[i], roiSize, dstBorder);
-        }
-    }
-};
-
-struct Split : SplitTestBase {};
-
-#ifdef ANDROID
-// NOTE: The test fail on Android is the top of the iceberg only
-// The real fail reason is memory access vialation somewhere else
-OCL_TEST_P(Split, DISABLED_Accuracy)
-#else
-OCL_TEST_P(Split, Accuracy)
-#endif
-{
-    for(int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::split(src_roi, dst_roi);
-        cv::ocl::split(gsrc_roi, gdst_roi);
-
-        for (int i = 0; i < channels; ++i)
-        {
-            EXPECT_MAT_NEAR(dst[i], gdst_whole[i], 0.0);
-            EXPECT_MAT_NEAR(dst_roi[i], gdst_roi[i], 0.0);
-        }
-    }
-}
-
-
-INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F), Values(1, 2, 3, 4), Bool()));
-
-
-INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
-                            Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F), Values(1, 2, 3, 4), Bool()));
-
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
deleted file mode 100644
index 85f3375..0000000
--- a/modules/ocl/test/test_warp.cpp
+++ /dev/null
@@ -1,494 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Shengen Yan, yanshengen@gmail.com
-//    Jiang Liyuan, lyuan001.good@163.com
-//    Rock Li, Rock.Li@amd.com
-//    Wu Zailong, bullet@yeah.net
-//    Xu Pang, pangxu010@163.com
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace testing;
-using namespace std;
-
-static MatType noType = -1;
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// warpAffine  & warpPerspective
-
-PARAM_TEST_CASE(WarpTestBase, MatType, Interpolation, bool, bool)
-{
-    int type, interpolation;
-    Size dsize;
-    bool useRoi, mapInverse;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        interpolation = GET_PARAM(1);
-        mapInverse = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        if (mapInverse)
-            interpolation |= WARP_INVERSE_MAP;
-    }
-
-    void random_roi()
-    {
-        dsize = randomSize(1, MAX_VALUE);
-
-        Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, dsize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, dsize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-/////warpAffine
-
-typedef WarpTestBase WarpAffine;
-
-OCL_TEST_P(WarpAffine, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),
-            rng.uniform(-180.f, 180.f), rng.uniform(0.4f, 2.0f));
-
-        warpAffine(src_roi, dst_roi, M, dsize, interpolation);
-        ocl::warpAffine(gsrc_roi, gdst_roi, M, dsize, interpolation);
-
-        Near(1.0);
-    }
-}
-
-// warpPerspective
-
-typedef WarpTestBase WarpPerspective;
-
-OCL_TEST_P(WarpPerspective, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        float cols = static_cast<float>(src_roi.cols), rows = static_cast<float>(src_roi.rows);
-        float cols2 = cols / 2.0f, rows2 = rows / 2.0f;
-        Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
-        Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
-            Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
-            Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
-            Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
-        Mat M = getPerspectiveTransform(sp, dp);
-
-        warpPerspective(src_roi, dst_roi, M, dsize, interpolation);
-        ocl::warpPerspective(gsrc_roi, gdst_roi, M, dsize, interpolation);
-
-        Near(1.0);
-    }
-}
-
-// buildWarpPerspectiveMaps
-
-PARAM_TEST_CASE(BuildWarpPerspectiveMaps, bool, bool)
-{
-    bool useRoi, mapInverse;
-    Size dsize;
-
-    Mat xmap_whole, ymap_whole, xmap_roi, ymap_roi;
-    ocl::oclMat gxmap_whole, gymap_whole, gxmap_roi, gymap_roi;
-
-    void SetUp()
-    {
-        mapInverse = GET_PARAM(0);
-        useRoi = GET_PARAM(1);
-    }
-
-    void random_roi()
-    {
-        dsize = randomSize(1, MAX_VALUE);
-
-        Border xmapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(xmap_whole, xmap_roi, dsize, xmapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
-
-        Border ymapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(ymap_whole, ymap_roi, dsize, ymapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
-
-        generateOclMat(gxmap_whole, gxmap_roi, xmap_whole, dsize, xmapBorder);
-        generateOclMat(gymap_whole, gymap_roi, ymap_whole, dsize, ymapBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gxmap_whole.download(whole);
-        gxmap_roi.download(roi);
-
-        EXPECT_MAT_NEAR(xmap_whole, whole, threshold);
-        EXPECT_MAT_NEAR(xmap_roi, roi, threshold);
-    }
-
-    void Near1(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gymap_whole.download(whole);
-        gymap_roi.download(roi);
-
-        EXPECT_MAT_NEAR(ymap_whole, whole, threshold);
-        EXPECT_MAT_NEAR(ymap_roi, roi, threshold);
-    }
-};
-
-static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
-{
-    CV_Assert(M.rows == 3 && M.cols == 3);
-    CV_Assert(dsize.area() > 0);
-
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
-
-    float coeffs[3 * 3];
-    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
-
-    if (inverse)
-        M.convertTo(coeffsMat, coeffsMat.type());
-    else
-    {
-        cv::Mat iM;
-        invert(M, iM);
-        iM.convertTo(coeffsMat, coeffsMat.type());
-    }
-
-    for (int y = 0; y < dsize.height; ++y)
-    {
-        float * const xmap_ptr = xmap.ptr<float>(y);
-        float * const ymap_ptr = ymap.ptr<float>(y);
-
-        for (int x = 0; x < dsize.width; ++x)
-        {
-            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
-            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
-            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
-        }
-    }
-}
-
-OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        float cols = static_cast<float>(MAX_VALUE), rows = static_cast<float>(MAX_VALUE);
-        float cols2 = cols / 2.0f, rows2 = rows / 2.0f;
-        Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
-        Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
-            Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
-            Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
-            Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
-        Mat M = getPerspectiveTransform(sp, dp);
-
-        buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
-        ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);
-
-        Near(5e-3);
-        Near1(5e-3);
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// remap
-
-PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
-{
-    int srcType, map1Type, map2Type;
-    int borderType;
-    bool useRoi;
-
-    Scalar val;
-
-    Mat src, src_roi;
-    Mat dst, dst_roi;
-    Mat map1, map1_roi;
-    Mat map2, map2_roi;
-
-    // ocl mat with roi
-    ocl::oclMat gsrc, gsrc_roi;
-    ocl::oclMat gdst, gdst_roi;
-    ocl::oclMat gmap1, gmap1_roi;
-    ocl::oclMat gmap2, gmap2_roi;
-
-    virtual void SetUp()
-    {
-        srcType = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
-        map1Type = GET_PARAM(2).first;
-        map2Type = GET_PARAM(2).second;
-        borderType = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-    }
-
-    void random_roi()
-    {
-        val = randomScalar(-MAX_VALUE, MAX_VALUE);
-        Size srcROISize = randomSize(1, MAX_VALUE);
-        Size dstROISize = randomSize(1, MAX_VALUE);
-
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, srcROISize, srcBorder, srcType, 5, 256);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst, dst_roi, dstROISize, dstBorder, srcType, -MAX_VALUE, MAX_VALUE);
-
-        int mapMaxValue = MAX_VALUE << 2;
-        Border map1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(map1, map1_roi, dstROISize, map1Border, map1Type, -mapMaxValue, mapMaxValue);
-
-        Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        if (map2Type != noType)
-        {
-            int mapMinValue = -mapMaxValue;
-            if (map2Type == CV_16UC1 || map2Type == CV_16SC1)
-                mapMinValue = 0, mapMaxValue = INTER_TAB_SIZE2;
-            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, mapMinValue, mapMaxValue);
-        }
-
-        generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
-        generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
-        generateOclMat(gmap1, gmap1_roi, map1, dstROISize, map1Border);
-        if (noType != map2Type)
-            generateOclMat(gmap2, gmap2_roi, map2, dstROISize, map2Border);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-typedef Remap Remap_INTER_NEAREST;
-
-OCL_TEST_P(Remap_INTER_NEAREST, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_NEAREST, borderType, val);
-        ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_NEAREST, borderType, val);
-
-        Near(1.0);
-    }
-}
-
-typedef Remap Remap_INTER_LINEAR;
-
-OCL_TEST_P(Remap_INTER_LINEAR, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_LINEAR, borderType, val);
-        ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_LINEAR, borderType, val);
-
-        Near(2.0);
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// resize
-
-PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool)
-{
-    int type, interpolation;
-    double fx, fy;
-    bool useRoi;
-
-    Mat src, dst_whole, src_roi, dst_roi;
-    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
-
-    virtual void SetUp()
-    {
-        type = GET_PARAM(0);
-        fx = GET_PARAM(1);
-        fy = GET_PARAM(2);
-        interpolation = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-    }
-
-    void random_roi()
-    {
-        CV_Assert(fx > 0 && fy > 0);
-
-        Size srcRoiSize = randomSize(1, MAX_VALUE), dstRoiSize;
-        dstRoiSize.width = cvRound(srcRoiSize.width * fx);
-        dstRoiSize.height = cvRound(srcRoiSize.height * fy);
-
-        if (dstRoiSize.area() == 0)
-            return random_roi();
-
-        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
-
-        generateOclMat(gsrc_whole, gsrc_roi, src, srcRoiSize, srcBorder);
-        generateOclMat(gdst_whole, gdst_roi, dst_whole, dstRoiSize, dstBorder);
-    }
-
-    void Near(double threshold = 0.0)
-    {
-        Mat whole, roi;
-        gdst_whole.download(whole);
-        gdst_roi.download(roi);
-
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-    }
-};
-
-OCL_TEST_P(Resize, Mat)
-{
-    for (int j = 0; j < LOOP_TIMES; j++)
-    {
-        random_roi();
-
-        cv::resize(src_roi, dst_roi, Size(), fx, fy, interpolation);
-        ocl::resize(gsrc_roi, gdst_roi, Size(), fx, fy, interpolation);
-
-        Near(1.0);
-    }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
-                            Bool(),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
-                            Bool(),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarp, BuildWarpPerspectiveMaps, Combine(Bool(), Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
-                            Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
-                            Values(1, 2, 3, 4),
-                            Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
-                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
-                                   pair<MatType, MatType>((MatType)CV_32FC2, noType)),
-                            Values((Border)BORDER_CONSTANT,
-                                   (Border)BORDER_REPLICATE,
-                                   (Border)BORDER_WRAP,
-                                   (Border)BORDER_REFLECT,
-                                   (Border)BORDER_REFLECT_101),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
-                            Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
-                            Values(1, 2, 3, 4),
-                            Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
-                                   pair<MatType, MatType>((MatType)CV_32FC2, noType),
-                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
-                                   pair<MatType, MatType>((MatType)CV_16SC2, noType)),
-                            Values((Border)BORDER_CONSTANT,
-                                   (Border)BORDER_REPLICATE,
-                                   (Border)BORDER_WRAP,
-                                   (Border)BORDER_REFLECT,
-                                   (Border)BORDER_REFLECT_101),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine(
-                            Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(0.7, 0.4, 2.0),
-                            Values(0.3, 0.6, 2.0),
-                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
-                            Bool()));
-
-INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
-                            Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(0.7, 0.4, 0.5),
-                            Values(0.3, 0.6, 0.5),
-                            Values((Interpolation)INTER_AREA),
-                            Bool()));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp
deleted file mode 100644
index 3195019..0000000
--- a/modules/ocl/test/utility.cpp
+++ /dev/null
@@ -1,366 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#define VARNAME(A) #A
-using namespace std;
-using namespace cv;
-using namespace cvtest;
-
-namespace cvtest {
-//std::string generateVarList(int first,...)
-//{
-//	vector<std::string> varname;
-//
-//	va_list argp;
-//	string s;
-//	stringstream ss;
-//	va_start(argp,first);
-//	int i=first;
-//	while(i!=-1)
-//	{
-//		ss<<i<<",";
-//		i=va_arg(argp,int);
-//	};
-//	s=ss.str();
-//	va_end(argp);
-//	return s;
-//};
-
-//std::string generateVarList(int& p1,int& p2)
-//{
-//	stringstream ss;
-//	ss<<VARNAME(p1)<<":"<<src1x<<","<<VARNAME(p2)<<":"<<src1y;
-//	return ss.str();
-//};
-
-cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi)
-{
-    Size size0 = size;
-
-    if (useRoi)
-    {
-        size0.width += rng.uniform(5, 15);
-        size0.height += rng.uniform(5, 15);
-    }
-
-    cv::ocl::oclMat d_m(size0, type);
-
-    if (size0 != size)
-        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
-
-    return d_m;
-}
-
-cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi)
-{
-    CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
-    cv::ocl::oclMat d_m;
-    d_m = createMat_ocl(rng, m.size(), m.type(), useRoi);
-
-    Size ls;
-    Point pt;
-
-    d_m.locateROI(ls, pt);
-
-    Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height);
-
-    cv::ocl::oclMat m_ocl(m);
-
-    cv::ocl::oclMat d_m_roi(d_m, roi);
-
-    m_ocl.copyTo(d_m);
-    return d_m;
-}
-
-vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
-{
-    vector<MatType> v;
-
-    v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1));
-
-    for (int depth = depth_start; depth <= depth_end; ++depth)
-    {
-        for (int cn = cn_start; cn <= cn_end; ++cn)
-        {
-            v.push_back(CV_MAKETYPE(depth, cn));
-        }
-    }
-
-    return v;
-}
-
-const vector<MatType> &all_types()
-{
-    static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
-
-    return v;
-}
-
-Mat readImage(const string &fileName, int flags)
-{
-    return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
-}
-
-Mat readImageType(const string &fname, int type)
-{
-    Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
-    if (CV_MAT_CN(type) == 4)
-    {
-        Mat temp;
-        cvtColor(src, temp, cv::COLOR_BGR2BGRA);
-        swap(src, temp);
-    }
-    src.convertTo(src, CV_MAT_DEPTH(type));
-    return src;
-}
-
-double checkNorm(const Mat &m)
-{
-    return norm(m, NORM_INF);
-}
-
-double checkNorm(const Mat &m1, const Mat &m2)
-{
-    return norm(m1, m2, NORM_INF);
-}
-
-double checkSimilarity(const Mat &m1, const Mat &m2)
-{
-    Mat diff;
-    matchTemplate(m1, m2, diff, TM_CCORR_NORMED);
-    return std::abs(diff.at<float>(0, 0) - 1.f);
-}
-
-/*
-void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os)
-{
-    (*os) << info.name();
-}
-*/
-
-void PrintTo(const Inverse &inverse, std::ostream *os)
-{
-    if (inverse)
-        (*os) << "inverse";
-    else
-        (*os) << "direct";
-}
-
-double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
-{
-    double final_test_result = 0.0;
-    size_t sz1 = ob1.size();
-    size_t sz2 = ob2.size();
-
-    if(sz1 != sz2)
-    {
-        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
-    }
-    else
-    {
-        if(sz1==0 && sz2==0)
-            return 0;
-        cv::Mat cpu_result(sz, CV_8UC1);
-        cpu_result.setTo(0);
-
-        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {
-            cv::Mat cpu_result_roi(cpu_result, *r);
-            cpu_result_roi.setTo(1);
-            cpu_result.copyTo(cpu_result);
-        }
-        int cpu_area = cv::countNonZero(cpu_result > 0);
-
-        cv::Mat gpu_result(sz, CV_8UC1);
-        gpu_result.setTo(0);
-        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
-        {
-            cv::Mat gpu_result_roi(gpu_result, *r2);
-            gpu_result_roi.setTo(1);
-            gpu_result.copyTo(gpu_result);
-        }
-
-        cv::Mat result_;
-        multiply(cpu_result, gpu_result, result_);
-        int result = cv::countNonZero(result_ > 0);
-        if(cpu_area!=0 && result!=0)
-            final_test_result = 1.0 - (double)result/(double)cpu_area;
-        else if(cpu_area==0 && result!=0)
-            final_test_result = -1;
-    }
-    return final_test_result;
-}
-
-void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
-{
-    Mat diff, diff_thresh;
-    absdiff(gold, actual, diff);
-    diff.convertTo(diff, CV_32F);
-    threshold(diff, diff_thresh, eps, 255.0, cv::THRESH_BINARY);
-
-    if (alwaysShow || cv::countNonZero(diff_thresh.reshape(1)) > 0)
-    {
-#if 0
-        std::cout << "Src: " << std::endl << src << std::endl;
-        std::cout << "Reference: " << std::endl << gold << std::endl;
-        std::cout << "OpenCL: " << std::endl << actual << std::endl;
-#endif
-
-        namedWindow("src", WINDOW_NORMAL);
-        namedWindow("gold", WINDOW_NORMAL);
-        namedWindow("actual", WINDOW_NORMAL);
-        namedWindow("diff", WINDOW_NORMAL);
-
-        imshow("src", src);
-        imshow("gold", gold);
-        imshow("actual", actual);
-        imshow("diff", diff);
-
-        waitKey();
-    }
-}
-
-namespace
-{
-    bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
-    {
-        const double maxPtDif = 1.0;
-        const double maxSizeDif = 1.0;
-        const double maxAngleDif = 2.0;
-        const double maxResponseDif = 0.1;
-
-        double dist = cv::norm(p1.pt - p2.pt);
-
-        if (dist < maxPtDif &&
-            fabs(p1.size - p2.size) < maxSizeDif &&
-            abs(p1.angle - p2.angle) < maxAngleDif &&
-            abs(p1.response - p2.response) < maxResponseDif &&
-            p1.octave == p2.octave &&
-            p1.class_id == p2.class_id)
-        {
-            return true;
-        }
-
-        return false;
-    }
-
-    struct KeyPointLess : std::binary_function<cv::KeyPoint, cv::KeyPoint, bool>
-    {
-        bool operator()(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) const
-        {
-            return kp1.pt.y < kp2.pt.y || (kp1.pt.y == kp2.pt.y && kp1.pt.x < kp2.pt.x);
-        }
-    };
-}
-
-testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
-{
-    if (gold.size() != actual.size())
-    {
-        return testing::AssertionFailure() << "KeyPoints size mistmach\n"
-                                           << "\"" << gold_expr << "\" : " << gold.size() << "\n"
-                                           << "\"" << actual_expr << "\" : " << actual.size();
-    }
-
-    std::sort(actual.begin(), actual.end(), KeyPointLess());
-    std::sort(gold.begin(), gold.end(), KeyPointLess());
-
-    for (size_t i = 0; i < gold.size(); ++i)
-    {
-        const cv::KeyPoint& p1 = gold[i];
-        const cv::KeyPoint& p2 = actual[i];
-
-        if (!keyPointsEquals(p1, p2))
-        {
-            return testing::AssertionFailure() << "KeyPoints differ at " << i << "\n"
-                                               << "\"" << gold_expr << "\" vs \"" << actual_expr << "\" : \n"
-                                               << "pt : " << testing::PrintToString(p1.pt) << " vs " << testing::PrintToString(p2.pt) << "\n"
-                                               << "size : " << p1.size << " vs " << p2.size << "\n"
-                                               << "angle : " << p1.angle << " vs " << p2.angle << "\n"
-                                               << "response : " << p1.response << " vs " << p2.response << "\n"
-                                               << "octave : " << p1.octave << " vs " << p2.octave << "\n"
-                                               << "class_id : " << p1.class_id << " vs " << p2.class_id;
-        }
-    }
-
-    return ::testing::AssertionSuccess();
-}
-
-int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
-{
-    std::sort(actual.begin(), actual.end(), KeyPointLess());
-    std::sort(gold.begin(), gold.end(), KeyPointLess());
-
-    int validCount = 0;
-
-    size_t sz = std::min(gold.size(), actual.size());
-    for (size_t i = 0; i < sz; ++i)
-    {
-        const cv::KeyPoint& p1 = gold[i];
-        const cv::KeyPoint& p2 = actual[i];
-
-        if (keyPointsEquals(p1, p2))
-            ++validCount;
-    }
-
-    return validCount;
-}
-
-int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
-{
-    int validCount = 0;
-
-    for (size_t i = 0; i < matches.size(); ++i)
-    {
-        const cv::DMatch& m = matches[i];
-
-        const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
-        const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
-
-        if (keyPointsEquals(p1, p2))
-            ++validCount;
-    }
-
-    return validCount;
-}
-
-} // namespace cvtest
diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp
deleted file mode 100644
index 4ecc9f0..0000000
--- a/modules/ocl/test/utility.hpp
+++ /dev/null
@@ -1,328 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_TEST_UTILITY_HPP__
-#define __OPENCV_TEST_UTILITY_HPP__
-#include "opencv2/core.hpp"
-
-
-extern int LOOP_TIMES;
-
-#define MWIDTH 256
-#define MHEIGHT 256
-
-#define MIN_VALUE 171
-#define MAX_VALUE 357
-
-namespace cvtest {
-
-testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
-#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual)
-CV_EXPORTS int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
-CV_EXPORTS int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches);
-
-void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
-
-cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi);
-cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi);
-
-// This function test if gpu_rst matches cpu_rst.
-// If the two vectors are not equal, it will return the difference in vector size
-// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-// The smaller, the better matched
-double checkRectSimilarity(cv::Size sz, std::vector<cv::Rect>& ob1, std::vector<cv::Rect>& ob2);
-
-
-//! read image from testdata folder.
-cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);
-cv::Mat readImageType(const std::string &fname, int type);
-
-double checkNorm(const cv::Mat &m);
-double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
-double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
-
-inline double checkNormRelative(const Mat &m1, const Mat &m2)
-{
-    return cv::norm(m1, m2, cv::NORM_INF) /
-            std::max((double)std::numeric_limits<float>::epsilon(),
-                     (double)std::max(cv::norm(m1, cv::NORM_INF), norm(m2, cv::NORM_INF)));
-}
-
-#define EXPECT_MAT_NORM(mat, eps) \
-{ \
-    EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
-}
-
-#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
-{ \
-   ASSERT_EQ(mat1.type(), mat2.type()); \
-   ASSERT_EQ(mat1.size(), mat2.size()); \
-   EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps) \
-       << cv::format("Size: %d x %d", mat1.cols, mat1.rows) << std::endl; \
-}
-
-#define EXPECT_MAT_NEAR_RELATIVE(mat1, mat2, eps) \
-{ \
-   ASSERT_EQ(mat1.type(), mat2.type()); \
-   ASSERT_EQ(mat1.size(), mat2.size()); \
-   EXPECT_LE(checkNormRelative(cv::Mat(mat1), cv::Mat(mat2)), eps) \
-       << cv::format("Size: %d x %d", mat1.cols, mat1.rows) << std::endl; \
-}
-
-#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
-{ \
-    ASSERT_EQ(mat1.type(), mat2.type()); \
-    ASSERT_EQ(mat1.size(), mat2.size()); \
-    EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
-}
-
-
-using perf::MatDepth;
-using perf::MatType;
-
-//! return vector with types from specified range.
-std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
-
-//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
-const std::vector<MatType> &all_types();
-
-class Inverse
-{
-public:
-    inline Inverse(bool val = false) : val_(val) {}
-
-    inline operator bool() const
-    {
-        return val_;
-    }
-
-private:
-    bool val_;
-};
-
-void PrintTo(const Inverse &useRoi, std::ostream *os);
-
-#define OCL_RNG_SEED 123456
-
-template <typename T>
-struct TSTestWithParam : public ::testing::TestWithParam<T>
-{
-    cv::RNG rng;
-
-    TSTestWithParam()
-    {
-        rng = cv::RNG(OCL_RNG_SEED);
-    }
-
-    int randomInt(int minVal, int maxVal)
-    {
-        return rng.uniform(minVal, maxVal);
-    }
-
-    double randomDouble(double minVal, double maxVal)
-    {
-        return rng.uniform(minVal, maxVal);
-    }
-
-    double randomDoubleLog(double minVal, double maxVal)
-    {
-        double logMin = log((double)minVal + 1);
-        double logMax = log((double)maxVal + 1);
-        double pow = rng.uniform(logMin, logMax);
-        double v = exp(pow) - 1;
-        CV_Assert(v >= minVal && (v < maxVal || (v == minVal && v == maxVal)));
-        return v;
-    }
-
-    Size randomSize(int minVal, int maxVal)
-    {
-#if 1
-        return cv::Size((int)randomDoubleLog(minVal, maxVal), (int)randomDoubleLog(minVal, maxVal));
-#else
-        return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal));
-#endif
-    }
-
-    Size randomSize(int minValX, int maxValX, int minValY, int maxValY)
-    {
-#if 1
-        return cv::Size(randomDoubleLog(minValX, maxValX), randomDoubleLog(minValY, maxValY));
-#else
-        return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal));
-#endif
-    }
-
-    Scalar randomScalar(double minVal, double maxVal)
-    {
-        return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal));
-    }
-
-    Mat randomMat(Size size, int type, double minVal, double maxVal, bool useRoi = false)
-    {
-        RNG dataRng(rng.next());
-        return cvtest::randomMat(dataRng, size, type, minVal, maxVal, useRoi);
-    }
-
-    struct Border
-    {
-        int top, bot, lef, rig;
-    };
-
-    Border randomBorder(int minValue = 0, int maxValue = MAX_VALUE)
-    {
-        Border border = {
-                (int)randomDoubleLog(minValue, maxValue),
-                (int)randomDoubleLog(minValue, maxValue),
-                (int)randomDoubleLog(minValue, maxValue),
-                (int)randomDoubleLog(minValue, maxValue)
-        };
-        return border;
-    }
-
-    void randomSubMat(Mat& whole, Mat& subMat, const Size& roiSize, const Border& border, int type, double minVal, double maxVal)
-    {
-        Size wholeSize = Size(roiSize.width + border.lef + border.rig, roiSize.height + border.top + border.bot);
-        whole = randomMat(wholeSize, type, minVal, maxVal, false);
-        subMat = whole(Rect(border.lef, border.top, roiSize.width, roiSize.height));
-    }
-
-    void generateOclMat(cv::ocl::oclMat& whole, cv::ocl::oclMat& subMat, const Mat& wholeMat, const Size& roiSize, const Border& border)
-    {
-        whole = wholeMat;
-        subMat = whole(Rect(border.lef, border.top, roiSize.width, roiSize.height));
-    }
-};
-
-#define PARAM_TEST_CASE(name, ...) struct name : public TSTestWithParam< std::tr1::tuple< __VA_ARGS__ > >
-
-#define GET_PARAM(k) std::tr1::get< k >(GetParam())
-
-#define ALL_TYPES testing::ValuesIn(all_types())
-#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
-
-#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300))
-
-#define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4))
-#ifndef IMPLEMENT_PARAM_CLASS
-#define IMPLEMENT_PARAM_CLASS(name, type) \
-    class name \
-    { \
-    public: \
-        name ( type arg = type ()) : val_(arg) {} \
-        operator type () const {return val_;} \
-    private: \
-        type val_; \
-    }; \
-    inline void PrintTo( name param, std::ostream* os) \
-    { \
-        *os << #name <<  "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
-    }
-
-IMPLEMENT_PARAM_CLASS(Channels, int)
-#endif // IMPLEMENT_PARAM_CLASS
-
-} // namespace cvtest
-
-enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
-CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
-
-CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE)
-CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NORM_MINMAX)
-CV_ENUM(ReduceOp, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN)
-CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
-CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
-CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
-CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
-CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
-
-CV_FLAGS(GemmFlags, GEMM_1_T, GEMM_2_T, GEMM_3_T)
-CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP)
-CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
-
-# define OCL_TEST_P(test_case_name, test_name) \
-    class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : \
-        public test_case_name { \
-    public: \
-        GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() { } \
-        virtual void TestBody(); \
-        void OCLTestBody(); \
-    private: \
-        static int AddToRegistry() \
-        { \
-            ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
-              GetTestCasePatternHolder<test_case_name>(\
-                  #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
-                      #test_case_name, \
-                      #test_name, \
-                      new ::testing::internal::TestMetaFactory< \
-                          GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
-            return 0; \
-        } \
-    \
-        static int gtest_registering_dummy_; \
-        GTEST_DISALLOW_COPY_AND_ASSIGN_(\
-            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
-    }; \
-    \
-    int GTEST_TEST_CLASS_NAME_(test_case_name, \
-                             test_name)::gtest_registering_dummy_ = \
-      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
-    \
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() \
-    { \
-        try \
-        { \
-            OCLTestBody(); \
-        } \
-        catch (const cv::Exception & ex) \
-        { \
-            if (ex.code == cv::Error::OpenCLDoubleNotSupported)\
-                std::cout << "Test skipped (selected device does not support double)" << std::endl; \
-            else if (ex.code == cv::Error::OpenCLNoAMDBlasFft) \
-                std::cout << "Test skipped (AMD Blas / Fft libraries are not available)" << std::endl; \
-            else \
-                throw; \
-        } \
-    } \
-    \
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::OCLTestBody()
-
-#endif // __OPENCV_TEST_UTILITY_HPP__
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 01f376d..1d30f5b 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -13,7 +13,6 @@ if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_LIST_DIR)
 add_subdirectory(c)
 add_subdirectory(cpp)
 add_subdirectory(gpu)
-add_subdirectory(ocl)
 add_subdirectory(tapi)
 
 if(WIN32 AND HAVE_DIRECTX)
diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt
deleted file mode 100644
index 04f5d17..0000000
--- a/samples/ocl/CMakeLists.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-SET(OPENCV_OCL_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc opencv_highgui
-                                     opencv_ml opencv_video opencv_objdetect opencv_features2d
-                                     opencv_calib3d opencv_legacy opencv_contrib opencv_ocl opencv_nonfree)
-
-ocv_check_dependencies(${OPENCV_OCL_SAMPLES_REQUIRED_DEPS})
-
-if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
-  set(project "ocl")
-  string(TOUPPER "${project}" project_upper)
-
-  project("${project}_samples")
-
-  ocv_include_modules(${OPENCV_OCL_SAMPLES_REQUIRED_DEPS})
-
-  if(HAVE_OPENCL)
-    ocv_include_directories(${OPENCL_INCLUDE_DIR})
-  endif()
-
-  # ---------------------------------------------
-  #      Define executable targets
-  # ---------------------------------------------
-  MACRO(OPENCV_DEFINE_OCL_EXAMPLE name srcs)
-    set(the_target "example_${project}_${name}")
-    add_executable(${the_target} ${srcs})
-
-    target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_OCL_SAMPLES_REQUIRED_DEPS})
-
-    set_target_properties(${the_target} PROPERTIES
-      OUTPUT_NAME "${project}-example-${name}"
-      PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}")
-
-    if(ENABLE_SOLUTION_FOLDERS)
-      set_target_properties(${the_target} PROPERTIES FOLDER "samples//${project}")
-    endif()
-
-    if(WIN32)
-      if(MSVC AND NOT BUILD_SHARED_LIBS)
-        set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG")
-      endif()
-      install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT samples)
-    endif()
-  ENDMACRO()
-
-  file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
-
-  foreach(sample_filename ${all_samples})
-    get_filename_component(sample ${sample_filename} NAME_WE)
-    file(GLOB sample_srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${sample}.*)
-    OPENCV_DEFINE_OCL_EXAMPLE(${sample} ${sample_srcs})
-  endforeach()
-endif()
-
-if(INSTALL_C_EXAMPLES AND NOT WIN32)
-  file(GLOB install_list *.c *.cpp *.jpg *.png *.data makefile.* build_all.sh *.dsp *.cmd )
-  install(FILES ${install_list}
-          DESTINATION share/OpenCV/samples/${project}
-          PERMISSIONS OWNER_READ GROUP_READ WORLD_READ COMPONENT samples)
-endif()
diff --git a/samples/ocl/adaptive_bilateral_filter.cpp b/samples/ocl/adaptive_bilateral_filter.cpp
deleted file mode 100644
index df085c4..0000000
--- a/samples/ocl/adaptive_bilateral_filter.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-// This sample shows the difference of adaptive bilateral filter and bilateral filter.
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/ocl.hpp"
-
-using namespace cv;
-using namespace std;
-
-
-int main( int argc, const char** argv )
-{
-    const char* keys =
-        "{ i input   |          | specify input image }"
-        "{ k ksize   |     11   | specify kernel size }"
-        "{ s sSpace  |     3    | specify sigma space }"
-        "{ c sColor  |     30   | specify max color }"
-        "{ h help    | false    | print help message }";
-
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.has("help"))
-    {
-        cout << "Usage : adaptive_bilateral_filter [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    string src_path = cmd.get<string>("i");
-    int ks = cmd.get<int>("k");
-    const char * winName[] = {"input", "ABF OpenCL", "BF OpenCL"};
-
-    Mat src = imread(src_path);
-    if (src.empty())
-    {
-        cout << "error read image: " << src_path << endl;
-        return EXIT_FAILURE;
-    }
-
-    double sigmaSpace = cmd.get<int>("s");
-
-    // sigma for checking pixel values. This is used as is in the "normal" bilateral filter,
-    // and it is used as an upper clamp on the adaptive case.
-    double sigmacolor = cmd.get<int>("c");
-
-    ocl::oclMat dsrc(src), dABFilter, dBFilter;
-    Size ksize(ks, ks);
-
-    // ksize is the total width/height of neighborhood used to calculate local variance.
-    // sigmaSpace is not a priori related to ksize/2.
-    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, sigmaSpace, sigmacolor);
-    ocl::bilateralFilter(dsrc, dBFilter, ks, sigmacolor, sigmaSpace);
-    Mat abFilter = dABFilter, bFilter = dBFilter;
-
-    ocl::finish();
-
-    imshow(winName[0], src);
-    imshow(winName[1], abFilter);
-    imshow(winName[2], bFilter);
-
-    waitKey();
-
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp
deleted file mode 100644
index 19d87ef..0000000
--- a/samples/ocl/bgfg_segm.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-#include <iostream>
-#include <string>
-
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/ocl.hpp"
-#include "opencv2/highgui.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace cv::ocl;
-
-#define M_MOG  1
-#define M_MOG2 2
-
-int main(int argc, const char** argv)
-{
-    cv::CommandLineParser cmd(argc, argv,
-        "{ c camera | false       | use camera }"
-        "{ f file   | 768x576.avi | input video file }"
-        "{ m method | mog         | method (mog, mog2) }"
-        "{ h help   | false       | print help message }");
-
-    if (cmd.has("help"))
-    {
-        cout << "Usage : bgfg_segm [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    bool useCamera = cmd.get<bool>("camera");
-    string file = cmd.get<string>("file");
-    string method = cmd.get<string>("method");
-
-    if (method != "mog" && method != "mog2")
-    {
-        cerr << "Incorrect method" << endl;
-        return EXIT_FAILURE;
-    }
-
-    int m = method == "mog" ? M_MOG : M_MOG2;
-
-    VideoCapture cap;
-    if (useCamera)
-        cap.open(0);
-    else
-        cap.open(file);
-
-    if (!cap.isOpened())
-    {
-        cout << "can not open camera or video file" << endl;
-        return EXIT_FAILURE;
-    }
-
-    Mat frame;
-    cap >> frame;
-
-    oclMat d_frame(frame);
-
-    cv::ocl::MOG mog;
-    cv::ocl::MOG2 mog2;
-
-    oclMat d_fgmask, d_fgimg, d_bgimg;
-
-    d_fgimg.create(d_frame.size(), d_frame.type());
-
-    Mat fgmask, fgimg, bgimg;
-
-    switch (m)
-    {
-    case M_MOG:
-        mog(d_frame, d_fgmask, 0.01f);
-        break;
-
-    case M_MOG2:
-        mog2(d_frame, d_fgmask);
-        break;
-    }
-
-    for (;;)
-    {
-        cap >> frame;
-        if (frame.empty())
-            break;
-        d_frame.upload(frame);
-
-        int64 start = cv::getTickCount();
-
-        //update the model
-        switch (m)
-        {
-        case M_MOG:
-            mog(d_frame, d_fgmask, 0.01f);
-            mog.getBackgroundImage(d_bgimg);
-            break;
-
-        case M_MOG2:
-            mog2(d_frame, d_fgmask);
-            mog2.getBackgroundImage(d_bgimg);
-            break;
-        }
-
-        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
-        std::cout << "FPS : " << fps << std::endl;
-
-        d_fgimg.setTo(Scalar::all(0));
-        d_frame.copyTo(d_fgimg, d_fgmask);
-
-        d_fgmask.download(fgmask);
-        d_fgimg.download(fgimg);
-        if (!d_bgimg.empty())
-            d_bgimg.download(bgimg);
-
-        imshow("image", frame);
-        imshow("foreground mask", fgmask);
-        imshow("foreground image", fgimg);
-        if (!bgimg.empty())
-            imshow("mean background image", bgimg);
-
-        if (27 == waitKey(30))
-            break;
-    }
-
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp
deleted file mode 100644
index 894a414..0000000
--- a/samples/ocl/clahe.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <iostream>
-#include "opencv2/core/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/ocl/ocl.hpp"
-using namespace cv;
-using namespace std;
-
-Ptr<CLAHE> pFilter;
-int tilesize;
-int cliplimit;
-
-static void TSize_Callback(int pos)
-{
-    if(pos==0)
-        pFilter->setTilesGridSize(Size(1,1));
-    else
-        pFilter->setTilesGridSize(Size(tilesize,tilesize));
-}
-
-static void Clip_Callback(int)
-{
-    pFilter->setClipLimit(cliplimit);
-}
-
-int main(int argc, char** argv)
-{
-    const char* keys =
-        "{ i input   |                    | specify input image }"
-        "{ c camera  |    0               | specify camera id   }"
-        "{ s use_cpu |    false           | use cpu algorithm   }"
-        "{ o output  | clahe_output.jpg   | specify output save path}"
-        "{ h help    | false              | print help message }";
-
-    cv::CommandLineParser cmd(argc, argv, keys);
-    if (cmd.has("help"))
-    {
-        cout << "Usage : clahe [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    string infile = cmd.get<string>("i"), outfile = cmd.get<string>("o");
-    int camid = cmd.get<int>("c");
-    bool use_cpu = cmd.get<bool>("s");
-    VideoCapture capture;
-
-    namedWindow("CLAHE");
-    createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback);
-    createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback);
-
-    Mat frame, outframe;
-    ocl::oclMat d_outframe, d_frame;
-
-    int cur_clip;
-    Size cur_tilesize;
-    pFilter = use_cpu ? createCLAHE() : ocl::createCLAHE();
-
-    cur_clip = (int)pFilter->getClipLimit();
-    cur_tilesize = pFilter->getTilesGridSize();
-    setTrackbarPos("Tile Size", "CLAHE", cur_tilesize.width);
-    setTrackbarPos("Clip Limit", "CLAHE", cur_clip);
-
-    if(infile != "")
-    {
-        frame = imread(infile);
-        if(frame.empty())
-        {
-            cout << "error read image: " << infile << endl;
-            return EXIT_FAILURE;
-        }
-    }
-    else
-        capture.open(camid);
-
-    cout << "\nControls:\n"
-         << "\to - save output image\n"
-         << "\tESC - exit\n";
-
-    for (;;)
-    {
-        if(capture.isOpened())
-            capture.read(frame);
-        else
-            frame = imread(infile);
-        if(frame.empty())
-            continue;
-
-        if(use_cpu)
-        {
-            cvtColor(frame, frame, COLOR_BGR2GRAY);
-            pFilter->apply(frame, outframe);
-        }
-        else
-        {
-            ocl::cvtColor(d_frame = frame, d_outframe, COLOR_BGR2GRAY);
-            pFilter->apply(d_outframe, d_outframe);
-            d_outframe.download(outframe);
-        }
-
-        imshow("CLAHE", outframe);
-
-        char key = (char)waitKey(3);
-        if(key == 'o')
-            imwrite(outfile, outframe);
-        else if(key == 27)
-            break;
-    }
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp
deleted file mode 100644
index c505932..0000000
--- a/samples/ocl/facedetect.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/ocl/ocl.hpp"
-
-#include "opencv2/highgui/highgui_c.h"
-
-#include <iostream>
-#include <stdio.h>
-
-#if defined(_MSC_VER) && (_MSC_VER >= 1700)
-    # include <thread>
-#endif
-
-using namespace std;
-using namespace cv;
-
-#define LOOP_NUM 1
-#define MAX_THREADS 10
-
-
-///////////////////////////single-threading faces detecting///////////////////////////////
-
-const static Scalar colors[] =  { CV_RGB(0,0,255),
-                                  CV_RGB(0,128,255),
-                                  CV_RGB(0,255,255),
-                                  CV_RGB(0,255,0),
-                                  CV_RGB(255,128,0),
-                                  CV_RGB(255,255,0),
-                                  CV_RGB(255,0,0),
-                                  CV_RGB(255,0,255)
-                                } ;
-
-
-int64 work_begin[MAX_THREADS] = {0};
-int64 work_total[MAX_THREADS] = {0};
-string inputName, outputName, cascadeName;
-
-static void workBegin(int i = 0)
-{
-    work_begin[i] = getTickCount();
-}
-
-static void workEnd(int i = 0)
-{
-    work_total[i] += (getTickCount() - work_begin[i]);
-}
-
-static double getTotalTime(int i = 0)
-{
-    return work_total[i] /getTickFrequency() * 1000.;
-}
-
-
-static void detect( Mat& img, vector<Rect>& faces,
-             ocl::OclCascadeClassifier& cascade,
-             double scale);
-
-
-static void detectCPU( Mat& img, vector<Rect>& faces,
-                CascadeClassifier& cascade,
-                double scale);
-
-static void Draw(Mat& img, vector<Rect>& faces, double scale);
-
-
-// This function test if gpu_rst matches cpu_rst.
-// If the two vectors are not equal, it will return the difference in vector size
-// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
-
-static int facedetect_one_thread(bool useCPU, double scale )
-{
-    CvCapture* capture = 0;
-    Mat frame, frameCopy0, frameCopy, image;
-
-    ocl::OclCascadeClassifier cascade;
-    CascadeClassifier  cpu_cascade;
-
-    if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
-    {
-        cout << "ERROR: Could not load classifier cascade: " << cascadeName << endl;
-        return EXIT_FAILURE;
-    }
-
-    if( inputName.empty() )
-    {
-        capture = cvCaptureFromCAM(0);
-        if(!capture)
-            cout << "Capture from CAM 0 didn't work" << endl;
-    }
-    else
-    {
-        image = imread( inputName, CV_LOAD_IMAGE_COLOR );
-        if( image.empty() )
-        {
-            capture = cvCaptureFromAVI( inputName.c_str() );
-            if(!capture)
-                cout << "Capture from AVI didn't work" << endl;
-            return EXIT_FAILURE;
-        }
-    }
-
-    if( capture )
-    {
-        cout << "In capture ..." << endl;
-        for(;;)
-        {
-            IplImage* iplImg = cvQueryFrame( capture );
-            frame = cv::cvarrToMat(iplImg);
-            vector<Rect> faces;
-            if( frame.empty() )
-                break;
-            if( iplImg->origin == IPL_ORIGIN_TL )
-                frame.copyTo( frameCopy0 );
-            else
-                flip( frame, frameCopy0, 0 );
-            if( scale == 1)
-                frameCopy0.copyTo(frameCopy);
-            else
-                resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
-
-            if(useCPU)
-                detectCPU(frameCopy, faces, cpu_cascade, 1);
-            else
-                detect(frameCopy, faces, cascade, 1);
-
-            Draw(frameCopy, faces, 1);
-            if( waitKey( 10 ) >= 0 )
-                break;
-        }
-        cvReleaseCapture( &capture );
-    }
-    else
-    {
-        cout << "In image read " << image.size() << endl;
-        vector<Rect> faces;
-        vector<Rect> ref_rst;
-        double accuracy = 0.;
-        detectCPU(image, ref_rst, cpu_cascade, scale);
-
-        cout << "loops: ";
-        for(int i = 0; i <= LOOP_NUM; i ++)
-        {
-            cout << i << ", ";
-            if(useCPU)
-                detectCPU(image, faces, cpu_cascade, scale);
-            else
-            {
-                detect(image, faces, cascade, scale);
-                if(i == 0)
-                {
-                    accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
-                }
-            }
-        }
-        cout << "done!" << endl;
-        if (useCPU)
-            cout << "average CPU time (noCamera) : ";
-        else
-            cout << "average GPU time (noCamera) : ";
-        cout << getTotalTime() / LOOP_NUM << " ms" << endl;
-        cout << "accuracy value: " << accuracy <<endl;
-
-        Draw(image, faces, scale);
-        waitKey(0);
-    }
-
-    cvDestroyWindow("result");
-    std::cout<< "single-threaded sample has finished" <<std::endl;
-    return 0;
-}
-
-///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
-#if defined(_MSC_VER) && (_MSC_VER >= 1700)
-
-static void detectFaces(std::string fileName, int threadNum)
-{
-    ocl::OclCascadeClassifier cascade;
-    if(!cascade.load(cascadeName))
-    {
-        std::cout << "ERROR: Could not load classifier cascade: " << cascadeName << std::endl;
-        return;
-    }
-
-    Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
-    if (img.empty())
-    {
-        std::cout << '[' << threadNum << "] " << "can't open file " + fileName <<std::endl;
-        return;
-    }
-
-    ocl::oclMat d_img;
-    d_img.upload(img);
-
-    std::vector<Rect> oclfaces;
-    std::thread::id tid = std::this_thread::get_id();
-    std::cout << '[' << threadNum << "] "
-        << "ThreadID = " << tid
-        << ", CommandQueue = " << *(void**)ocl::getClCommandQueuePtr()
-        << endl;
-    for(int i = 0; i <= LOOP_NUM; i++)
-    {
-        if(i>0) workBegin(threadNum);
-        cascade.detectMultiScale(d_img, oclfaces,  1.1, 3, 0|CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
-        if(i>0) workEnd(threadNum);
-    }
-    std::cout << '[' << threadNum << "] " << "Average time = " << getTotalTime(threadNum) / LOOP_NUM << " ms" << endl;
-
-    for(unsigned int i = 0; i<oclfaces.size(); i++)
-        rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
-
-    std::string::size_type pos = outputName.rfind('.');
-    std::string strTid = std::to_string(_threadid);
-    if( !outputName.empty() )
-    {
-        if(pos == std::string::npos)
-        {
-            std::cout << "Invalid output file name: " << outputName << std::endl;
-        }
-        else
-        {
-            std::string outputNameTid = outputName.substr(0, pos) + "_" + strTid + outputName.substr(pos);
-            imwrite(outputNameTid, img);
-        }
-    }
-    imshow(strTid, img);
-    waitKey(0);
-}
-
-static void facedetect_multithreading(int nthreads)
-{
-    int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
-    std::vector<std::thread> threads;
-    for(int i = 0; i<thread_number; i++)
-        threads.push_back(std::thread(detectFaces, inputName, i));
-    for(int i = 0; i<thread_number; i++)
-        threads[i].join();
-}
-#endif
-
-int main( int argc, const char** argv )
-{
-
-    const char* keys =
-        "{ h help       | false       | print help message }"
-        "{ i input      |             | specify input image }"
-        "{ t template   | haarcascade_frontalface_alt.xml |"
-        " specify template file path }"
-        "{ c scale      |   1.0       | scale image }"
-        "{ s use_cpu    | false       | use cpu or gpu to process the image }"
-        "{ o output     | | specify output image save path(only works when input is images) }"
-        "{ n thread_num |      1      | set number of threads >= 1 }";
-
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.has("help"))
-    {
-        cout << "Usage : facedetect [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-    bool useCPU = cmd.get<bool>("s");
-    inputName = cmd.get<string>("i");
-    outputName = cmd.get<string>("o");
-    cascadeName = cmd.get<string>("t");
-    double scale = cmd.get<double>("c");
-    int n = cmd.get<int>("n");
-
-    if(n > 1)
-    {
-#if defined(_MSC_VER) && (_MSC_VER >= 1700)
-            std::cout<<"multi-threaded sample is running" <<std::endl;
-            facedetect_multithreading(n);
-            std::cout<<"multi-threaded sample has finished" <<std::endl;
-            return 0;
-#else
-            std::cout << "std::thread is not supported, running a single-threaded version" << std::endl;
-#endif
-    }
-    if (n<0)
-        std::cout<<"incorrect number of threads:" << n << ", running a single-threaded version" <<std::endl;
-    else
-        std::cout<<"single-threaded sample is running" <<std::endl;
-    return facedetect_one_thread(useCPU, scale);
-
-}
-
-void detect( Mat& img, vector<Rect>& faces,
-             ocl::OclCascadeClassifier& cascade,
-             double scale)
-{
-    ocl::oclMat image(img);
-    ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-    workBegin();
-    ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
-    ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-    ocl::equalizeHist( smallImg, smallImg );
-
-    cascade.detectMultiScale( smallImg, faces, 1.1,
-                              3, 0
-                              |CASCADE_SCALE_IMAGE
-                              , Size(30,30), Size(0, 0) );
-    workEnd();
-}
-
-void detectCPU( Mat& img, vector<Rect>& faces,
-                CascadeClassifier& cascade,
-                double scale)
-{
-    workBegin();
-    Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-    cvtColor(img, cpu_gray, COLOR_BGR2GRAY);
-    resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
-    equalizeHist(cpu_smallImg, cpu_smallImg);
-    cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
-                             3, 0 | CASCADE_SCALE_IMAGE,
-                             Size(30, 30), Size(0, 0));
-    workEnd();
-}
-
-
-void Draw(Mat& img, vector<Rect>& faces, double scale)
-{
-    int i = 0;
-    for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
-    {
-        Point center;
-        Scalar color = colors[i%8];
-        int radius;
-        center.x = cvRound((r->x + r->width*0.5)*scale);
-        center.y = cvRound((r->y + r->height*0.5)*scale);
-        radius = cvRound((r->width + r->height)*0.25*scale);
-        circle( img, center, radius, color, 3, 8, 0 );
-    }
-    //if( !outputName.empty() ) imwrite( outputName, img );
-    if( abs(scale-1.0)>.001 )
-    {
-        resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
-    }
-    imshow( "result", img );
-
-}
-
-
-double checkRectSimilarity(Size sz, vector<Rect>& ob1, vector<Rect>& ob2)
-{
-    double final_test_result = 0.0;
-    size_t sz1 = ob1.size();
-    size_t sz2 = ob2.size();
-
-    if(sz1 != sz2)
-    {
-        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
-    }
-    else
-    {
-        if(sz1==0 && sz2==0)
-            return 0;
-        Mat cpu_result(sz, CV_8UC1);
-        cpu_result.setTo(0);
-
-        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {
-            Mat cpu_result_roi(cpu_result, *r);
-            cpu_result_roi.setTo(1);
-            cpu_result.copyTo(cpu_result);
-        }
-        int cpu_area = countNonZero(cpu_result > 0);
-
-
-        Mat gpu_result(sz, CV_8UC1);
-        gpu_result.setTo(0);
-        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
-        {
-            cv::Mat gpu_result_roi(gpu_result, *r2);
-            gpu_result_roi.setTo(1);
-            gpu_result.copyTo(gpu_result);
-        }
-
-        Mat result_;
-        multiply(cpu_result, gpu_result, result_);
-        int result = countNonZero(result_ > 0);
-        if(cpu_area!=0 && result!=0)
-            final_test_result = 1.0 - (double)result/(double)cpu_area;
-        else if(cpu_area==0 && result!=0)
-            final_test_result = -1;
-    }
-    return final_test_result;
-}
diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp
deleted file mode 100644
index a3c5c99..0000000
--- a/samples/ocl/hog.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <sstream>
-#include <iomanip>
-#include <stdexcept>
-#include <opencv2/core/utility.hpp>
-#include "opencv2/ocl.hpp"
-#include "opencv2/highgui.hpp"
-
-using namespace std;
-using namespace cv;
-
-class App
-{
-public:
-    App(CommandLineParser& cmd);
-    void run();
-    void handleKey(char key);
-    void hogWorkBegin();
-    void hogWorkEnd();
-    string hogWorkFps() const;
-    void workBegin();
-    void workEnd();
-    string workFps() const;
-    string message() const;
-
-
-// This function test if gpu_rst matches cpu_rst.
-// If the two vectors are not equal, it will return the difference in vector size
-// Else if will return
-// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-    double checkRectSimilarity(Size sz,
-                               std::vector<Rect>& cpu_rst,
-                               std::vector<Rect>& gpu_rst);
-private:
-    App operator=(App&);
-
-    //Args args;
-    bool running;
-    bool use_gpu;
-    bool make_gray;
-    double scale;
-    double resize_scale;
-    int win_width;
-    int win_stride_width, win_stride_height;
-    int gr_threshold;
-    int nlevels;
-    double hit_threshold;
-    bool gamma_corr;
-
-    int64 hog_work_begin;
-    double hog_work_fps;
-    int64 work_begin;
-    double work_fps;
-
-    string img_source;
-    string vdo_source;
-    string output;
-    int camera_id;
-    bool write_once;
-};
-
-int main(int argc, char** argv)
-{
-    const char* keys =
-        "{ h |  help    | false          | print help message }"
-        "{ i |  input   |                | specify input image}"
-        "{ c | camera   | -1             | enable camera capturing }"
-        "{ v | video    |                | use video as input }"
-        "{ g |  gray    | false          | convert image to gray one or not}"
-        "{ s |  scale   | 1.0            | resize the image before detect}"
-        "{ l |larger_win| false          | use 64x128 window}"
-        "{ o |  output  |                | specify output path when input is images}";
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.has("help"))
-    {
-        cout << "Usage : hog [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    App app(cmd);
-    try
-    {
-        app.run();
-    }
-    catch (const Exception& e)
-    {
-        return cout << "error: "  << e.what() << endl, 1;
-    }
-    catch (const exception& e)
-    {
-        return cout << "error: "  << e.what() << endl, 1;
-    }
-    catch(...)
-    {
-        return cout << "unknown exception" << endl, 1;
-    }
-    return EXIT_SUCCESS;
-}
-
-App::App(CommandLineParser& cmd)
-{
-    cout << "\nControls:\n"
-         << "\tESC - exit\n"
-         << "\tm - change mode GPU <-> CPU\n"
-         << "\tg - convert image to gray or not\n"
-         << "\to - save output image once, or switch on/off video save\n"
-         << "\t1/q - increase/decrease HOG scale\n"
-         << "\t2/w - increase/decrease levels count\n"
-         << "\t3/e - increase/decrease HOG group threshold\n"
-         << "\t4/r - increase/decrease hit threshold\n"
-         << endl;
-
-
-    use_gpu = true;
-    make_gray = cmd.get<bool>("g");
-    resize_scale = cmd.get<double>("s");
-    win_width = cmd.get<bool>("l") == true ? 64 : 48;
-    vdo_source = cmd.get<string>("v");
-    img_source = cmd.get<string>("i");
-    output = cmd.get<string>("o");
-    camera_id = cmd.get<int>("c");
-
-    win_stride_width = 8;
-    win_stride_height = 8;
-    gr_threshold = 8;
-    nlevels = 13;
-    hit_threshold = win_width == 48 ? 1.4 : 0.;
-    scale = 1.05;
-    gamma_corr = true;
-    write_once = false;
-
-    cout << "Group threshold: " << gr_threshold << endl;
-    cout << "Levels number: " << nlevels << endl;
-    cout << "Win width: " << win_width << endl;
-    cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
-    cout << "Hit threshold: " << hit_threshold << endl;
-    cout << "Gamma correction: " << gamma_corr << endl;
-    cout << endl;
-}
-
-void App::run()
-{
-    running = true;
-    VideoWriter video_writer;
-
-    Size win_size(win_width, win_width * 2);
-    Size win_stride(win_stride_width, win_stride_height);
-
-    // Create HOG descriptors and detectors here
-    vector<float> detector;
-    if (win_size == Size(64, 128))
-        detector = ocl::HOGDescriptor::getPeopleDetector64x128();
-    else
-        detector = ocl::HOGDescriptor::getPeopleDetector48x96();
-
-
-    ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                               ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                               ocl::HOGDescriptor::DEFAULT_NLEVELS);
-    HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                          HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
-    gpu_hog.setSVMDetector(detector);
-    cpu_hog.setSVMDetector(detector);
-
-    while (running)
-    {
-        VideoCapture vc;
-        Mat frame;
-
-        if (vdo_source!="")
-        {
-            vc.open(vdo_source.c_str());
-            if (!vc.isOpened())
-                throw runtime_error(string("can't open video file: " + vdo_source));
-            vc >> frame;
-        }
-        else if (camera_id != -1)
-        {
-            vc.open(camera_id);
-            if (!vc.isOpened())
-            {
-                stringstream msg;
-                msg << "can't open camera: " << camera_id;
-                throw runtime_error(msg.str());
-            }
-            vc >> frame;
-        }
-        else
-        {
-            frame = imread(img_source);
-            if (frame.empty())
-                throw runtime_error(string("can't open image file: " + img_source));
-        }
-
-        Mat img_aux, img, img_to_show;
-        ocl::oclMat gpu_img;
-
-        // Iterate over all frames
-        bool verify = false;
-        while (running && !frame.empty())
-        {
-            workBegin();
-
-            // Change format of the image
-            if (make_gray) cvtColor(frame, img_aux, COLOR_BGR2GRAY);
-            else if (use_gpu) cvtColor(frame, img_aux, COLOR_BGR2BGRA);
-            else frame.copyTo(img_aux);
-
-            // Resize image
-            if (abs(scale-1.0)>0.001)
-            {
-                Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
-                resize(img_aux, img, sz);
-            }
-            else img = img_aux;
-            img_to_show = img;
-            gpu_hog.nlevels = nlevels;
-            cpu_hog.nlevels = nlevels;
-            vector<Rect> found;
-
-            // Perform HOG classification
-            hogWorkBegin();
-            if (use_gpu)
-            {
-                gpu_img.upload(img);
-                gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
-                                         Size(0, 0), scale, gr_threshold);
-                if (!verify)
-                {
-                    // verify if GPU output same objects with CPU at 1st run
-                    verify = true;
-                    vector<Rect> ref_rst;
-                    cvtColor(img, img, COLOR_BGRA2BGR);
-                    cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
-                                             Size(0, 0), scale, gr_threshold-2);
-                    double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
-                    cout << "\naccuracy value: " << accuracy << endl;
-                }
-            }
-            else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                              Size(0, 0), scale, gr_threshold);
-            hogWorkEnd();
-
-
-            // Draw positive classified windows
-            for (size_t i = 0; i < found.size(); i++)
-            {
-                Rect r = found[i];
-                rectangle(img_to_show, r.tl(), r.br(), Scalar(0, 255, 0), 3);
-            }
-
-            if (use_gpu)
-                putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
-            else
-                putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
-            putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
-            putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
-            imshow("opencv_gpu_hog", img_to_show);
-            if (vdo_source!="" || camera_id!=-1) vc >> frame;
-
-            workEnd();
-
-            if (output!="" && write_once)
-            {
-                if (img_source!="")     // wirte image
-                {
-                    write_once = false;
-                    imwrite(output, img_to_show);
-                }
-                else                    //write video
-                {
-                    if (!video_writer.isOpened())
-                    {
-                        video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
-                                          img_to_show.size(), true);
-                        if (!video_writer.isOpened())
-                            throw std::runtime_error("can't create video writer");
-                    }
-
-                    if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
-                    else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
-
-                    video_writer << img;
-                }
-            }
-
-            handleKey((char)waitKey(3));
-        }
-    }
-}
-
-void App::handleKey(char key)
-{
-    switch (key)
-    {
-    case 27:
-        running = false;
-        break;
-    case 'm':
-    case 'M':
-        use_gpu = !use_gpu;
-        cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n";
-        break;
-    case 'g':
-    case 'G':
-        make_gray = !make_gray;
-        cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
-        break;
-    case '1':
-        scale *= 1.05;
-        cout << "Scale: " << scale << endl;
-        break;
-    case 'q':
-    case 'Q':
-        scale /= 1.05;
-        cout << "Scale: " << scale << endl;
-        break;
-    case '2':
-        nlevels++;
-        cout << "Levels number: " << nlevels << endl;
-        break;
-    case 'w':
-    case 'W':
-        nlevels = max(nlevels - 1, 1);
-        cout << "Levels number: " << nlevels << endl;
-        break;
-    case '3':
-        gr_threshold++;
-        cout << "Group threshold: " << gr_threshold << endl;
-        break;
-    case 'e':
-    case 'E':
-        gr_threshold = max(0, gr_threshold - 1);
-        cout << "Group threshold: " << gr_threshold << endl;
-        break;
-    case '4':
-        hit_threshold+=0.25;
-        cout << "Hit threshold: " << hit_threshold << endl;
-        break;
-    case 'r':
-    case 'R':
-        hit_threshold = max(0.0, hit_threshold - 0.25);
-        cout << "Hit threshold: " << hit_threshold << endl;
-        break;
-    case 'c':
-    case 'C':
-        gamma_corr = !gamma_corr;
-        cout << "Gamma correction: " << gamma_corr << endl;
-        break;
-    case 'o':
-    case 'O':
-        write_once = !write_once;
-        break;
-    }
-}
-
-
-inline void App::hogWorkBegin()
-{
-    hog_work_begin = getTickCount();
-}
-
-inline void App::hogWorkEnd()
-{
-    int64 delta = getTickCount() - hog_work_begin;
-    double freq = getTickFrequency();
-    hog_work_fps = freq / delta;
-}
-
-inline string App::hogWorkFps() const
-{
-    stringstream ss;
-    ss << hog_work_fps;
-    return ss.str();
-}
-
-inline void App::workBegin()
-{
-    work_begin = getTickCount();
-}
-
-inline void App::workEnd()
-{
-    int64 delta = getTickCount() - work_begin;
-    double freq = getTickFrequency();
-    work_fps = freq / delta;
-}
-
-inline string App::workFps() const
-{
-    stringstream ss;
-    ss << work_fps;
-    return ss.str();
-}
-
-
-double App::checkRectSimilarity(Size sz,
-                                std::vector<Rect>& ob1,
-                                std::vector<Rect>& ob2)
-{
-    double final_test_result = 0.0;
-    size_t sz1 = ob1.size();
-    size_t sz2 = ob2.size();
-
-    if(sz1 != sz2)
-    {
-        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
-    }
-    else
-    {
-        if(sz1==0 && sz2==0)
-            return 0;
-        cv::Mat cpu_result(sz, CV_8UC1);
-        cpu_result.setTo(0);
-
-
-        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {
-            cv::Mat cpu_result_roi(cpu_result, *r);
-            cpu_result_roi.setTo(1);
-            cpu_result.copyTo(cpu_result);
-        }
-        int cpu_area = cv::countNonZero(cpu_result > 0);
-
-
-        cv::Mat gpu_result(sz, CV_8UC1);
-        gpu_result.setTo(0);
-        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
-        {
-            cv::Mat gpu_result_roi(gpu_result, *r2);
-            gpu_result_roi.setTo(1);
-            gpu_result.copyTo(gpu_result);
-        }
-
-        cv::Mat result_;
-        multiply(cpu_result, gpu_result, result_);
-        int result = cv::countNonZero(result_ > 0);
-        if(cpu_area!=0 && result!=0)
-            final_test_result = 1.0 - (double)result/(double)cpu_area;
-        else if(cpu_area==0 && result!=0)
-            final_test_result = -1;
-    }
-    return final_test_result;
-}
diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp
deleted file mode 100644
index 89137d9..0000000
--- a/samples/ocl/pyrlk_optical_flow.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <iomanip>
-
-#include "opencv2/core/utility.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/video/video.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace cv::ocl;
-
-typedef unsigned char uchar;
-#define LOOP_NUM 10
-int64 work_begin = 0;
-int64 work_end = 0;
-
-static void workBegin()
-{
-    work_begin = getTickCount();
-}
-static void workEnd()
-{
-    work_end += (getTickCount() - work_begin);
-}
-static double getTime()
-{
-    return work_end * 1000. / getTickFrequency();
-}
-
-static void download(const oclMat& d_mat, vector<Point2f>& vec)
-{
-    vec.clear();
-    vec.resize(d_mat.cols);
-    Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]);
-    d_mat.download(mat);
-}
-
-static void download(const oclMat& d_mat, vector<uchar>& vec)
-{
-    vec.clear();
-    vec.resize(d_mat.cols);
-    Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]);
-    d_mat.download(mat);
-}
-
-static void drawArrows(Mat& frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status,
-                       Scalar line_color = Scalar(0, 0, 255))
-{
-    for (size_t i = 0; i < prevPts.size(); ++i)
-    {
-        if (status[i])
-        {
-            int line_thickness = 1;
-
-            Point p = prevPts[i];
-            Point q = nextPts[i];
-
-            double angle = atan2((double) p.y - q.y, (double) p.x - q.x);
-
-            double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) );
-
-            if (hypotenuse < 1.0)
-                continue;
-
-            // Here we lengthen the arrow by a factor of three.
-            q.x = (int) (p.x - 3 * hypotenuse * cos(angle));
-            q.y = (int) (p.y - 3 * hypotenuse * sin(angle));
-
-            // Now we draw the main line of the arrow.
-            line(frame, p, q, line_color, line_thickness);
-
-            // Now draw the tips of the arrow. I do some scaling so that the
-            // tips look proportional to the main line of the arrow.
-
-            p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
-            p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
-            line(frame, p, q, line_color, line_thickness);
-
-            p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
-            p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
-            line(frame, p, q, line_color, line_thickness);
-        }
-    }
-}
-
-
-int main(int argc, const char* argv[])
-{
-    const char* keys =
-        "{ help h           | false           | print help message }"
-        "{ left l           |                 | specify left image }"
-        "{ right r          |                 | specify right image }"
-        "{ camera c         | 0               | enable camera capturing }"
-        "{ use_cpu s        | false           | use cpu or gpu to process the image }"
-        "{ video v          |                 | use video as input }"
-        "{ output o         | pyrlk_output.jpg| specify output save path when input is images }"
-        "{ points           | 1000            | specify points count [GoodFeatureToTrack] }"
-        "{ min_dist         | 0               | specify minimal distance between points [GoodFeatureToTrack] }";
-
-    CommandLineParser cmd(argc, argv, keys);
-
-    if (cmd.has("help"))
-    {
-        cout << "Usage: pyrlk_optical_flow [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    bool defaultPicturesFail = false;
-    string fname0 = cmd.get<string>("left");
-    string fname1 = cmd.get<string>("right");
-    string vdofile = cmd.get<string>("video");
-    string outfile = cmd.get<string>("output");
-    int points = cmd.get<int>("points");
-    double minDist = cmd.get<double>("min_dist");
-    bool useCPU = cmd.has("s");
-    int inputName = cmd.get<int>("c");
-
-    oclMat d_nextPts, d_status;
-    GoodFeaturesToTrackDetector_OCL d_features(points);
-    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
-    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
-    PyrLKOpticalFlow d_pyrLK;
-    vector<cv::Point2f> pts(points);
-    vector<cv::Point2f> nextPts(points);
-    vector<unsigned char> status(points);
-    vector<float> err;
-
-    cout << "Points count : " << points << endl << endl;
-
-    if (frame0.empty() || frame1.empty())
-    {
-        VideoCapture capture;
-        Mat frame, frameCopy;
-        Mat frame0Gray, frame1Gray;
-        Mat ptr0, ptr1;
-
-        if(vdofile.empty())
-            capture.open( inputName );
-        else
-            capture.open(vdofile.c_str());
-
-        int c = inputName ;
-        if(!capture.isOpened())
-        {
-            if(vdofile.empty())
-                cout << "Capture from CAM " << c << " didn't work" << endl;
-            else
-                cout << "Capture from file " << vdofile << " failed" <<endl;
-            if (defaultPicturesFail)
-                return EXIT_FAILURE;
-            goto nocamera;
-        }
-
-        cout << "In capture ..." << endl;
-        for(int i = 0;; i++)
-        {
-            if( !capture.read(frame) )
-                break;
-
-            if (i == 0)
-            {
-                frame.copyTo( frame0 );
-                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
-            }
-            else
-            {
-                if (i%2 == 1)
-                {
-                    frame.copyTo(frame1);
-                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
-                    ptr0 = frame0Gray;
-                    ptr1 = frame1Gray;
-                }
-                else
-                {
-                    frame.copyTo(frame0);
-                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
-                    ptr0 = frame1Gray;
-                    ptr1 = frame0Gray;
-                }
-
-                if (useCPU)
-                {
-                    pts.clear();
-                    goodFeaturesToTrack(ptr0, pts, points, 0.01, 0.0);
-                    if(pts.size() == 0)
-                        continue;
-                    calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err);
-                }
-                else
-                {
-                    oclMat d_img(ptr0), d_prevPts;
-                    d_features(d_img, d_prevPts);
-                    if(!d_prevPts.rows || !d_prevPts.cols)
-                        continue;
-                    d_pyrLK.sparse(d_img, oclMat(ptr1), d_prevPts, d_nextPts, d_status);
-                    d_features.downloadPoints(d_prevPts,pts);
-                    download(d_nextPts, nextPts);
-                    download(d_status, status);
-                }
-                if (i%2 == 1)
-                    frame1.copyTo(frameCopy);
-                else
-                    frame0.copyTo(frameCopy);
-                drawArrows(frameCopy, pts, nextPts, status, Scalar(255, 0, 0));
-                imshow("PyrLK [Sparse]", frameCopy);
-            }
-
-            if( waitKey( 10 ) >= 0 )
-                break;
-        }
-
-        capture.release();
-    }
-    else
-    {
-nocamera:
-        for(int i = 0; i <= LOOP_NUM; i ++)
-        {
-            cout << "loop" << i << endl;
-            if (i > 0) workBegin();
-
-            if (useCPU)
-            {
-                goodFeaturesToTrack(frame0, pts, points, 0.01, minDist);
-                calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
-            }
-            else
-            {
-                oclMat d_img(frame0), d_prevPts;
-                d_features(d_img, d_prevPts);
-                d_pyrLK.sparse(d_img, oclMat(frame1), d_prevPts, d_nextPts, d_status);
-                d_features.downloadPoints(d_prevPts, pts);
-                download(d_nextPts, nextPts);
-                download(d_status, status);
-            }
-
-            if (i > 0 && i <= LOOP_NUM)
-                workEnd();
-
-            if (i == LOOP_NUM)
-            {
-                if (useCPU)
-                    cout << "average CPU time (noCamera) : ";
-                else
-                    cout << "average GPU time (noCamera) : ";
-
-                cout << getTime() / LOOP_NUM << " ms" << endl;
-
-                drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
-                imshow("PyrLK [Sparse]", frame0);
-                imwrite(outfile, frame0);
-            }
-        }
-    }
-
-    waitKey();
-
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp
deleted file mode 100644
index b53648f..0000000
--- a/samples/ocl/squares.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-// The "Square Detector" program.
-// It loads several images sequentially and tries to find squares in
-// each image
-
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/ocl/ocl.hpp"
-#include <iostream>
-#include <math.h>
-#include <string.h>
-
-using namespace cv;
-using namespace std;
-
-#define ACCURACY_CHECK
-
-#ifdef ACCURACY_CHECK
-// check if two vectors of vector of points are near or not
-// prior assumption is that they are in correct order
-static bool checkPoints(
-    vector< vector<Point> > set1,
-    vector< vector<Point> > set2,
-    int maxDiff = 5)
-{
-    if(set1.size() != set2.size())
-    {
-        return false;
-    }
-
-    for(vector< vector<Point> >::iterator it1 = set1.begin(), it2 = set2.begin();
-            it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++)
-    {
-        vector<Point> pts1 = *it1;
-        vector<Point> pts2 = *it2;
-
-
-        if(pts1.size() != pts2.size())
-        {
-            return false;
-        }
-        for(size_t i = 0; i < pts1.size(); i ++)
-        {
-            Point pt1 = pts1[i], pt2 = pts2[i];
-            if(std::abs(pt1.x - pt2.x) > maxDiff ||
-                    std::abs(pt1.y - pt2.y) > maxDiff)
-            {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-#endif
-
-int thresh = 50, N = 11;
-const char* wndname = "OpenCL Square Detection Demo";
-
-
-// helper function:
-// finds a cosine of angle between vectors
-// from pt0->pt1 and from pt0->pt2
-static double angle( Point pt1, Point pt2, Point pt0 )
-{
-    double dx1 = pt1.x - pt0.x;
-    double dy1 = pt1.y - pt0.y;
-    double dx2 = pt2.x - pt0.x;
-    double dy2 = pt2.y - pt0.y;
-    return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
-}
-
-
-// returns sequence of squares detected on the image.
-// the sequence is stored in the specified memory storage
-static void findSquares( const Mat& image, vector<vector<Point> >& squares )
-{
-    squares.clear();
-    Mat pyr, timg, gray0(image.size(), CV_8U), gray;
-
-    // down-scale and upscale the image to filter out the noise
-    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
-    pyrUp(pyr, timg, image.size());
-    vector<vector<Point> > contours;
-
-    // find squares in every color plane of the image
-    for( int c = 0; c < 3; c++ )
-    {
-        int ch[] = {c, 0};
-        mixChannels(&timg, 1, &gray0, 1, ch, 1);
-
-        // try several threshold levels
-        for( int l = 0; l < N; l++ )
-        {
-            // hack: use Canny instead of zero threshold level.
-            // Canny helps to catch squares with gradient shading
-            if( l == 0 )
-            {
-                // apply Canny. Take the upper threshold from slider
-                // and set the lower to 0 (which forces edges merging)
-                Canny(gray0, gray, 0, thresh, 5);
-                // dilate canny output to remove potential
-                // holes between edge segments
-                dilate(gray, gray, Mat(), Point(-1,-1));
-            }
-            else
-            {
-                // apply threshold if l!=0:
-                //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
-                cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
-            }
-
-            // find contours and store them all as a list
-            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
-
-            vector<Point> approx;
-
-            // test each contour
-            for( size_t i = 0; i < contours.size(); i++ )
-            {
-                // approximate contour with accuracy proportional
-                // to the contour perimeter
-                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
-
-                // square contours should have 4 vertices after approximation
-                // relatively large area (to filter out noisy contours)
-                // and be convex.
-                // Note: absolute value of an area is used because
-                // area may be positive or negative - in accordance with the
-                // contour orientation
-                if( approx.size() == 4 &&
-                        fabs(contourArea(Mat(approx))) > 1000 &&
-                        isContourConvex(Mat(approx)) )
-                {
-                    double maxCosine = 0;
-
-                    for( int j = 2; j < 5; j++ )
-                    {
-                        // find the maximum cosine of the angle between joint edges
-                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
-                        maxCosine = MAX(maxCosine, cosine);
-                    }
-
-                    // if cosines of all angles are small
-                    // (all angles are ~90 degree) then write quandrange
-                    // vertices to resultant sequence
-                    if( maxCosine < 0.3 )
-                        squares.push_back(approx);
-                }
-            }
-        }
-    }
-}
-
-
-// returns sequence of squares detected on the image.
-// the sequence is stored in the specified memory storage
-static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
-{
-    squares.clear();
-
-    Mat gray;
-    cv::ocl::oclMat pyr_ocl, timg_ocl, gray0_ocl, gray_ocl;
-
-    // down-scale and upscale the image to filter out the noise
-    ocl::pyrDown(ocl::oclMat(image), pyr_ocl);
-    ocl::pyrUp(pyr_ocl, timg_ocl);
-
-    vector<vector<Point> > contours;
-    vector<cv::ocl::oclMat> gray0s;
-    ocl::split(timg_ocl, gray0s); // split 3 channels into a vector of oclMat
-    // find squares in every color plane of the image
-    for( int c = 0; c < 3; c++ )
-    {
-        gray0_ocl = gray0s[c];
-        // try several threshold levels
-        for( int l = 0; l < N; l++ )
-        {
-            // hack: use Canny instead of zero threshold level.
-            // Canny helps to catch squares with gradient shading
-            if( l == 0 )
-            {
-                // do canny on OpenCL device
-                // apply Canny. Take the upper threshold from slider
-                // and set the lower to 0 (which forces edges merging)
-                cv::ocl::Canny(gray0_ocl, gray_ocl, 0, thresh, 5);
-                // dilate canny output to remove potential
-                // holes between edge segments
-                ocl::dilate(gray_ocl, gray_ocl, Mat(), Point(-1,-1));
-                gray = Mat(gray_ocl);
-            }
-            else
-            {
-                // apply threshold if l!=0:
-                //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
-                cv::ocl::threshold(gray0_ocl, gray_ocl, (l+1)*255/N, 255, THRESH_BINARY);
-                gray = gray_ocl;
-            }
-
-            // find contours and store them all as a list
-            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
-
-            vector<Point> approx;
-            // test each contour
-            for( size_t i = 0; i < contours.size(); i++ )
-            {
-                // approximate contour with accuracy proportional
-                // to the contour perimeter
-                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
-
-                // square contours should have 4 vertices after approximation
-                // relatively large area (to filter out noisy contours)
-                // and be convex.
-                // Note: absolute value of an area is used because
-                // area may be positive or negative - in accordance with the
-                // contour orientation
-                if( approx.size() == 4 &&
-                        fabs(contourArea(Mat(approx))) > 1000 &&
-                        isContourConvex(Mat(approx)) )
-                {
-                    double maxCosine = 0;
-                    for( int j = 2; j < 5; j++ )
-                    {
-                        // find the maximum cosine of the angle between joint edges
-                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
-                        maxCosine = MAX(maxCosine, cosine);
-                    }
-
-                    // if cosines of all angles are small
-                    // (all angles are ~90 degree) then write quandrange
-                    // vertices to resultant sequence
-                    if( maxCosine < 0.3 )
-                        squares.push_back(approx);
-                }
-            }
-        }
-    }
-}
-
-
-// the function draws all the squares in the image
-static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
-{
-    for( size_t i = 0; i < squares.size(); i++ )
-    {
-        const Point* p = &squares[i][0];
-        int n = (int)squares[i].size();
-        polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA);
-    }
-}
-
-
-// draw both pure-C++ and ocl square results onto a single image
-static Mat drawSquaresBoth( const Mat& image,
-                            const vector<vector<Point> >& sqsCPP,
-                            const vector<vector<Point> >& sqsOCL
-)
-{
-    Mat imgToShow(Size(image.cols * 2, image.rows), image.type());
-    Mat lImg = imgToShow(Rect(Point(0, 0), image.size()));
-    Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size()));
-    image.copyTo(lImg);
-    image.copyTo(rImg);
-    drawSquares(lImg, sqsCPP);
-    drawSquares(rImg, sqsOCL);
-    float fontScale = 0.8f;
-    Scalar white = Scalar::all(255), black = Scalar::all(0);
-
-    putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
-    putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
-    putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
-    putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
-
-    return imgToShow;
-}
-
-
-int main(int argc, char** argv)
-{
-    const char* keys =
-        "{ i | input   |                    | specify input image }"
-        "{ o | output  | squares_output.jpg | specify output save path}"
-        "{ h | help    | false              | print help message }";
-    CommandLineParser cmd(argc, argv, keys);
-    string inputName = cmd.get<string>("i");
-    string outfile = cmd.get<string>("o");
-
-    if(cmd.get<bool>("help"))
-    {
-        cout << "Usage : squares [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    int iterations = 10;
-    namedWindow( wndname, WINDOW_AUTOSIZE );
-    vector<vector<Point> > squares_cpu, squares_ocl;
-
-    Mat image = imread(inputName, 1);
-    if( image.empty() )
-    {
-        cout << "Couldn't load " << inputName << endl;
-        return EXIT_FAILURE;
-    }
-
-    int j = iterations;
-    int64 t_ocl = 0, t_cpp = 0;
-    //warm-ups
-    cout << "warming up ..." << endl;
-    findSquares(image, squares_cpu);
-    findSquares_ocl(image, squares_ocl);
-
-
-#ifdef ACCURACY_CHECK
-    cout << "Checking ocl accuracy ... " << endl;
-    cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
-#endif
-    do
-    {
-        int64 t_start = cv::getTickCount();
-        findSquares(image, squares_cpu);
-        t_cpp += cv::getTickCount() - t_start;
-
-
-        t_start  = cv::getTickCount();
-        findSquares_ocl(image, squares_ocl);
-        t_ocl += cv::getTickCount() - t_start;
-        cout << "run loop: " << j << endl;
-    }
-    while(--j);
-    cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
-    cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
-
-    Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
-    imshow(wndname, result);
-    imwrite(outfile, result);
-    waitKey(0);
-
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp
deleted file mode 100644
index 880ad51..0000000
--- a/samples/ocl/stereo_match.cpp
+++ /dev/null
@@ -1,384 +0,0 @@
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <iomanip>
-#include <stdexcept>
-
-#include "opencv2/core/utility.hpp"
-#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/highgui/highgui.hpp"
-
-using namespace cv;
-using namespace std;
-using namespace ocl;
-
-
-struct App
-{
-    App(CommandLineParser& cmd);
-    void run();
-    void handleKey(char key);
-    void printParams() const;
-
-    void workBegin()
-    {
-        work_begin = getTickCount();
-    }
-    void workEnd()
-    {
-        int64 d = getTickCount() - work_begin;
-        double f = getTickFrequency();
-        work_fps = f / d;
-    }
-    string method_str() const
-    {
-        switch (method)
-        {
-        case BM:
-            return "BM";
-        case BP:
-            return "BP";
-        case CSBP:
-            return "CSBP";
-        }
-        return "";
-    }
-    string text() const
-    {
-        stringstream ss;
-        ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left)
-           << setprecision(4) << work_fps;
-        return ss.str();
-    }
-private:
-    bool running, write_once;
-
-    Mat left_src, right_src;
-    Mat left, right;
-    oclMat d_left, d_right;
-
-    StereoBM_OCL bm;
-    StereoBeliefPropagation bp;
-    StereoConstantSpaceBP csbp;
-
-    int64 work_begin;
-    double work_fps;
-
-    string l_img, r_img;
-    string out_img;
-    enum {BM, BP, CSBP} method;
-    int ndisp; // Max disparity + 1
-    enum {GPU, CPU} type;
-};
-
-int main(int argc, char** argv)
-{
-    const char* keys =
-        "{ h | help     | false                     | print help message }"
-        "{ l | left     |                           | specify left image }"
-        "{ r | right    |                           | specify right image }"
-        "{ m | method   | BM                        | specify match method(BM/BP/CSBP) }"
-        "{ n | ndisp    | 64                        | specify number of disparity levels }"
-        "{ o | output   | stereo_match_output.jpg   | specify output path when input is images}";
-
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.get<bool>("help"))
-    {
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return 0;
-    }
-
-    try
-    {
-        App app(cmd);
-        cout << "Device name:" << cv::ocl::Context::getContext()->getDeviceInfo().deviceName << endl;
-
-        app.run();
-    }
-    catch (const exception& e)
-    {
-        cout << "error: " << e.what() << endl;
-    }
-
-    return EXIT_SUCCESS;
-}
-
-App::App(CommandLineParser& cmd)
-    : running(false),method(BM)
-{
-    cout << "stereo_match_ocl sample\n";
-    cout << "\nControls:\n"
-         << "\tesc - exit\n"
-         << "\to - save output image once\n"
-         << "\tp - print current parameters\n"
-         << "\tg - convert source images into gray\n"
-         << "\tm - change stereo match method\n"
-         << "\ts - change Sobel prefiltering flag (for BM only)\n"
-         << "\t1/q - increase/decrease maximum disparity\n"
-         << "\t2/w - increase/decrease window size (for BM only)\n"
-         << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
-         << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
-
-    l_img = cmd.get<string>("l");
-    r_img = cmd.get<string>("r");
-    string mstr = cmd.get<string>("m");
-    if(mstr == "BM") method = BM;
-    else if(mstr == "BP") method = BP;
-    else if(mstr == "CSBP") method = CSBP;
-    else cout << "unknown method!\n";
-    ndisp = cmd.get<int>("n");
-    out_img = cmd.get<string>("o");
-    write_once = false;
-}
-
-
-void App::run()
-{
-    // Load images
-    left_src = imread(l_img);
-    right_src = imread(r_img);
-    if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
-    if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
-
-    cvtColor(left_src, left, COLOR_BGR2GRAY);
-    cvtColor(right_src, right, COLOR_BGR2GRAY);
-
-    d_left.upload(left);
-    d_right.upload(right);
-
-    imshow("left", left);
-    imshow("right", right);
-
-    // Set common parameters
-    bm.ndisp = ndisp;
-    bp.ndisp = ndisp;
-    csbp.ndisp = ndisp;
-
-    cout << endl;
-    printParams();
-
-    running = true;
-    while (running)
-    {
-        // Prepare disparity map of specified type
-        Mat disp;
-        oclMat d_disp;
-        workBegin();
-        switch (method)
-        {
-        case BM:
-            if (d_left.channels() > 1 || d_right.channels() > 1)
-            {
-                cout << "BM doesn't support color images\n";
-                cvtColor(left_src, left, COLOR_BGR2GRAY);
-                cvtColor(right_src, right, COLOR_BGR2GRAY);
-                cout << "image_channels: " << left.channels() << endl;
-                d_left.upload(left);
-                d_right.upload(right);
-                imshow("left", left);
-                imshow("right", right);
-            }
-            bm(d_left, d_right, d_disp);
-            break;
-        case BP:
-            bp(d_left, d_right, d_disp);
-            break;
-        case CSBP:
-            csbp(d_left, d_right, d_disp);
-            break;
-        }
-
-        // Show results
-        d_disp.download(disp);
-        workEnd();
-
-        if (method != BM)
-        {
-            disp.convertTo(disp, 0);
-        }
-        putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
-        imshow("disparity", disp);
-        if(write_once)
-        {
-            imwrite(out_img, disp);
-            write_once = false;
-        }
-        handleKey((char)waitKey(3));
-    }
-}
-
-
-void App::printParams() const
-{
-    cout << "--- Parameters ---\n";
-    cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
-    cout << "image_channels: " << left.channels() << endl;
-    cout << "method: " << method_str() << endl
-         << "ndisp: " << ndisp << endl;
-    switch (method)
-    {
-    case BM:
-        cout << "win_size: " << bm.winSize << endl;
-        cout << "prefilter_sobel: " << bm.preset << endl;
-        break;
-    case BP:
-        cout << "iter_count: " << bp.iters << endl;
-        cout << "level_count: " << bp.levels << endl;
-        break;
-    case CSBP:
-        cout << "iter_count: " << csbp.iters << endl;
-        cout << "level_count: " << csbp.levels << endl;
-        break;
-    }
-    cout << endl;
-}
-
-
-void App::handleKey(char key)
-{
-    switch (key)
-    {
-    case 27:
-        running = false;
-        break;
-    case 'p':
-    case 'P':
-        printParams();
-        break;
-    case 'g':
-    case 'G':
-        if (left.channels() == 1 && method != BM)
-        {
-            left = left_src;
-            right = right_src;
-        }
-        else
-        {
-            cvtColor(left_src, left, COLOR_BGR2GRAY);
-            cvtColor(right_src, right, COLOR_BGR2GRAY);
-        }
-        d_left.upload(left);
-        d_right.upload(right);
-        cout << "image_channels: " << left.channels() << endl;
-        imshow("left", left);
-        imshow("right", right);
-        break;
-    case 'm':
-    case 'M':
-        switch (method)
-        {
-        case BM:
-            method = BP;
-            break;
-        case BP:
-            method = CSBP;
-            break;
-        case CSBP:
-            method = BM;
-            break;
-        }
-        cout << "method: " << method_str() << endl;
-        break;
-    case 's':
-    case 'S':
-        if (method == BM)
-        {
-            switch (bm.preset)
-            {
-            case StereoBM_OCL::BASIC_PRESET:
-                bm.preset = StereoBM_OCL::PREFILTER_XSOBEL;
-                break;
-            case StereoBM_OCL::PREFILTER_XSOBEL:
-                bm.preset = StereoBM_OCL::BASIC_PRESET;
-                break;
-            }
-            cout << "prefilter_sobel: " << bm.preset << endl;
-        }
-        break;
-    case '1':
-        ndisp == 1 ? ndisp = 8 : ndisp += 8;
-        cout << "ndisp: " << ndisp << endl;
-        bm.ndisp = ndisp;
-        bp.ndisp = ndisp;
-        csbp.ndisp = ndisp;
-        break;
-    case 'q':
-    case 'Q':
-        ndisp = max(ndisp - 8, 1);
-        cout << "ndisp: " << ndisp << endl;
-        bm.ndisp = ndisp;
-        bp.ndisp = ndisp;
-        csbp.ndisp = ndisp;
-        break;
-    case '2':
-        if (method == BM)
-        {
-            bm.winSize = min(bm.winSize + 1, 51);
-            cout << "win_size: " << bm.winSize << endl;
-        }
-        break;
-    case 'w':
-    case 'W':
-        if (method == BM)
-        {
-            bm.winSize = max(bm.winSize - 1, 2);
-            cout << "win_size: " << bm.winSize << endl;
-        }
-        break;
-    case '3':
-        if (method == BP)
-        {
-            bp.iters += 1;
-            cout << "iter_count: " << bp.iters << endl;
-        }
-        else if (method == CSBP)
-        {
-            csbp.iters += 1;
-            cout << "iter_count: " << csbp.iters << endl;
-        }
-        break;
-    case 'e':
-    case 'E':
-        if (method == BP)
-        {
-            bp.iters = max(bp.iters - 1, 1);
-            cout << "iter_count: " << bp.iters << endl;
-        }
-        else if (method == CSBP)
-        {
-            csbp.iters = max(csbp.iters - 1, 1);
-            cout << "iter_count: " << csbp.iters << endl;
-        }
-        break;
-    case '4':
-        if (method == BP)
-        {
-            bp.levels += 1;
-            cout << "level_count: " << bp.levels << endl;
-        }
-        else if (method == CSBP)
-        {
-            csbp.levels += 1;
-            cout << "level_count: " << csbp.levels << endl;
-        }
-        break;
-    case 'r':
-    case 'R':
-        if (method == BP)
-        {
-            bp.levels = max(bp.levels - 1, 1);
-            cout << "level_count: " << bp.levels << endl;
-        }
-        else if (method == CSBP)
-        {
-            csbp.levels = max(csbp.levels - 1, 1);
-            cout << "level_count: " << csbp.levels << endl;
-        }
-        break;
-    case 'o':
-    case 'O':
-        write_once = true;
-        break;
-    }
-}
diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp
deleted file mode 100644
index f88678b..0000000
--- a/samples/ocl/surf_matcher.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
-#include <iostream>
-#include <stdio.h>
-#include "opencv2/core/core.hpp"
-#include "opencv2/core/utility.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/nonfree/ocl.hpp"
-#include "opencv2/calib3d/calib3d.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-
-const int LOOP_NUM = 10;
-const int GOOD_PTS_MAX = 50;
-const float GOOD_PORTION = 0.15f;
-
-int64 work_begin = 0;
-int64 work_end = 0;
-
-static void workBegin()
-{
-    work_begin = getTickCount();
-}
-
-static void workEnd()
-{
-    work_end = getTickCount() - work_begin;
-}
-
-static double getTime()
-{
-    return work_end /((double)getTickFrequency() * 1000.);
-}
-
-template<class KPDetector>
-struct SURFDetector
-{
-    KPDetector surf;
-    SURFDetector(double hessian = 800.0)
-        :surf(hessian)
-    {
-    }
-    template<class T>
-    void operator()(const T& in, const T& mask, std::vector<cv::KeyPoint>& pts, T& descriptors, bool useProvided = false)
-    {
-        surf(in, mask, pts, descriptors, useProvided);
-    }
-};
-
-template<class KPMatcher>
-struct SURFMatcher
-{
-    KPMatcher matcher;
-    template<class T>
-    void match(const T& in1, const T& in2, std::vector<cv::DMatch>& matches)
-    {
-        matcher.match(in1, in2, matches);
-    }
-};
-
-static Mat drawGoodMatches(
-    const Mat& cpu_img1,
-    const Mat& cpu_img2,
-    const std::vector<KeyPoint>& keypoints1,
-    const std::vector<KeyPoint>& keypoints2,
-    std::vector<DMatch>& matches,
-    std::vector<Point2f>& scene_corners_
-    )
-{
-    //-- Sort matches and preserve top 10% matches
-    std::sort(matches.begin(), matches.end());
-    std::vector< DMatch > good_matches;
-    double minDist = matches.front().distance,
-           maxDist = matches.back().distance;
-
-    const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
-    for( int i = 0; i < ptsPairs; i++ )
-    {
-        good_matches.push_back( matches[i] );
-    }
-    std::cout << "\nMax distance: " << maxDist << std::endl;
-    std::cout << "Min distance: " << minDist << std::endl;
-
-    std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl;
-
-    // drawing the results
-    Mat img_matches;
-    drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
-                 good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
-                 std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
-
-    //-- Localize the object
-    std::vector<Point2f> obj;
-    std::vector<Point2f> scene;
-
-    for( size_t i = 0; i < good_matches.size(); i++ )
-    {
-        //-- Get the keypoints from the good matches
-        obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt );
-        scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt );
-    }
-    //-- Get the corners from the image_1 ( the object to be "detected" )
-    std::vector<Point2f> obj_corners(4);
-    obj_corners[0] = Point(0,0);
-    obj_corners[1] = Point( cpu_img1.cols, 0 );
-    obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows );
-    obj_corners[3] = Point( 0, cpu_img1.rows );
-    std::vector<Point2f> scene_corners(4);
-
-    Mat H = findHomography( obj, scene, RANSAC );
-    perspectiveTransform( obj_corners, scene_corners, H);
-
-    scene_corners_ = scene_corners;
-
-    //-- Draw lines between the corners (the mapped object in the scene - image_2 )
-    line( img_matches,
-          scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
-          Scalar( 0, 255, 0), 2, LINE_AA );
-    line( img_matches,
-          scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
-          Scalar( 0, 255, 0), 2, LINE_AA );
-    line( img_matches,
-          scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
-          Scalar( 0, 255, 0), 2, LINE_AA );
-    line( img_matches,
-          scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
-          Scalar( 0, 255, 0), 2, LINE_AA );
-    return img_matches;
-}
-
-////////////////////////////////////////////////////
-// This program demonstrates the usage of SURF_OCL.
-// use cpu findHomography interface to calculate the transformation matrix
-int main(int argc, char* argv[])
-{
-    const char* keys =
-        "{ help h    | false           | print help message  }"
-        "{ left l    |                 | specify left image  }"
-        "{ right r   |                 | specify right image }"
-        "{ output o  | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
-        "{ use_cpu c | false           | use CPU algorithms  }"
-        "{ use_all a | false           | use both CPU and GPU algorithms}";
-
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.get<bool>("help"))
-    {
-        std::cout << "Usage: surf_matcher [options]" << std::endl;
-        std::cout << "Available options:" << std::endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
-    oclMat img1, img2;
-    bool useCPU = cmd.get<bool>("c");
-    bool useGPU = false;
-    bool useALL = cmd.get<bool>("a");
-
-    std::string outpath = cmd.get<std::string>("o");
-
-    cpu_img1 = imread(cmd.get<std::string>("l"));
-    CV_Assert(!cpu_img1.empty());
-    cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
-    img1 = cpu_img1_grey;
-
-    cpu_img2 = imread(cmd.get<std::string>("r"));
-    CV_Assert(!cpu_img2.empty());
-    cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
-    img2 = cpu_img2_grey;
-
-    if (useALL)
-        useCPU = useGPU = false;
-    else if(!useCPU && !useALL)
-        useGPU = true;
-
-    if(!useCPU)
-        std::cout
-                << "Device name:"
-                << cv::ocl::Context::getContext()->getDeviceInfo().deviceName
-                << std::endl;
-
-    double surf_time = 0.;
-
-    //declare input/output
-    std::vector<KeyPoint> keypoints1, keypoints2;
-    std::vector<DMatch> matches;
-
-    std::vector<KeyPoint> gpu_keypoints1;
-    std::vector<KeyPoint> gpu_keypoints2;
-    std::vector<DMatch> gpu_matches;
-
-    Mat descriptors1CPU, descriptors2CPU;
-
-    oclMat keypoints1GPU, keypoints2GPU;
-    oclMat descriptors1GPU, descriptors2GPU;
-
-    //instantiate detectors/matchers
-    SURFDetector<SURF>     cpp_surf;
-    SURFDetector<SURF_OCL> ocl_surf;
-
-    SURFMatcher<BFMatcher>      cpp_matcher;
-    SURFMatcher<BFMatcher_OCL>  ocl_matcher;
-
-    //-- start of timing section
-    if (useCPU)
-    {
-        for (int i = 0; i <= LOOP_NUM; i++)
-        {
-            if(i == 1) workBegin();
-            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
-            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
-            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
-        }
-        workEnd();
-        std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
-        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
-
-        surf_time = getTime();
-        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
-    }
-    else if(useGPU)
-    {
-        for (int i = 0; i <= LOOP_NUM; i++)
-        {
-            if(i == 1) workBegin();
-            ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU);
-            ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU);
-            ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches);
-        }
-        workEnd();
-        std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
-        std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
-
-        surf_time = getTime();
-        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
-    }
-    else
-    {
-        //cpu runs
-        for (int i = 0; i <= LOOP_NUM; i++)
-        {
-            if(i == 1) workBegin();
-            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
-            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
-            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
-        }
-        workEnd();
-        std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
-        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
-
-        surf_time = getTime();
-        std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl;
-
-        //gpu runs
-        for (int i = 0; i <= LOOP_NUM; i++)
-        {
-            if(i == 1) workBegin();
-            ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU);
-            ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU);
-            ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches);
-        }
-        workEnd();
-        std::cout << "\nOCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
-        std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
-
-        surf_time = getTime();
-        std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
-
-    }
-
-    //--------------------------------------------------------------------------
-    std::vector<Point2f> cpu_corner;
-    Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner);
-
-    std::vector<Point2f> gpu_corner;
-    Mat ocl_img_matches;
-    if(useALL || (!useCPU&&!useGPU))
-    {
-        ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner);
-
-        //check accuracy
-        std::cout<<"\nCheck accuracy:\n";
-
-        if(cpu_corner.size()!=gpu_corner.size())
-            std::cout<<"Failed\n";
-        else
-        {
-            bool result = false;
-            for(size_t i = 0; i < cpu_corner.size(); i++)
-            {
-                if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
-                        ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
-                {
-                    std::cout<<"Failed\n";
-                    result = false;
-                    break;
-                }
-                result = true;
-            }
-            if(result)
-                std::cout<<"Passed\n";
-        }
-    }
-
-    //-- Show detected matches
-    if (useCPU)
-    {
-        namedWindow("cpu surf matches", 0);
-        imshow("cpu surf matches", img_matches);
-        imwrite(outpath, img_matches);
-    }
-    else if(useGPU)
-    {
-        namedWindow("ocl surf matches", 0);
-        imshow("ocl surf matches", img_matches);
-        imwrite(outpath, img_matches);
-    }
-    else
-    {
-        namedWindow("cpu surf matches", 0);
-        imshow("cpu surf matches", img_matches);
-
-        namedWindow("ocl surf matches", 0);
-        imshow("ocl surf matches", ocl_img_matches);
-    }
-    waitKey(0);
-    return EXIT_SUCCESS;
-}
diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp
deleted file mode 100644
index f678dd6..0000000
--- a/samples/ocl/tvl1_optical_flow.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <iomanip>
-
-#include "opencv2/core/utility.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/video/video.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace cv::ocl;
-
-typedef unsigned char uchar;
-#define LOOP_NUM 10
-int64 work_begin = 0;
-int64 work_end = 0;
-
-static void workBegin()
-{
-    work_begin = getTickCount();
-}
-static void workEnd()
-{
-    work_end += (getTickCount() - work_begin);
-}
-static double getTime()
-{
-    return work_end * 1000. / getTickFrequency();
-}
-
-template <typename T> inline T clamp (T x, T a, T b)
-{
-    return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
-}
-
-template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
-{
-    x = clamp(x, a, b);
-    return c + (d - c) * (x - a) / (b - a);
-}
-
-static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
-{
-    float maxDisplacement = 1.0f;
-
-    for (int i = 0; i < u.rows; ++i)
-    {
-        const float* ptr_u = u.ptr<float>(i);
-        const float* ptr_v = v.ptr<float>(i);
-
-        for (int j = 0; j < u.cols; ++j)
-        {
-            float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
-
-            if (d > maxDisplacement)
-                maxDisplacement = d;
-        }
-    }
-
-    flowField.create(u.size(), CV_8UC4);
-
-    for (int i = 0; i < flowField.rows; ++i)
-    {
-        const float* ptr_u = u.ptr<float>(i);
-        const float* ptr_v = v.ptr<float>(i);
-
-
-        Vec4b* row = flowField.ptr<Vec4b>(i);
-
-        for (int j = 0; j < flowField.cols; ++j)
-        {
-            row[j][0] = 0;
-            row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
-            row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
-            row[j][3] = 255;
-        }
-    }
-}
-
-
-int main(int argc, const char* argv[])
-{
-    const char* keys =
-        "{ h   | help       | false           | print help message }"
-        "{ l   | left       |                 | specify left image }"
-        "{ r   | right      |                 | specify right image }"
-        "{ o   | output     | tvl1_output.jpg | specify output save path }"
-        "{ c   | camera     | 0               | enable camera capturing }"
-        "{ s   | use_cpu    | false           | use cpu or gpu to process the image }"
-        "{ v   | video      |                 | use video as input }";
-
-    CommandLineParser cmd(argc, argv, keys);
-
-    if (cmd.get<bool>("help"))
-    {
-        cout << "Usage: pyrlk_optical_flow [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
-
-    string fname0 = cmd.get<string>("l");
-    string fname1 = cmd.get<string>("r");
-    string vdofile = cmd.get<string>("v");
-    string outpath = cmd.get<string>("o");
-    bool useCPU = cmd.get<bool>("s");
-    bool useCamera = cmd.get<bool>("c");
-    int inputName = cmd.get<int>("c");
-
-    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
-    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
-    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
-    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
-
-    Mat flow, show_flow;
-    Mat flow_vec[2];
-    if (frame0.empty() || frame1.empty())
-        useCamera = true;
-
-    if (useCamera)
-    {
-        VideoCapture capture;
-        Mat frame, frameCopy;
-        Mat frame0Gray, frame1Gray;
-        Mat ptr0, ptr1;
-
-        if(vdofile.empty())
-            capture.open( inputName );
-        else
-            capture.open(vdofile.c_str());
-
-        if(!capture.isOpened())
-        {
-            if(vdofile.empty())
-                cout << "Capture from CAM " << inputName << " didn't work" << endl;
-            else
-                cout << "Capture from file " << vdofile << " failed" <<endl;
-            goto nocamera;
-        }
-
-        cout << "In capture ..." << endl;
-        for(int i = 0;; i++)
-        {
-            if( !capture.read(frame) )
-                break;
-
-            if (i == 0)
-            {
-                frame.copyTo( frame0 );
-                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
-            }
-            else
-            {
-                if (i%2 == 1)
-                {
-                    frame.copyTo(frame1);
-                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
-                    ptr0 = frame0Gray;
-                    ptr1 = frame1Gray;
-                }
-                else
-                {
-                    frame.copyTo(frame0);
-                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
-                    ptr0 = frame1Gray;
-                    ptr1 = frame0Gray;
-                }
-
-                if (useCPU)
-                {
-                    alg->calc(ptr0, ptr1, flow);
-                    split(flow, flow_vec);
-                }
-                else
-                {
-                    oclMat d_flowx, d_flowy;
-                    d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy);
-                    d_flowx.download(flow_vec[0]);
-                    d_flowy.download(flow_vec[1]);
-                }
-                if (i%2 == 1)
-                    frame1.copyTo(frameCopy);
-                else
-                    frame0.copyTo(frameCopy);
-                getFlowField(flow_vec[0], flow_vec[1], show_flow);
-                imshow("tvl1 optical flow field", show_flow);
-            }
-
-            if( waitKey( 10 ) >= 0 )
-                break;
-        }
-
-        capture.release();
-    }
-    else
-    {
-nocamera:
-        oclMat d_flowx, d_flowy;
-        for(int i = 0; i <= LOOP_NUM; i ++)
-        {
-            cout << "loop" << i << endl;
-
-            if (i > 0) workBegin();
-            if (useCPU)
-            {
-                alg->calc(frame0, frame1, flow);
-                split(flow, flow_vec);
-            }
-            else
-            {
-                d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
-                d_flowx.download(flow_vec[0]);
-                d_flowy.download(flow_vec[1]);
-            }
-            if (i > 0 && i <= LOOP_NUM)
-                workEnd();
-
-            if (i == LOOP_NUM)
-            {
-                if (useCPU)
-                    cout << "average CPU time (noCamera) : ";
-                else
-                    cout << "average GPU time (noCamera) : ";
-                cout << getTime() / LOOP_NUM << " ms" << endl;
-
-                getFlowField(flow_vec[0], flow_vec[1], show_flow);
-                imshow("PyrLK [Sparse]", show_flow);
-                imwrite(outpath, show_flow);
-            }
-        }
-    }
-
-    waitKey();
-
-    return EXIT_SUCCESS;
-}
-- 
2.7.4